diff mbox series

[v4] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h

Message ID 20220624222910.1359991-1-goldstein.w.n@gmail.com
State New
Headers show
Series [v4] x86: Fix backwards Prefer_No_VZEROUPPER check in ifunc-evex.h | expand

Commit Message

Noah Goldstein June 24, 2022, 10:29 p.m. UTC
Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
CPU_FEATURES_ARCH_P check can be inverted if the
MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
the check.

Use this new macro to correct the backwards check in ifunc-evex.h
---
 sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
 sysdeps/x86/isa-level.h               | 32 ++++++++-----------
 sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
 3 files changed, 56 insertions(+), 26 deletions(-)

Comments

H.J. Lu June 24, 2022, 10:41 p.m. UTC | #1
On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> CPU_FEATURES_ARCH_P check can be inverted if the
> MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> the check.
>
> Use this new macro to correct the backwards check in ifunc-evex.h
> ---
>  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
>  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
>  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
>  3 files changed, 56 insertions(+), 26 deletions(-)
>
> diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> index ba6826d518..4cdc71e40e 100644
> --- a/sysdeps/x86/isa-ifunc-macros.h
> +++ b/sysdeps/x86/isa-ifunc-macros.h
> @@ -56,15 +56,49 @@
>  # define X86_IFUNC_IMPL_ADD_V1(...)
>  #endif
>
> -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> +   macros are wrappers for the the respective
> +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> +   ways.
> +
> +    1.  The USABLE_P version is evaluated to true when the feature
> +        is enabled.
> +
> +    2.  The ARCH_P version has a third argument `not`.  The `not`
> +        argument can either be '!' or empty.  If the feature is
> +        enabled above an ISA level, the third argument should be empty
> +        and the expression is evaluated to true when the feature is
> +        enabled.  If the feature is disabled above an ISA level, the
> +        third argument should be `!` and the expression is evaluated
> +        to true when the feature is disabled.
>

No need for the comments below.  These macros are self-explaining.

> +    i.e
> +
> +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is set                                  -> True
> +
> +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is set                                  -> True
> +
> +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> +        B) runtime bit is NOT set                              -> True
> +
> +    All cases not listed will evaluate to false.
> +
> +    Only the A) cases will constantly evaluate.
> + */
>
>  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
>     || CPU_FEATURE_USABLE_P (ptr, name))
>
> -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> -   || CPU_FEATURES_ARCH_P (ptr, name))
> +
> +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> +   || not CPU_FEATURES_ARCH_P (ptr, name))
> +
>
>  #endif
> diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> index 7cae11c228..73937fecdc 100644
> --- a/sysdeps/x86/isa-level.h
> +++ b/sysdeps/x86/isa-level.h
> @@ -64,14 +64,8 @@
>  #define MINIMUM_X86_ISA_LEVEL                                                 \
>    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
>
> -
> -/*
> - * CPU Features that are hard coded as enabled depending on ISA build
> - *   level.
> - *    - Values > 0 features are always ENABLED if:
> - *          Value >= MINIMUM_X86_ISA_LEVEL
> - */
> -
> +/* Depending on the minimum ISA level, a feature check result can be a
> +   compile-time constant.. */
>
>  /* ISA level >= 4 guaranteed includes.  */
>  #define AVX512VL_X86_ISA_LEVEL 4
> @@ -81,20 +75,22 @@
>  #define AVX2_X86_ISA_LEVEL 3
>  #define BMI2_X86_ISA_LEVEL 3
>
> -/*
> - * NB: This may not be fully assumable for ISA level >= 3. From
> - * looking over the architectures supported in cpu-features.h the
> - * following CPUs may have an issue with this being default set:
> - *      - AMD Excavator
> - */
> +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> +   for the following CPUs:
> +        - AMD Excavator
> +   when ISA level < 3.  */
>  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
>
> -/*
> - * KNL (the only cpu that sets this supported in cpu-features.h)
> - * builds with ISA V1 so this shouldn't harm any architectures.
> - */
> +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> +   for the following CPUs:
> +        - Intel KNL
> +   when ISA level < 3.  */
>  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
>
> +
> +/* ISA level >= 2 guaranteed includes.  */
> +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> +

These belong to a different patch.

>  #define ISA_SHOULD_BUILD(isa_build_level)                              \
>    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
>     || defined ISA_DEFAULT_IMPL
> diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> index 856c6261f8..310cfd269f 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
>    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
>        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
>        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                     AVX_Fast_Unaligned_Load))
> +                                     AVX_Fast_Unaligned_Load, ))
>      {
>        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
>           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
>         return OPTIMIZE (avx2_rtm);
>
>        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> -                                      Prefer_No_VZEROUPPER))
> +                                      Prefer_No_VZEROUPPER, !))
>         return OPTIMIZE (avx2);
>      }
>
> --
> 2.34.1
>
Noah Goldstein June 24, 2022, 10:57 p.m. UTC | #2
On Fri, Jun 24, 2022 at 3:42 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > CPU_FEATURES_ARCH_P check can be inverted if the
> > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > the check.
> >
> > Use this new macro to correct the backwards check in ifunc-evex.h
> > ---
> >  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
> >  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
> >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
> >  3 files changed, 56 insertions(+), 26 deletions(-)
> >
> > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > index ba6826d518..4cdc71e40e 100644
> > --- a/sysdeps/x86/isa-ifunc-macros.h
> > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > @@ -56,15 +56,49 @@
> >  # define X86_IFUNC_IMPL_ADD_V1(...)
> >  #endif
> >
> > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > +   macros are wrappers for the the respective
> > +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> > +   ways.
> > +
> > +    1.  The USABLE_P version is evaluated to true when the feature
> > +        is enabled.
> > +
> > +    2.  The ARCH_P version has a third argument `not`.  The `not`
> > +        argument can either be '!' or empty.  If the feature is
> > +        enabled above an ISA level, the third argument should be empty
> > +        and the expression is evaluated to true when the feature is
> > +        enabled.  If the feature is disabled above an ISA level, the
> > +        third argument should be `!` and the expression is evaluated
> > +        to true when the feature is disabled.
> >
>
> No need for the comments below.  These macros are self-explaining.

Do they harm things at all? I prefer them.
>
> > +    i.e
> > +
> > +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is set                                  -> True
> > +
> > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is set                                  -> True
> > +
> > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > +        B) runtime bit is NOT set                              -> True
> > +
> > +    All cases not listed will evaluate to false.
> > +
> > +    Only the A) cases will constantly evaluate.
> > + */
> >
> >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> >     || CPU_FEATURE_USABLE_P (ptr, name))
> >
> > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > +
> > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > +
> >
> >  #endif
> > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > index 7cae11c228..73937fecdc 100644
> > --- a/sysdeps/x86/isa-level.h
> > +++ b/sysdeps/x86/isa-level.h
> > @@ -64,14 +64,8 @@
> >  #define MINIMUM_X86_ISA_LEVEL                                                 \
> >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> >
> > -
> > -/*
> > - * CPU Features that are hard coded as enabled depending on ISA build
> > - *   level.
> > - *    - Values > 0 features are always ENABLED if:
> > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > - */
> > -
> > +/* Depending on the minimum ISA level, a feature check result can be a
> > +   compile-time constant.. */
> >
> >  /* ISA level >= 4 guaranteed includes.  */
> >  #define AVX512VL_X86_ISA_LEVEL 4
> > @@ -81,20 +75,22 @@
> >  #define AVX2_X86_ISA_LEVEL 3
> >  #define BMI2_X86_ISA_LEVEL 3
> >
> > -/*
> > - * NB: This may not be fully assumable for ISA level >= 3. From
> > - * looking over the architectures supported in cpu-features.h the
> > - * following CPUs may have an issue with this being default set:
> > - *      - AMD Excavator
> > - */
> > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > +   for the following CPUs:
> > +        - AMD Excavator
> > +   when ISA level < 3.  */
> >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> >
> > -/*
> > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > - * builds with ISA V1 so this shouldn't harm any architectures.
> > - */
> > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > +   for the following CPUs:
> > +        - Intel KNL
> > +   when ISA level < 3.  */
> >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> >
> > +
> > +/* ISA level >= 2 guaranteed includes.  */
> > +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > +
>
> These belong to a different patch.
>
> >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> >    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
> >     || defined ISA_DEFAULT_IMPL
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > index 856c6261f8..310cfd269f 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                     AVX_Fast_Unaligned_Load))
> > +                                     AVX_Fast_Unaligned_Load, ))
> >      {
> >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> >         return OPTIMIZE (avx2_rtm);
> >
> >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > -                                      Prefer_No_VZEROUPPER))
> > +                                      Prefer_No_VZEROUPPER, !))
> >         return OPTIMIZE (avx2);
> >      }
> >
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
H.J. Lu June 24, 2022, 11:05 p.m. UTC | #3
On Fri, Jun 24, 2022 at 3:57 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 3:42 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > > CPU_FEATURES_ARCH_P check can be inverted if the
> > > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > > the check.
> > >
> > > Use this new macro to correct the backwards check in ifunc-evex.h
> > > ---
> > >  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
> > >  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
> > >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
> > >  3 files changed, 56 insertions(+), 26 deletions(-)
> > >
> > > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > > index ba6826d518..4cdc71e40e 100644
> > > --- a/sysdeps/x86/isa-ifunc-macros.h
> > > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > > @@ -56,15 +56,49 @@
> > >  # define X86_IFUNC_IMPL_ADD_V1(...)
> > >  #endif
> > >
> > > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > > +   macros are wrappers for the the respective
> > > +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> > > +   ways.
> > > +
> > > +    1.  The USABLE_P version is evaluated to true when the feature
> > > +        is enabled.
> > > +
> > > +    2.  The ARCH_P version has a third argument `not`.  The `not`
> > > +        argument can either be '!' or empty.  If the feature is
> > > +        enabled above an ISA level, the third argument should be empty
> > > +        and the expression is evaluated to true when the feature is
> > > +        enabled.  If the feature is disabled above an ISA level, the
> > > +        third argument should be `!` and the expression is evaluated
> > > +        to true when the feature is disabled.
> > >
> >
> > No need for the comments below.  These macros are self-explaining.
>
> Do they harm things at all? I prefer them.

They lead to more questions.  How are macro arguments related
to A and B?  How is the runtime bit checked?

> >
> > > +    i.e
> > > +
> > > +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > +        B) runtime bit is set                                  -> True
> > > +
> > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > +        B) runtime bit is set                                  -> True
> > > +
> > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > +        B) runtime bit is NOT set                              -> True
> > > +
> > > +    All cases not listed will evaluate to false.
> > > +
> > > +    Only the A) cases will constantly evaluate.
> > > + */
> > >
> > >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > >     || CPU_FEATURE_USABLE_P (ptr, name))
> > >
> > > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > > +
> > > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > > +
> > >
> > >  #endif
> > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > > index 7cae11c228..73937fecdc 100644
> > > --- a/sysdeps/x86/isa-level.h
> > > +++ b/sysdeps/x86/isa-level.h
> > > @@ -64,14 +64,8 @@
> > >  #define MINIMUM_X86_ISA_LEVEL                                                 \
> > >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> > >
> > > -
> > > -/*
> > > - * CPU Features that are hard coded as enabled depending on ISA build
> > > - *   level.
> > > - *    - Values > 0 features are always ENABLED if:
> > > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > > - */
> > > -
> > > +/* Depending on the minimum ISA level, a feature check result can be a
> > > +   compile-time constant.. */
> > >
> > >  /* ISA level >= 4 guaranteed includes.  */
> > >  #define AVX512VL_X86_ISA_LEVEL 4
> > > @@ -81,20 +75,22 @@
> > >  #define AVX2_X86_ISA_LEVEL 3
> > >  #define BMI2_X86_ISA_LEVEL 3
> > >
> > > -/*
> > > - * NB: This may not be fully assumable for ISA level >= 3. From
> > > - * looking over the architectures supported in cpu-features.h the
> > > - * following CPUs may have an issue with this being default set:
> > > - *      - AMD Excavator
> > > - */
> > > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > > +   for the following CPUs:
> > > +        - AMD Excavator
> > > +   when ISA level < 3.  */
> > >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> > >
> > > -/*
> > > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > > - * builds with ISA V1 so this shouldn't harm any architectures.
> > > - */
> > > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > > +   for the following CPUs:
> > > +        - Intel KNL
> > > +   when ISA level < 3.  */
> > >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> > >
> > > +
> > > +/* ISA level >= 2 guaranteed includes.  */
> > > +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > > +
> >
> > These belong to a different patch.
> >
> > >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> > >    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
> > >     || defined ISA_DEFAULT_IMPL
> > > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > index 856c6261f8..310cfd269f 100644
> > > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> > >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > -                                     AVX_Fast_Unaligned_Load))
> > > +                                     AVX_Fast_Unaligned_Load, ))
> > >      {
> > >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> > >         return OPTIMIZE (avx2_rtm);
> > >
> > >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > -                                      Prefer_No_VZEROUPPER))
> > > +                                      Prefer_No_VZEROUPPER, !))
> > >         return OPTIMIZE (avx2);
> > >      }
> > >
> > > --
> > > 2.34.1
> > >
> >
> >
> > --
> > H.J.
Noah Goldstein June 24, 2022, 11:16 p.m. UTC | #4
On Fri, Jun 24, 2022 at 4:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Jun 24, 2022 at 3:57 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Fri, Jun 24, 2022 at 3:42 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Fri, Jun 24, 2022 at 3:29 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > Add third argument to X86_ISA_CPU_FEATURES_ARCH_P macro so the runtime
> > > > CPU_FEATURES_ARCH_P check can be inverted if the
> > > > MINIMUM_X86_ISA_LEVEL is not high enough to constantly evaluate
> > > > the check.
> > > >
> > > > Use this new macro to correct the backwards check in ifunc-evex.h
> > > > ---
> > > >  sysdeps/x86/isa-ifunc-macros.h        | 46 +++++++++++++++++++++++----
> > > >  sysdeps/x86/isa-level.h               | 32 ++++++++-----------
> > > >  sysdeps/x86_64/multiarch/ifunc-evex.h |  4 +--
> > > >  3 files changed, 56 insertions(+), 26 deletions(-)
> > > >
> > > > diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
> > > > index ba6826d518..4cdc71e40e 100644
> > > > --- a/sysdeps/x86/isa-ifunc-macros.h
> > > > +++ b/sysdeps/x86/isa-ifunc-macros.h
> > > > @@ -56,15 +56,49 @@
> > > >  # define X86_IFUNC_IMPL_ADD_V1(...)
> > > >  #endif
> > > >
> > > > -#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
> > > > -  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
> > > > +/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
> > > > +   macros are wrappers for the the respective
> > > > +   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
> > > > +   ways.
> > > > +
> > > > +    1.  The USABLE_P version is evaluated to true when the feature
> > > > +        is enabled.
> > > > +
> > > > +    2.  The ARCH_P version has a third argument `not`.  The `not`
> > > > +        argument can either be '!' or empty.  If the feature is
> > > > +        enabled above an ISA level, the third argument should be empty
> > > > +        and the expression is evaluated to true when the feature is
> > > > +        enabled.  If the feature is disabled above an ISA level, the
> > > > +        third argument should be `!` and the expression is evaluated
> > > > +        to true when the feature is disabled.
> > > >
> > >
> > > No need for the comments below.  These macros are self-explaining.
> >
> > Do they harm things at all? I prefer them.
>
> They lead to more questions.  How are macro arguments related
> to A and B?  How is the runtime bit checked?

Okay. removed in v5.
>
> > >
> > > > +    i.e
> > > > +
> > > > +    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
> > > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > > +        B) runtime bit is set                                  -> True
> > > > +
> > > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
> > > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > > +        B) runtime bit is set                                  -> True
> > > > +
> > > > +    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
> > > > +        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
> > > > +        B) runtime bit is NOT set                              -> True
> > > > +
> > > > +    All cases not listed will evaluate to false.
> > > > +
> > > > +    Only the A) cases will constantly evaluate.
> > > > + */
> > > >
> > > >  #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
> > > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > > >     || CPU_FEATURE_USABLE_P (ptr, name))
> > > >
> > > > -#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
> > > > -  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
> > > > -   || CPU_FEATURES_ARCH_P (ptr, name))
> > > > +
> > > > +#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
> > > > +  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
> > > > +   || not CPU_FEATURES_ARCH_P (ptr, name))
> > > > +
> > > >
> > > >  #endif
> > > > diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
> > > > index 7cae11c228..73937fecdc 100644
> > > > --- a/sysdeps/x86/isa-level.h
> > > > +++ b/sysdeps/x86/isa-level.h
> > > > @@ -64,14 +64,8 @@
> > > >  #define MINIMUM_X86_ISA_LEVEL                                                 \
> > > >    (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
> > > >
> > > > -
> > > > -/*
> > > > - * CPU Features that are hard coded as enabled depending on ISA build
> > > > - *   level.
> > > > - *    - Values > 0 features are always ENABLED if:
> > > > - *          Value >= MINIMUM_X86_ISA_LEVEL
> > > > - */
> > > > -
> > > > +/* Depending on the minimum ISA level, a feature check result can be a
> > > > +   compile-time constant.. */
> > > >
> > > >  /* ISA level >= 4 guaranteed includes.  */
> > > >  #define AVX512VL_X86_ISA_LEVEL 4
> > > > @@ -81,20 +75,22 @@
> > > >  #define AVX2_X86_ISA_LEVEL 3
> > > >  #define BMI2_X86_ISA_LEVEL 3
> > > >
> > > > -/*
> > > > - * NB: This may not be fully assumable for ISA level >= 3. From
> > > > - * looking over the architectures supported in cpu-features.h the
> > > > - * following CPUs may have an issue with this being default set:
> > > > - *      - AMD Excavator
> > > > - */
> > > > +/* NB: This feature is enabled when ISA level >= 3, which was disabled
> > > > +   for the following CPUs:
> > > > +        - AMD Excavator
> > > > +   when ISA level < 3.  */
> > > >  #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
> > > >
> > > > -/*
> > > > - * KNL (the only cpu that sets this supported in cpu-features.h)
> > > > - * builds with ISA V1 so this shouldn't harm any architectures.
> > > > - */
> > > > +/* NB: This feature is disabled when ISA level >= 3, which was enabled
> > > > +   for the following CPUs:
> > > > +        - Intel KNL
> > > > +   when ISA level < 3.  */
> > > >  #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
> > > >
> > > > +
> > > > +/* ISA level >= 2 guaranteed includes.  */
> > > > +#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > > > +
> > >
> > > These belong to a different patch.

Removed in v5.
> > >
> > > >  #define ISA_SHOULD_BUILD(isa_build_level)                              \
> > > >    (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
> > > >     || defined ISA_DEFAULT_IMPL
> > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > > index 856c6261f8..310cfd269f 100644
> > > > --- a/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > > +++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
> > > > @@ -37,7 +37,7 @@ IFUNC_SELECTOR (void)
> > > >    if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > > >        && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > > >        && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > > -                                     AVX_Fast_Unaligned_Load))
> > > > +                                     AVX_Fast_Unaligned_Load, ))
> > > >      {
> > > >        if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > >           && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > > > @@ -52,7 +52,7 @@ IFUNC_SELECTOR (void)
> > > >         return OPTIMIZE (avx2_rtm);
> > > >
> > > >        if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > > > -                                      Prefer_No_VZEROUPPER))
> > > > +                                      Prefer_No_VZEROUPPER, !))
> > > >         return OPTIMIZE (avx2);
> > > >      }
> > > >
> > > > --
> > > > 2.34.1
> > > >
> > >
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
diff mbox series

Patch

diff --git a/sysdeps/x86/isa-ifunc-macros.h b/sysdeps/x86/isa-ifunc-macros.h
index ba6826d518..4cdc71e40e 100644
--- a/sysdeps/x86/isa-ifunc-macros.h
+++ b/sysdeps/x86/isa-ifunc-macros.h
@@ -56,15 +56,49 @@ 
 # define X86_IFUNC_IMPL_ADD_V1(...)
 #endif
 
-#define X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED(name)                  \
-  ((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)
+/* Both X86_ISA_CPU_FEATURE_USABLE_P and X86_ISA_CPU_FEATURES_ARCH_P
+   macros are wrappers for the the respective
+   CPU_FEATURE{S}_{USABLE|ARCH}_P runtime checks.  They differ in two
+   ways.
+
+    1.  The USABLE_P version is evaluated to true when the feature
+        is enabled.
+
+    2.  The ARCH_P version has a third argument `not`.  The `not`
+        argument can either be '!' or empty.  If the feature is
+        enabled above an ISA level, the third argument should be empty
+        and the expression is evaluated to true when the feature is
+        enabled.  If the feature is disabled above an ISA level, the
+        third argument should be `!` and the expression is evaluated
+        to true when the feature is disabled.
+
+    i.e
+
+    X86_ISA_CPU_FEATURE_USABLE_P(p, FEATURE):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is set                                  -> True
+
+    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, <empty>):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is set                                  -> True
+
+    X86_ISA_CPU_FEATURE_ARCH_P(p, FEATURE, !):
+        A) default value of feature <= MINIMUM_X86_ISA_LEVEL   -> True
+        B) runtime bit is NOT set                              -> True
+
+    All cases not listed will evaluate to false.
+
+    Only the A) cases will constantly evaluate.
+ */
 
 #define X86_ISA_CPU_FEATURE_USABLE_P(ptr, name)                        \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
    || CPU_FEATURE_USABLE_P (ptr, name))
 
-#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name)                         \
-  (X86_ISA_CPU_FEATURE_CONST_CHECK_ENABLED (name)                      \
-   || CPU_FEATURES_ARCH_P (ptr, name))
+
+#define X86_ISA_CPU_FEATURES_ARCH_P(ptr, name, not)                    \
+  (((name##_X86_ISA_LEVEL) <= MINIMUM_X86_ISA_LEVEL)                   \
+   || not CPU_FEATURES_ARCH_P (ptr, name))
+
 
 #endif
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 7cae11c228..73937fecdc 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -64,14 +64,8 @@ 
 #define MINIMUM_X86_ISA_LEVEL                                                 \
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
 
-
-/*
- * CPU Features that are hard coded as enabled depending on ISA build
- *   level.
- *    - Values > 0 features are always ENABLED if:
- *          Value >= MINIMUM_X86_ISA_LEVEL
- */
-
+/* Depending on the minimum ISA level, a feature check result can be a
+   compile-time constant.. */
 
 /* ISA level >= 4 guaranteed includes.  */
 #define AVX512VL_X86_ISA_LEVEL 4
@@ -81,20 +75,22 @@ 
 #define AVX2_X86_ISA_LEVEL 3
 #define BMI2_X86_ISA_LEVEL 3
 
-/*
- * NB: This may not be fully assumable for ISA level >= 3. From
- * looking over the architectures supported in cpu-features.h the
- * following CPUs may have an issue with this being default set:
- *      - AMD Excavator
- */
+/* NB: This feature is enabled when ISA level >= 3, which was disabled
+   for the following CPUs:
+        - AMD Excavator
+   when ISA level < 3.  */
 #define AVX_Fast_Unaligned_Load_X86_ISA_LEVEL 3
 
-/*
- * KNL (the only cpu that sets this supported in cpu-features.h)
- * builds with ISA V1 so this shouldn't harm any architectures.
- */
+/* NB: This feature is disabled when ISA level >= 3, which was enabled
+   for the following CPUs:
+        - Intel KNL
+   when ISA level < 3.  */
 #define Prefer_No_VZEROUPPER_X86_ISA_LEVEL 3
 
+
+/* ISA level >= 2 guaranteed includes.  */
+#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
+
 #define ISA_SHOULD_BUILD(isa_build_level)                              \
   (MINIMUM_X86_ISA_LEVEL <= (isa_build_level) && IS_IN (libc))         \
    || defined ISA_DEFAULT_IMPL
diff --git a/sysdeps/x86_64/multiarch/ifunc-evex.h b/sysdeps/x86_64/multiarch/ifunc-evex.h
index 856c6261f8..310cfd269f 100644
--- a/sysdeps/x86_64/multiarch/ifunc-evex.h
+++ b/sysdeps/x86_64/multiarch/ifunc-evex.h
@@ -37,7 +37,7 @@  IFUNC_SELECTOR (void)
   if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
       && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
       && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				      AVX_Fast_Unaligned_Load))
+				      AVX_Fast_Unaligned_Load, ))
     {
       if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
 	  && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
@@ -52,7 +52,7 @@  IFUNC_SELECTOR (void)
 	return OPTIMIZE (avx2_rtm);
 
       if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
-				       Prefer_No_VZEROUPPER))
+				       Prefer_No_VZEROUPPER, !))
 	return OPTIMIZE (avx2);
     }