Message ID | 20220628152628.17802-2-goldstein.w.n@gmail.com |
---|---|
State | New |
Series | [v1] x86: Add support for building strstr with explicit ISA level |
On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Small changes for this function as the generic implementation remains
> the same for all ISA levels.
>
> Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> macros so that some of the checks at least can constant evaluate
> and some comments explaining the ISA constraints on the function.
> ---
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
>  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
>  2 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 0d28319905..a1bff560bc 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
>    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
>    IFUNC_IMPL (i, name, strstr,
> -             IFUNC_IMPL_ADD (array, i, strstr,
> -                             (CPU_FEATURE_USABLE (AVX512VL)
> -                              && CPU_FEATURE_USABLE (AVX512BW)
> -                              && CPU_FEATURE_USABLE (AVX512DQ)
> -                              && CPU_FEATURE_USABLE (BMI2)),
> -                             __strstr_avx512)
> +             /* All implementations of strstr are built at all ISA levels.  */
> +             IFUNC_IMPL_ADD (array, i, strstr,
> +                             (CPU_FEATURE_USABLE (AVX512VL)
> +                              && CPU_FEATURE_USABLE (AVX512BW)
> +                              && CPU_FEATURE_USABLE (AVX512DQ)
> +                              && CPU_FEATURE_USABLE (BMI2)),
> +                             __strstr_avx512)
>              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
>              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
>
> diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> index 2b83199245..3f86bfa5f2 100644
> --- a/sysdeps/x86_64/multiarch/strstr.c
> +++ b/sysdeps/x86_64/multiarch/strstr.c
> @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
>    const struct cpu_features *cpu_features = __get_cpu_features ();
>
>    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
>      return __strstr_avx512;
>
> -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))

Is Fast_Unaligned_Load set on all processors before?  If not, we should
revert

/* Feature(s) enabled when ISA level >= 2.  */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2

>      return __strstr_sse2_unaligned;
>
>    return __strstr_generic;
> --
> 2.34.1
>
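For readers following the thread, the "constant evaluate" effect the commit message relies on can be shown with a small self-contained sketch. All names below (MIN_ISA_LEVEL, ISA_FEATURE_USABLE_P, the per-feature *_MIN_LEVEL constants, runtime_usable_p) are invented for the example; they only mimic the shape of the real macros in sysdeps/x86/isa-level.h and are not the glibc definitions.

```c
/* Hypothetical sketch only -- not the glibc macros.  */
#include <stdio.h>

#define MIN_ISA_LEVEL 2          /* e.g. a glibc built for x86-64-v2 */
#define SSE4_2_MIN_LEVEL 2       /* assumed: guaranteed at level >= 2 */
#define AVX512VL_MIN_LEVEL 4     /* assumed: guaranteed at level >= 4 */

struct cpu_features
{
  int sse4_2_usable;
  int avx512vl_usable;
};

/* Stand-in for the CPUID-backed runtime test.  */
static int
runtime_usable_p (int bit)
{
  return bit;
}

/* When the build-time ISA level already guarantees the feature, the
   whole expression is the integer constant 1, so the compiler can fold
   the condition and drop the dead branch.  Otherwise it degrades to the
   normal runtime check.  */
#define ISA_FEATURE_USABLE_P(cf, FEATURE, bit)       \
  (FEATURE##_MIN_LEVEL <= MIN_ISA_LEVEL              \
   ? 1 : runtime_usable_p ((cf)->bit))

int
main (void)
{
  struct cpu_features cf = { .sse4_2_usable = 1, .avx512vl_usable = 0 };

  /* Folds to 1 at compile time, since 2 <= MIN_ISA_LEVEL.  */
  printf ("SSE4.2:   %d\n", ISA_FEATURE_USABLE_P (&cf, SSE4_2, sse4_2_usable));
  /* Still a genuine runtime check, since 4 > MIN_ISA_LEVEL.  */
  printf ("AVX512VL: %d\n", ISA_FEATURE_USABLE_P (&cf, AVX512VL, avx512vl_usable));
  return 0;
}
```

Compiled with optimization, the first check is seen as the constant 1 and the fallback branch disappears, which is why selectors written this way shrink at higher build-time ISA levels.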
On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Small changes for this function as the generic implementation remains
> > the same for all ISA levels.
> >
> > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > macros so that some of the checks at least can constant evaluate
> > and some comments explaining the ISA constraints on the function.
> > ---
> >  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> >  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
> >  2 files changed, 12 insertions(+), 11 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index 0d28319905..a1bff560bc 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >
> >    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
> >    IFUNC_IMPL (i, name, strstr,
> > -             IFUNC_IMPL_ADD (array, i, strstr,
> > -                             (CPU_FEATURE_USABLE (AVX512VL)
> > -                              && CPU_FEATURE_USABLE (AVX512BW)
> > -                              && CPU_FEATURE_USABLE (AVX512DQ)
> > -                              && CPU_FEATURE_USABLE (BMI2)),
> > -                             __strstr_avx512)
> > +             /* All implementations of strstr are built at all ISA levels.  */
> > +             IFUNC_IMPL_ADD (array, i, strstr,
> > +                             (CPU_FEATURE_USABLE (AVX512VL)
> > +                              && CPU_FEATURE_USABLE (AVX512BW)
> > +                              && CPU_FEATURE_USABLE (AVX512DQ)
> > +                              && CPU_FEATURE_USABLE (BMI2)),
> > +                             __strstr_avx512)
> >              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> >              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> >
> > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > index 2b83199245..3f86bfa5f2 100644
> > --- a/sysdeps/x86_64/multiarch/strstr.c
> > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> >    const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> >    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> >      return __strstr_avx512;
> >
> > -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
>
> Is Fast_Unaligned_Load set on all processors before?  If not, we should
> revert

It's set at ISA level >= 2. AFAICT the reason the bit exists is so that
CPUs with slow sse42 can fallback on an unaligned sse2 implementation
if it's available as opposed to the generic / often quite expensive aligned
sse2 impl.

Example in strcmp

>
> /* Feature(s) enabled when ISA level >= 2.  */
> #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
>
> >      return __strstr_sse2_unaligned;
> >
> >    return __strstr_generic;
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
On Tue, Jun 28, 2022 at 11:24 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > Small changes for this function as the generic implementation remains
> > > the same for all ISA levels.
> > >
> > > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > > macros so that some of the checks at least can constant evaluate
> > > and some comments explaining the ISA constraints on the function.
> > > ---
> > >  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> > >  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
> > >  2 files changed, 12 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > index 0d28319905..a1bff560bc 100644
> > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > >
> > >    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
> > >    IFUNC_IMPL (i, name, strstr,
> > > -             IFUNC_IMPL_ADD (array, i, strstr,
> > > -                             (CPU_FEATURE_USABLE (AVX512VL)
> > > -                              && CPU_FEATURE_USABLE (AVX512BW)
> > > -                              && CPU_FEATURE_USABLE (AVX512DQ)
> > > -                              && CPU_FEATURE_USABLE (BMI2)),
> > > -                             __strstr_avx512)
> > > +             /* All implementations of strstr are built at all ISA levels.  */
> > > +             IFUNC_IMPL_ADD (array, i, strstr,
> > > +                             (CPU_FEATURE_USABLE (AVX512VL)
> > > +                              && CPU_FEATURE_USABLE (AVX512BW)
> > > +                              && CPU_FEATURE_USABLE (AVX512DQ)
> > > +                              && CPU_FEATURE_USABLE (BMI2)),
> > > +                             __strstr_avx512)
> > >              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> > >              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> > >
> > > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > > index 2b83199245..3f86bfa5f2 100644
> > > --- a/sysdeps/x86_64/multiarch/strstr.c
> > > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> > >    const struct cpu_features *cpu_features = __get_cpu_features ();
> > >
> > >    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > >      return __strstr_avx512;
> > >
> > > -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > > +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
> >
> > Is Fast_Unaligned_Load set on all processors before?  If not, we should
> > revert
>
> It's set at ISA level >= 2. AFAICT the reason the bit exists is so that
> CPUs with slow sse42 can fallback on an unaligned sse2 implementation
> if it's available as opposed to the generic / often quite expensive aligned
> sse2 impl.

Is Fast_Unaligned_Load set on Zhaoxin processors?

> Example in strcmp
>
> >
> > /* Feature(s) enabled when ISA level >= 2.  */
> > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> >
> > >      return __strstr_sse2_unaligned;
> > >
> > >    return __strstr_generic;
> > > --
> > > 2.34.1
> > >
> >
> >
> > --
> > H.J.
On Tue, Jun 28, 2022 at 11:35 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 11:24 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > Small changes for this function as the generic implementation remains
> > > > the same for all ISA levels.
> > > >
> > > > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > > > macros so that some of the checks at least can constant evaluate
> > > > and some comments explaining the ISA constraints on the function.
> > > > ---
> > > >  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> > > >  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
> > > >  2 files changed, 12 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > index 0d28319905..a1bff560bc 100644
> > > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > > >
> > > >    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
> > > >    IFUNC_IMPL (i, name, strstr,
> > > > -             IFUNC_IMPL_ADD (array, i, strstr,
> > > > -                             (CPU_FEATURE_USABLE (AVX512VL)
> > > > -                              && CPU_FEATURE_USABLE (AVX512BW)
> > > > -                              && CPU_FEATURE_USABLE (AVX512DQ)
> > > > -                              && CPU_FEATURE_USABLE (BMI2)),
> > > > -                             __strstr_avx512)
> > > > +             /* All implementations of strstr are built at all ISA levels.  */
> > > > +             IFUNC_IMPL_ADD (array, i, strstr,
> > > > +                             (CPU_FEATURE_USABLE (AVX512VL)
> > > > +                              && CPU_FEATURE_USABLE (AVX512BW)
> > > > +                              && CPU_FEATURE_USABLE (AVX512DQ)
> > > > +                              && CPU_FEATURE_USABLE (BMI2)),
> > > > +                             __strstr_avx512)
> > > >              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> > > >              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> > > >
> > > > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > > > index 2b83199245..3f86bfa5f2 100644
> > > > --- a/sysdeps/x86_64/multiarch/strstr.c
> > > > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > > > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> > > >    const struct cpu_features *cpu_features = __get_cpu_features ();
> > > >
> > > >    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > >      return __strstr_avx512;
> > > >
> > > > -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > > > +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
> > >
> > > Is Fast_Unaligned_Load set on all processors before?  If not, we should
> > > revert
> >
> > It's set at ISA level >= 2. AFAICT the reason the bit exists is so that
> > CPUs with slow sse42 can fallback on an unaligned sse2 implementation
> > if it's available as opposed to the generic / often quite expensive aligned
> > sse2 impl.
>
> Is Fast_Unaligned_Load set on Zhaoxin processors?

No, but I believe we agreed to treat that specially only for the
functions where it really mattered. I.e. for memcpy where this is a
meaningful distinction we won't use the X86_ISA_CPU_FEATURE... checks,
we will force a runtime check.

>
> > Example in strcmp
> >
> > >
> > > /* Feature(s) enabled when ISA level >= 2.  */
> > > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > >
> > > >      return __strstr_sse2_unaligned;
> > > >
> > > >    return __strstr_generic;
> > > > --
> > > > 2.34.1
> > > >
> > >
> > >
> > > --
> > > H.J.
> >
>
> --
> H.J.
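To make the trade-off in the last two messages concrete, here is a toy sketch; every name in it (BUILD_ISA_LEVEL, FAST_UNALIGNED_LOAD_MIN_LEVEL, both check forms) is an assumption for illustration, not glibc code. It contrasts a check that may fold to a constant at the build ISA level, which is fine for strstr, with a check that is deliberately kept as a runtime test, the approach the thread suggests keeping for selectors such as memcpy where a CPU can satisfy the ISA level yet still leave the tuning bit clear.

```c
/* Toy illustration only -- not glibc definitions.  */
#include <stdbool.h>
#include <stdio.h>

#define BUILD_ISA_LEVEL 2
#define FAST_UNALIGNED_LOAD_MIN_LEVEL 2   /* "enabled when ISA level >= 2" */

struct cpu_features
{
  bool fast_unaligned_load;
};

/* Form 1: may constant-fold.  At BUILD_ISA_LEVEL >= 2 this is always 1,
   so the selector stops consulting the per-CPU bit at all.  */
#define ARCH_P_FOLDABLE(cf)                          \
  (FAST_UNALIGNED_LOAD_MIN_LEVEL <= BUILD_ISA_LEVEL  \
   ? true : (cf)->fast_unaligned_load)

/* Form 2: always a runtime test, preserving per-CPU tuning even on
   parts that otherwise satisfy the ISA level.  */
static bool
arch_p_runtime (const struct cpu_features *cf)
{
  return cf->fast_unaligned_load;
}

int
main (void)
{
  /* A CPU that reports the level-2 features but leaves the tuning bit
     clear -- the kind of case raised for Zhaoxin above.  */
  struct cpu_features cf = { .fast_unaligned_load = false };

  printf ("foldable check: %d\n", (int) ARCH_P_FOLDABLE (&cf));   /* prints 1 */
  printf ("runtime check:  %d\n", (int) arch_p_runtime (&cf));    /* prints 0 */
  return 0;
}
```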
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 0d28319905..a1bff560bc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/x86_64/multiarch/strstr.c.  */
   IFUNC_IMPL (i, name, strstr,
-             IFUNC_IMPL_ADD (array, i, strstr,
-                             (CPU_FEATURE_USABLE (AVX512VL)
-                              && CPU_FEATURE_USABLE (AVX512BW)
-                              && CPU_FEATURE_USABLE (AVX512DQ)
-                              && CPU_FEATURE_USABLE (BMI2)),
-                             __strstr_avx512)
+             /* All implementations of strstr are built at all ISA levels.  */
+             IFUNC_IMPL_ADD (array, i, strstr,
+                             (CPU_FEATURE_USABLE (AVX512VL)
+                              && CPU_FEATURE_USABLE (AVX512BW)
+                              && CPU_FEATURE_USABLE (AVX512DQ)
+                              && CPU_FEATURE_USABLE (BMI2)),
+                             __strstr_avx512)
              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
              IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
 
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 2b83199245..3f86bfa5f2 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
-      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
     return __strstr_avx512;
 
-  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
     return __strstr_sse2_unaligned;
 
   return __strstr_generic;
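For background on where IFUNC_SELECTOR plugs in, the toy program below sketches the underlying GNU ifunc mechanism with an invented my_strstr. It is only an illustration of the general technique: the names are made up, the two stub implementations simply defer to the library strstr, and glibc's own wiring goes through its ifunc helper macros and __get_cpu_features () rather than __builtin_cpu_supports.

```c
/* Toy GNU ifunc demo; build on x86-64 Linux with GCC.  */
#include <stdio.h>
#include <string.h>

static char *
my_strstr_generic (const char *haystack, const char *needle)
{
  return strstr (haystack, needle);   /* stand-in "portable" version */
}

static char *
my_strstr_sse4 (const char *haystack, const char *needle)
{
  return strstr (haystack, needle);   /* stand-in "optimized" version */
}

/* The resolver runs once, at relocation time, and returns the function
   the symbol should bind to; a glibc IFUNC_SELECTOR plays this role.  */
static char *(*resolve_my_strstr (void)) (const char *, const char *)
{
  __builtin_cpu_init ();
  return __builtin_cpu_supports ("sse4.2")
         ? my_strstr_sse4
         : my_strstr_generic;
}

/* Declares my_strstr as an indirect function resolved by the function
   named above.  */
char *my_strstr (const char *haystack, const char *needle)
     __attribute__ ((ifunc ("resolve_my_strstr")));

int
main (void)
{
  printf ("%s\n", my_strstr ("find the needle in the haystack", "needle"));
  return 0;
}
```

Calling my_strstr looks like an ordinary function call; the dynamic loader invokes the resolver once and patches the binding, which is exactly how the selected __strstr_* implementation ends up behind the public strstr symbol.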