diff mbox series

[v1,3/3] x86: Add sse42 implementation to strcmp's ifunc

Message ID 20220615002533.1741934-3-goldstein.w.n@gmail.com
State New
Headers show
Series [v1,1/3] x86: Fix misordered logic for setting `rep_movsb_stop_threshold` | expand

Commit Message

Noah Goldstein June 15, 2022, 12:25 a.m. UTC
This has been missing since the ifuncs were added.

The performance of SSE4.2 is preferable to SSE2.

Measured on Tigerlake with N = 20 runs.
Geometric Mean of all benchmarks SSE4.2 / SSE2: 0.906
---
 sysdeps/x86_64/multiarch/strcmp.c | 5 +++++
 1 file changed, 5 insertions(+)

Comments

H.J. Lu June 15, 2022, 1:08 a.m. UTC | #1
On Tue, Jun 14, 2022 at 5:25 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> This has been missing since the the ifuncs where added.
>
> The performance of SSE4.2 is preferable to to SSE2.
>
> Measured on Tigerlake with N = 20 runs.
> Geometric Mean of all benchmarks SSE4.2 / SSE2: 0.906
> ---
>  sysdeps/x86_64/multiarch/strcmp.c | 5 +++++
>  1 file changed, 5 insertions(+)
>
> diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
> index a248c2a6e6..9c1677724c 100644
> --- a/sysdeps/x86_64/multiarch/strcmp.c
> +++ b/sysdeps/x86_64/multiarch/strcmp.c
> @@ -28,6 +28,7 @@
>
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
>  extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
> @@ -52,6 +53,10 @@ IFUNC_SELECTOR (void)
>         return OPTIMIZE (avx2);
>      }
>
> +  if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
> +      && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
> +    return OPTIMIZE (sse42);
> +
>    if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
>      return OPTIMIZE (sse2_unaligned);
>
> --
> 2.34.1
>

LGTM.

Thanks.
Sunil Pandey July 14, 2022, 2:54 a.m. UTC | #2
On Tue, Jun 14, 2022 at 6:09 PM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Tue, Jun 14, 2022 at 5:25 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > This has been missing since the the ifuncs where added.
> >
> > The performance of SSE4.2 is preferable to to SSE2.
> >
> > Measured on Tigerlake with N = 20 runs.
> > Geometric Mean of all benchmarks SSE4.2 / SSE2: 0.906
> > ---
> >  sysdeps/x86_64/multiarch/strcmp.c | 5 +++++
> >  1 file changed, 5 insertions(+)
> >
> > diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
> > index a248c2a6e6..9c1677724c 100644
> > --- a/sysdeps/x86_64/multiarch/strcmp.c
> > +++ b/sysdeps/x86_64/multiarch/strcmp.c
> > @@ -28,6 +28,7 @@
> >
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
> > +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
> >  extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
> > @@ -52,6 +53,10 @@ IFUNC_SELECTOR (void)
> >         return OPTIMIZE (avx2);
> >      }
> >
> > +  if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
> > +      && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
> > +    return OPTIMIZE (sse42);
> > +
> >    if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> >      return OPTIMIZE (sse2_unaligned);
> >
> > --
> > 2.34.1
> >
>
> LGTM.
>
> Thanks.
>
> --
> H.J.

I would like to backport this patch to release branches.
Any comments or objections?

--Sunil
diff mbox series

Patch

diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
index a248c2a6e6..9c1677724c 100644
--- a/sysdeps/x86_64/multiarch/strcmp.c
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -28,6 +28,7 @@ 
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
@@ -52,6 +53,10 @@  IFUNC_SELECTOR (void)
 	return OPTIMIZE (avx2);
     }
 
+  if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+      && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+    return OPTIMIZE (sse42);
+
   if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
     return OPTIMIZE (sse2_unaligned);