diff mbox series

[i386] Add x86 tune to enable v2df vector reduction by paddpd.

Message ID 20210817090554.92213-1-hongtao.liu@intel.com
State New
Headers show
Series [i386] Add x86 tune to enable v2df vector reduction by paddpd. | expand

Commit Message

liuhongt Aug. 17, 2021, 9:05 a.m. UTC
Hi:
  This patch adds a new x86 tune named X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD
to enable haddpd for v2df vector reduction. The tune is disabled by default.

  Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
  Ok for trunk?

gcc/ChangeLog:

	PR target/97147
	* config/i386/i386.h (TARGET_V2DF_REDUCTION_PREFER_HADDPD):
	New macro.
	* config/i386/sse.md (*sse3_haddv2df3_low): Add
	TARGET_V2DF_REDUCTION_PREFER_HADDPD.
	(*sse3_hsubv2df3_low): Ditto.
	* config/i386/x86-tune.def
	(X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD): New tune.

gcc/testsuite/ChangeLog:

	PR target/97147
	* gcc.target/i386/pr54400.c: Adjust testcase.
	* gcc.target/i386/pr94147.c: New test.
---
 gcc/config/i386/i386.h                  |  2 ++
 gcc/config/i386/sse.md                  |  4 ++--
 gcc/config/i386/x86-tune.def            |  5 +++++
 gcc/testsuite/gcc.target/i386/pr54400.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr94147.c | 22 ++++++++++++++++++++++
 5 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr94147.c

Comments

Hongtao Liu Aug. 18, 2021, 3:35 a.m. UTC | #1
On Tue, Aug 17, 2021 at 5:06 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> Hi:
>   This patch adds a new x86 tune named X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD
> to enable haddpd for v2df vector reduction. The tune is disabled by default.
>
>   Bootstrapped and regtested on x86_64-linux-gnu{-m32,}
>   Ok for trunk?
>
Pushed to trunk.
> gcc/ChangeLog:
>
>         PR target/97147
>         * config/i386/i386.h (TARGET_V2DF_REDUCTION_PREFER_HADDPD):
>         New macro.
>         * config/i386/sse.md (*sse3_haddv2df3_low): Add
>         TARGET_V2DF_REDUCTION_PREFER_HADDPD.
>         (*sse3_hsubv2df3_low): Ditto.
>         * config/i386/x86-tune.def
>         (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD): New tune.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/97147
>         * gcc.target/i386/pr54400.c: Adjust testcase.
>         * gcc.target/i386/pr94147.c: New test.
> ---
>  gcc/config/i386/i386.h                  |  2 ++
>  gcc/config/i386/sse.md                  |  4 ++--
>  gcc/config/i386/x86-tune.def            |  5 +++++
>  gcc/testsuite/gcc.target/i386/pr54400.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr94147.c | 22 ++++++++++++++++++++++
>  5 files changed, 32 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr94147.c
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 21fe51bba40..b3e57a83846 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -418,6 +418,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>         ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
>  #define TARGET_EXPAND_ABS \
>         ix86_tune_features[X86_TUNE_EXPAND_ABS]
> +#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \
> +       ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD]
>
>  /* Feature tests against the various architecture variations.  */
>  enum ix86_arch_indices {
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 27e25cc7952..13889687793 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -2771,7 +2771,7 @@ (define_insn "*sse3_haddv2df3_low"
>           (vec_select:DF
>             (match_dup 1)
>             (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
> -  "TARGET_SSE3
> +  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD
>     && INTVAL (operands[2]) != INTVAL (operands[3])"
>    "@
>     haddpd\t{%0, %0|%0, %0}
> @@ -2790,7 +2790,7 @@ (define_insn "*sse3_hsubv2df3_low"
>           (vec_select:DF
>             (match_dup 1)
>             (parallel [(const_int 1)]))))]
> -  "TARGET_SSE3"
> +  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD"
>    "@
>     hsubpd\t{%0, %0|%0, %0}
>     vhsubpd\t{%1, %1, %0|%0, %1, %1}"
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index eb057a67750..8f55da89c92 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -452,6 +452,11 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER)
>     smaller FMA chain.  */
>  DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3)
>
> +/* X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD: Prefer haddpd
> +   for v2df vector reduction.  */
> +DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
> +         "v2df_reduction_prefer_haddpd", m_NONE)
> +
>  /*****************************************************************************/
>  /* AVX instruction selection tuning (some of SSE flags affects AVX, too)     */
>  /*****************************************************************************/
> diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c b/gcc/testsuite/gcc.target/i386/pr54400.c
> index 5ed5ba06644..3a450376b9e 100644
> --- a/gcc/testsuite/gcc.target/i386/pr54400.c
> +++ b/gcc/testsuite/gcc.target/i386/pr54400.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
> +/* { dg-options "-O2 -msse3 -mfpmath=sse -mtune-ctrl=v2df_reduction_prefer_haddpd" } */
>
>  #include <x86intrin.h>
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr94147.c b/gcc/testsuite/gcc.target/i386/pr94147.c
> new file mode 100644
> index 00000000000..8ff5c34834f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr94147.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
> +
> +#include <x86intrin.h>
> +
> +double f (__m128d p)
> +{
> +  return p[0] - p[1];
> +}
> +
> +double g1 (__m128d p)
> +{
> +  return p[0] + p[1];
> +}
> +
> +double g2 (__m128d p)
> +{
> +  return p[1] + p[0];
> +}
> +
> +/* { dg-final { scan-assembler-not "hsubpd" } } */
> +/* { dg-final { scan-assembler-not "haddpd" } } */
> --
> 2.18.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 21fe51bba40..b3e57a83846 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -418,6 +418,8 @@  extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
 #define TARGET_EXPAND_ABS \
 	ix86_tune_features[X86_TUNE_EXPAND_ABS]
+#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \
+	ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 27e25cc7952..13889687793 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2771,7 +2771,7 @@  (define_insn "*sse3_haddv2df3_low"
 	  (vec_select:DF
 	    (match_dup 1)
 	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
-  "TARGET_SSE3
+  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD
    && INTVAL (operands[2]) != INTVAL (operands[3])"
   "@
    haddpd\t{%0, %0|%0, %0}
@@ -2790,7 +2790,7 @@  (define_insn "*sse3_hsubv2df3_low"
 	  (vec_select:DF
 	    (match_dup 1)
 	    (parallel [(const_int 1)]))))]
-  "TARGET_SSE3"
+  "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD"
   "@
    hsubpd\t{%0, %0|%0, %0}
    vhsubpd\t{%1, %1, %0|%0, %1, %1}"
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index eb057a67750..8f55da89c92 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -452,6 +452,11 @@  DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER)
    smaller FMA chain.  */
 DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3)
 
+/* X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD: Prefer haddpd
+   for v2df vector reduction.  */
+DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
+	  "v2df_reduction_prefer_haddpd", m_NONE)
+
 /*****************************************************************************/
 /* AVX instruction selection tuning (some of SSE flags affects AVX, too)     */
 /*****************************************************************************/
diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c b/gcc/testsuite/gcc.target/i386/pr54400.c
index 5ed5ba06644..3a450376b9e 100644
--- a/gcc/testsuite/gcc.target/i386/pr54400.c
+++ b/gcc/testsuite/gcc.target/i386/pr54400.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse -mtune-ctrl=v2df_reduction_prefer_haddpd" } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/pr94147.c b/gcc/testsuite/gcc.target/i386/pr94147.c
new file mode 100644
index 00000000000..8ff5c34834f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94147.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse3 -mfpmath=sse" } */
+
+#include <x86intrin.h>
+
+double f (__m128d p)
+{
+  return p[0] - p[1];
+}
+
+double g1 (__m128d p)
+{
+  return p[0] + p[1];
+}
+
+double g2 (__m128d p)
+{
+  return p[1] + p[0];
+}
+
+/* { dg-final { scan-assembler-not "hsubpd" } } */
+/* { dg-final { scan-assembler-not "haddpd" } } */