Message ID | 20210817090554.92213-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | [i386] Add x86 tune to enable v2df vector reduction by haddpd. | expand |
On Tue, Aug 17, 2021 at 5:06 PM liuhongt <hongtao.liu@intel.com> wrote: > > Hi: > This patch adds a new x86 tune named X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD > to enable haddpd for v2df vector reduction; the tune is disabled by default. > > Bootstrapped and regtested on x86_64-linux-gnu{-m32,} > Ok for trunk? > Pushed to trunk. > gcc/ChangeLog: > > PR target/97147 > * config/i386/i386.h (TARGET_V2DF_REDUCTION_PREFER_HADDPD): > New macro. > * config/i386/sse.md (*sse3_haddv2df3_low): Add > TARGET_V2DF_REDUCTION_PREFER_HADDPD. > (*sse3_hsubv2df3_low): Ditto. > * config/i386/x86-tune.def > (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD): New tune. > > gcc/testsuite/ChangeLog: > > PR target/97147 > * gcc.target/i386/pr54400.c: Adjust testcase. > * gcc.target/i386/pr94147.c: New test. > --- > gcc/config/i386/i386.h | 2 ++ > gcc/config/i386/sse.md | 4 ++-- > gcc/config/i386/x86-tune.def | 5 +++++ > gcc/testsuite/gcc.target/i386/pr54400.c | 2 +- > gcc/testsuite/gcc.target/i386/pr94147.c | 22 ++++++++++++++++++++++ > 5 files changed, 32 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr94147.c > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 21fe51bba40..b3e57a83846 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -418,6 +418,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; > ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] > #define TARGET_EXPAND_ABS \ > ix86_tune_features[X86_TUNE_EXPAND_ABS] > +#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \ > + ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD] > > /* Feature tests against the various architecture variations. 
*/ > enum ix86_arch_indices { > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 27e25cc7952..13889687793 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -2771,7 +2771,7 @@ (define_insn "*sse3_haddv2df3_low" > (vec_select:DF > (match_dup 1) > (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))] > - "TARGET_SSE3 > + "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD > && INTVAL (operands[2]) != INTVAL (operands[3])" > "@ > haddpd\t{%0, %0|%0, %0} > @@ -2790,7 +2790,7 @@ (define_insn "*sse3_hsubv2df3_low" > (vec_select:DF > (match_dup 1) > (parallel [(const_int 1)]))))] > - "TARGET_SSE3" > + "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD" > "@ > hsubpd\t{%0, %0|%0, %0} > vhsubpd\t{%1, %1, %0|%0, %1, %1}" > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def > index eb057a67750..8f55da89c92 100644 > --- a/gcc/config/i386/x86-tune.def > +++ b/gcc/config/i386/x86-tune.def > @@ -452,6 +452,11 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER) > smaller FMA chain. */ > DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3) > > +/* X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD: Prefer haddpd > + for v2df vector reduction. 
*/ > +DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, > + "v2df_reduction_prefer_haddpd", m_NONE) > + > /*****************************************************************************/ > /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ > /*****************************************************************************/ > diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c b/gcc/testsuite/gcc.target/i386/pr54400.c > index 5ed5ba06644..3a450376b9e 100644 > --- a/gcc/testsuite/gcc.target/i386/pr54400.c > +++ b/gcc/testsuite/gcc.target/i386/pr54400.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -msse3 -mfpmath=sse" } */ > +/* { dg-options "-O2 -msse3 -mfpmath=sse -mtune-ctrl=v2df_reduction_prefer_haddpd" } */ > > #include <x86intrin.h> > > diff --git a/gcc/testsuite/gcc.target/i386/pr94147.c b/gcc/testsuite/gcc.target/i386/pr94147.c > new file mode 100644 > index 00000000000..8ff5c34834f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr94147.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse3 -mfpmath=sse" } */ > + > +#include <x86intrin.h> > + > +double f (__m128d p) > +{ > + return p[0] - p[1]; > +} > + > +double g1 (__m128d p) > +{ > + return p[0] + p[1]; > +} > + > +double g2 (__m128d p) > +{ > + return p[1] + p[0]; > +} > + > +/* { dg-final { scan-assembler-not "hsubpd" } } */ > +/* { dg-final { scan-assembler-not "haddpd" } } */ > -- > 2.18.1 >
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 21fe51bba40..b3e57a83846 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -418,6 +418,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] #define TARGET_EXPAND_ABS \ ix86_tune_features[X86_TUNE_EXPAND_ABS] +#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \ + ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 27e25cc7952..13889687793 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2771,7 +2771,7 @@ (define_insn "*sse3_haddv2df3_low" (vec_select:DF (match_dup 1) (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))] - "TARGET_SSE3 + "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD && INTVAL (operands[2]) != INTVAL (operands[3])" "@ haddpd\t{%0, %0|%0, %0} @@ -2790,7 +2790,7 @@ (define_insn "*sse3_hsubv2df3_low" (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))))] - "TARGET_SSE3" + "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD" "@ hsubpd\t{%0, %0|%0, %0} vhsubpd\t{%1, %1, %0|%0, %1, %1}" diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index eb057a67750..8f55da89c92 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -452,6 +452,11 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER) smaller FMA chain. */ DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3) +/* X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD: Prefer haddpd + for v2df vector reduction. 
*/ +DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, + "v2df_reduction_prefer_haddpd", m_NONE) + /*****************************************************************************/ /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ /*****************************************************************************/ diff --git a/gcc/testsuite/gcc.target/i386/pr54400.c b/gcc/testsuite/gcc.target/i386/pr54400.c index 5ed5ba06644..3a450376b9e 100644 --- a/gcc/testsuite/gcc.target/i386/pr54400.c +++ b/gcc/testsuite/gcc.target/i386/pr54400.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -msse3 -mfpmath=sse" } */ +/* { dg-options "-O2 -msse3 -mfpmath=sse -mtune-ctrl=v2df_reduction_prefer_haddpd" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/pr94147.c b/gcc/testsuite/gcc.target/i386/pr94147.c new file mode 100644 index 00000000000..8ff5c34834f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr94147.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse3 -mfpmath=sse" } */ + +#include <x86intrin.h> + +double f (__m128d p) +{ + return p[0] - p[1]; +} + +double g1 (__m128d p) +{ + return p[0] + p[1]; +} + +double g2 (__m128d p) +{ + return p[1] + p[0]; +} + +/* { dg-final { scan-assembler-not "hsubpd" } } */ +/* { dg-final { scan-assembler-not "haddpd" } } */