Message ID | 20211216055939.13323-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | [i386] Optimize bit_and op1 float_vector_all_ones_operands to op1. | expand |
On Thu, Dec 16, 2021 at 1:59 PM liuhongt <hongtao.liu@intel.com> wrote: > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? Pushed to trunk. > > gcc/ChangeLog: > > PR target/98468 > * config/i386/sse.md (*bit_and_float_vector_all_ones): New > pre-reload splitter. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr98468.c: New test. > --- > gcc/config/i386/sse.md | 12 +++++++++ > gcc/testsuite/gcc.target/i386/pr98468.c | 35 +++++++++++++++++++++++++ > 2 files changed, 47 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr98468.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 5421fb51684..a715263740b 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -4432,6 +4432,18 @@ (define_insn "*<code><mode>3<mask_name>" > (const_string "<sseinsnmode>") > (const_string "XI")))]) > > +;; Generic part doesn't support the simplification of logic operation with > +;; float_vector_all_ones_operand since it's not valid rtl. Add combine splitter > +;; for them, it should be safe since there's no SIMD Floating-Point Exceptions. 
> +(define_insn_and_split "*bit_and_float_vector_all_ones" > + [(set (match_operand:VFB 0 "nonimmediate_operand") > + (and:VFB (match_operand:VFB 1 "nonimmediate_operand") > + (match_operand:VFB 2 "float_vector_all_ones_operand")))] > + "TARGET_SSE && ix86_pre_reload_split ()" > + "#" > + "&& 1" > + [(set (match_dup 0) (match_dup 1))]) > + > (define_expand "copysign<mode>3" > [(set (match_dup 4) > (and:VFB > diff --git a/gcc/testsuite/gcc.target/i386/pr98468.c b/gcc/testsuite/gcc.target/i386/pr98468.c > new file mode 100644 > index 00000000000..ca3aa256c52 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr98468.c > @@ -0,0 +1,35 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512dq -mavx512vl -O2" } */ > +/* { dg-final { scan-assembler-not "vxorp" } } */ > +/* { dg-final { scan-assembler-not "vandnp" } } */ > + > +#include<immintrin.h> > +__m128 f(__m128 val) > +{ > + return _mm_andnot_ps(_mm_set_ps1(0.0f), val); > +} > + > +__m256 f2(__m256 val) > +{ > + return _mm256_andnot_ps(_mm256_set1_ps(0.0f), val); > +} > + > +__m512 f3(__m512 val) > +{ > + return _mm512_andnot_ps(_mm512_set1_ps(0.0f), val); > +} > + > +__m128d f4(__m128d val) > +{ > + return _mm_andnot_pd(_mm_set_pd1(0.0), val); > +} > + > +__m256d f5(__m256d val) > +{ > + return _mm256_andnot_pd(_mm256_set1_pd(0.0), val); > +} > + > +__m512d f6(__m512d val) > +{ > + return _mm512_andnot_pd(_mm512_set1_pd(0.0), val); > +} > -- > 2.18.1 >
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5421fb51684..a715263740b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4432,6 +4432,18 @@ (define_insn "*<code><mode>3<mask_name>" (const_string "<sseinsnmode>") (const_string "XI")))]) +;; Generic part doesn't support the simplification of logic operation with +;; float_vector_all_ones_operand since it's not valid rtl. Add combine splitter +;; for them, it should be safe since there's no SIMD Floating-Point Exceptions. +(define_insn_and_split "*bit_and_float_vector_all_ones" + [(set (match_operand:VFB 0 "nonimmediate_operand") + (and:VFB (match_operand:VFB 1 "nonimmediate_operand") + (match_operand:VFB 2 "float_vector_all_ones_operand")))] + "TARGET_SSE && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 1))]) + (define_expand "copysign<mode>3" [(set (match_dup 4) (and:VFB diff --git a/gcc/testsuite/gcc.target/i386/pr98468.c b/gcc/testsuite/gcc.target/i386/pr98468.c new file mode 100644 index 00000000000..ca3aa256c52 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98468.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-not "vxorp" } } */ +/* { dg-final { scan-assembler-not "vandnp" } } */ + +#include<immintrin.h> +__m128 f(__m128 val) +{ + return _mm_andnot_ps(_mm_set_ps1(0.0f), val); +} + +__m256 f2(__m256 val) +{ + return _mm256_andnot_ps(_mm256_set1_ps(0.0f), val); +} + +__m512 f3(__m512 val) +{ + return _mm512_andnot_ps(_mm512_set1_ps(0.0f), val); +} + +__m128d f4(__m128d val) +{ + return _mm_andnot_pd(_mm_set_pd1(0.0), val); +} + +__m256d f5(__m256d val) +{ + return _mm256_andnot_pd(_mm256_set1_pd(0.0), val); +} + +__m512d f6(__m512d val) +{ + return _mm512_andnot_pd(_mm512_set1_pd(0.0), val); +}