Message ID | 20240109104648.675293-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Optimize A < B ? A : B to MIN_EXPR. | expand |
On Tue, Jan 9, 2024 at 11:48 AM liuhongt <hongtao.liu@intel.com> wrote: > > > I wonder if you can amend the existing patterns instead by iterating > > over cond/vec_cond. There are quite some (look for uses of > > minmax_from_comparison) that could be adapted to vectors. > > > > The ones matching the simple form you match are > > > > #if GIMPLE > > /* A >= B ? A : B -> max (A, B) and friends. The code is still > > in fold_cond_expr_with_comparison for GENERIC folding with > > some extra constraints. */ > > (for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt) > > (simplify > > (cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1)) > > (convert3? @0) (convert4? @1)) > > (if (!HONOR_SIGNED_ZEROS (type) > > ... > This pattern is a conditional operation that treats a vector as a complete > unit, it's more like cbranchm which is different from vec_cond_expr. > So I add my patterns after this. > > > > I think. Consider at least placing the new patterns next to that. > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? OK. Richard. > Similar for A < B ? B : A to MAX_EXPR. > There is code in the frontend to optimize such patterns, but it fails to > handle the testcase in the PR since it's exposed at gimple level when > folding backend builtins. > > pr95906 now can be optimized to MAX_EXPR as it's commented in the > testcase. > > // FIXME: this should further optimize to a MAX_EXPR > typedef signed char v16i8 __attribute__((vector_size(16))); > v16i8 f(v16i8 a, v16i8 b) > > gcc/ChangeLog: > > PR target/104401 > * match.pd (VEC_COND_EXPR: A < B ? A : B -> MIN_EXPR): New pattern match. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr104401.c: New test. > * gcc.dg/tree-ssa/pr95906.c: Adjust testcase. 
> --- > gcc/match.pd | 21 ++++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr95906.c | 3 +-- > gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++ > 3 files changed, 49 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 7b4b15acc41..d8e2009a83f 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -5672,6 +5672,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (VECTOR_TYPE_P (type)) > (view_convert @c0) > (convert @c0)))))))) > + > +/* This is for VEC_COND_EXPR > + Optimize A < B ? A : B to MIN (A, B) > + A > B ? A : B to MAX (A, B). */ > +(for cmp (lt le ungt unge gt ge unlt unle) > + minmax (min min min min max max max max) > + MINMAX (MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR) > + (simplify > + (vec_cond (cmp @0 @1) @0 @1) > + (if (VECTOR_INTEGER_TYPE_P (type) > + && target_supports_op_p (type, MINMAX, optab_vector)) > + (minmax @0 @1)))) > + > +(for cmp (lt le ungt unge gt ge unlt unle) > + minmax (max max max max min min min min) > + MINMAX (MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR) > + (simplify > + (vec_cond (cmp @0 @1) @1 @0) > + (if (VECTOR_INTEGER_TYPE_P (type) > + && target_supports_op_p (type, MINMAX, optab_vector)) > + (minmax @0 @1)))) > #endif > > (for cnd (cond vec_cond) > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c > index 3d820a58e93..d15670f3e9e 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c > @@ -1,7 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */ > > -// FIXME: this should further optimize to a MAX_EXPR > typedef signed char v16i8 __attribute__((vector_size(16))); > v16i8 f(v16i8 a, v16i8 b) > { > @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b) > } > > /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" 
} } */ > -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */ > +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c b/gcc/testsuite/gcc.target/i386/pr104401.c > new file mode 100644 > index 00000000000..8ce7ff88d9e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr104401.c > @@ -0,0 +1,27 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse4.1" } */ > +/* { dg-final { scan-assembler-times "pminsd" 2 } } */ > +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */ > + > +#include <smmintrin.h> > + > +__m128i min32(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); > +} > + > +__m128i max32(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input)); > +} > + > +__m128i min32_1(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value)); > +} > + > +__m128i max32_1(__m128i value, __m128i input) > +{ > + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value)); > +} > + > -- > 2.31.1 >
diff --git a/gcc/match.pd b/gcc/match.pd index 7b4b15acc41..d8e2009a83f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5672,6 +5672,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (VECTOR_TYPE_P (type)) (view_convert @c0) (convert @c0)))))))) + +/* This is for VEC_COND_EXPR + Optimize A < B ? A : B to MIN (A, B) + A > B ? A : B to MAX (A, B). */ +(for cmp (lt le ungt unge gt ge unlt unle) + minmax (min min min min max max max max) + MINMAX (MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR) + (simplify + (vec_cond (cmp @0 @1) @0 @1) + (if (VECTOR_INTEGER_TYPE_P (type) + && target_supports_op_p (type, MINMAX, optab_vector)) + (minmax @0 @1)))) + +(for cmp (lt le ungt unge gt ge unlt unle) + minmax (max max max max min min min min) + MINMAX (MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR) + (simplify + (vec_cond (cmp @0 @1) @1 @0) + (if (VECTOR_INTEGER_TYPE_P (type) + && target_supports_op_p (type, MINMAX, optab_vector)) + (minmax @0 @1)))) #endif (for cnd (cond vec_cond) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c index 3d820a58e93..d15670f3e9e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c @@ -1,7 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */ -// FIXME: this should further optimize to a MAX_EXPR typedef signed char v16i8 __attribute__((vector_size(16))); v16i8 f(v16i8 a, v16i8 b) { @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b) } /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */ -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */ +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c b/gcc/testsuite/gcc.target/i386/pr104401.c new file mode 100644 index 00000000000..8ce7ff88d9e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr104401.c @@ -0,0 +1,27 @@ 
+/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ +/* { dg-final { scan-assembler-times "pminsd" 2 } } */ +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */ + +#include <smmintrin.h> + +__m128i min32(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); +} + +__m128i max32(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input)); +} + +__m128i min32_1(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value)); +} + +__m128i max32_1(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value)); +} +