Message ID | 20240813043023.3685386-2-quic_apinski@quicinc.com |
---|---|
State | New |
Headers | show |
Series | [1/3] testsuite: Add testcases for part of PR 103660 | expand |
On Tue, Aug 13, 2024 at 6:31 AM Andrew Pinski <quic_apinski@quicinc.com> wrote: > > r13-4620-g4d9db4bdd458 Added a few patterns and some of them can be extended to support XOR and PLUS. > This extends the patterns to support XOR and PLUS instead of just IOR. > > Bootstrapped and tested on x86_64-linux-gnu. OK. > PR tree-optimization/103660 > > gcc/ChangeLog: > > * match.pd (`((a CMP b) ? c : 0) | ((a CMP' b) ? d : 0)`): Extend to support > XOR and PLUS. > > gcc/testsuite/ChangeLog: > > * g++.dg/tree-ssa/pr103660-2.C: New test. > * g++.dg/tree-ssa/pr103660-3.C: New test. > * gcc.dg/tree-ssa/pr103660-2.c: New test. > * gcc.dg/tree-ssa/pr103660-3.c: New test. > > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com> > --- > gcc/match.pd | 42 +++++++++++--------- > gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C | 30 +++++++++++++++ > gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C | 30 +++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c | 45 ++++++++++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c | 35 +++++++++++++++++ > 5 files changed, 163 insertions(+), 19 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index c9c8478d286..b43ceb6def0 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -2356,18 +2356,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > /* Fold ((-(a < b) & c) | (-(a >= b) & d)) into a < b ? c : d. This is > canonicalized further and we recognize the conditional form: > - (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. 
*/ > - (simplify > - (bit_ior > - (cond (cmp@0 @01 @02) @3 zerop) > - (cond (icmp@4 @01 @02) @5 zerop)) > - (if (INTEGRAL_TYPE_P (type) > - && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp > - /* The scalar version has to be canonicalized after vectorization > - because it makes unconditional loads conditional ones, which > - means we lose vectorization because the loads may trap. */ > - && canonicalize_math_after_vectorization_p ()) > - (cond @0 @3 @5))) > + (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. > + Handle also ^ and + in replacement of `|`. */ > + (for op (bit_ior bit_xor plus) > + (simplify > + (op > + (cond (cmp@0 @01 @02) @3 zerop) > + (cond (icmp@4 @01 @02) @5 zerop)) > + (if (INTEGRAL_TYPE_P (type) > + && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp > + /* The scalar version has to be canonicalized after vectorization > + because it makes unconditional loads conditional ones, which > + means we lose vectorization because the loads may trap. */ > + && canonicalize_math_after_vectorization_p ()) > + (cond @0 @3 @5)))) > > /* Vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d. > and ((~(a < b) & c) | (~(a >= b) & d)) into a < b ? c : d. */ > @@ -2391,13 +2393,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (vec_cond @0 @3 @2)))))) > > /* Scalar Vectorized Fold ((-(a < b) & c) | (-(a >= b) & d)) > - into a < b ? d : c. */ > - (simplify > - (bit_ior > - (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) > - (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) > - (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) > - (vec_cond @0 @2 @3)))) > + into a < b ? d : c. > + Handle also ^ and + in replacement of `|`. */ > + (for op (bit_ior bit_xor plus) > + (simplify > + (op > + (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) > + (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) > + (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) > + (vec_cond @0 @2 @3))))) > > /* Transform X & -Y into X * Y when Y is { 0 or 1 }. 
*/ > (simplify > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C > new file mode 100644 > index 00000000000..95205c02bc3 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C > @@ -0,0 +1,30 @@ > +/* PR tree-optimization/103660 */ > +/* Vector type version. */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ > + > +typedef int v4si __attribute((__vector_size__(4 * sizeof(int)))); > +#define funcs(OP,n) \ > +v4si min_##n(v4si a, v4si b) { \ > + v4si X = a < b ? a : 0; \ > + v4si Y = a >= b ? b : 0; \ > + return (X OP Y); \ > +} \ > +v4si f_##n(v4si a, v4si b, \ > + v4si c, v4si d) { \ > + v4si X = a < b ? c : 0; \ > + v4si Y = a >= b ? d : 0; \ > + return (X OP Y); \ > +} > + > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> or `a < b ? a : b` depending on if the target > + supports min on the vector type or not. */ > +/* f_xor/f_plus should produce (a < b) ? c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), " 4 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 4 "forwprop1" } } */ > diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C > new file mode 100644 > index 00000000000..0800ad8e90e > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C > @@ -0,0 +1,30 @@ > +/* PR tree-optimization/103660 */ > +/* Vector type version. 
*/ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ > + > +typedef int v4si __attribute((__vector_size__(4 * sizeof(int)))); > +#define funcs(OP,n) \ > +v4si min_##n(v4si a, v4si b) { \ > + v4si X = -(a < b) * a; \ > + v4si Y = -(a >= b) * b; \ > + return (X OP Y); \ > +} \ > +v4si f_##n(v4si a, v4si b, \ > + v4si c, v4si d) { \ > + v4si X = -(a < b) * c; \ > + v4si Y = -(a >= b) * d; \ > + return (X OP Y); \ > +} > + > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> or `a < b ? a : b` depending on if the target > + supports min on the vector type or not. */ > +/* f_xor/f_plus should produce (a < b) ? c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), " 4 "forwprop1" } } */ > +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 4 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c > new file mode 100644 > index 00000000000..ce4da00a888 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c > @@ -0,0 +1,45 @@ > +/* PR tree-optimization/103660 */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fgimple -fdump-tree-forwprop4-raw" } */ > + > +#define funcs(OP,n) \ > +__GIMPLE() \ > +int min_##n(int a, int b) { \ > + _Bool X; \ > + _Bool Y; \ > + int t; \ > + int t1; \ > + int t2; \ > + X = a < b; \ > + Y = a >= b; \ > + t1 = X ? a : 0; \ > + t2 = Y ? b : 0; \ > + t = t1 OP t2; \ > + return t; \ > +} \ > +__GIMPLE() \ > +int f_##n(int a, int b, int c, \ > + int d) { \ > + _Bool X; \ > + _Bool Y; \ > + int t; \ > + int t1; \ > + int t2; \ > + X = a < b; \ > + Y = a >= b; \ > + t1 = X ? c : 0; \ > + t2 = Y ? 
d : 0; \ > + t = t1 OP t2; \ > + return t; \ > +} > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> */ > +/* f_xor/f_plus should produce (a < b) ? c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "min_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "lt_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "cond_expr, " 2 "forwprop4" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c > new file mode 100644 > index 00000000000..bd770b1b6d7 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c > @@ -0,0 +1,35 @@ > +/* PR tree-optimization/103660 */ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-forwprop4-raw" } */ > + > +#define funcs(OP,n) \ > +int min_##n(int a, int b) { \ > + int t; \ > + int t1; \ > + int t2; \ > + t1 = (a < b) * a; \ > + t2 = (a >= b) * b; \ > + t = t1 OP t2; \ > + return t; \ > +} \ > +int f_##n(int a, int b, int c, \ > + int d) { \ > + int t; \ > + int t1; \ > + int t2; \ > + t1 = (a < b) * c; \ > + t2 = (a >= b) * d; \ > + t = t1 OP t2; \ > + return t; \ > +} > + > +funcs(^, xor) > +funcs(+, plus) > + > +/* min_xor/min_plus should produce min<a,b> */ > +/* f_xor/f_plus should produce (a < b) ? c : d */ > +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "min_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "lt_expr, " 2 "forwprop4" } } */ > +/* { dg-final { scan-tree-dump-times "cond_expr, " 2 "forwprop4" } } */ > -- > 2.43.0 >
diff --git a/gcc/match.pd b/gcc/match.pd index c9c8478d286..b43ceb6def0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2356,18 +2356,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Fold ((-(a < b) & c) | (-(a >= b) & d)) into a < b ? c : d. This is canonicalized further and we recognize the conditional form: - (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. */ - (simplify - (bit_ior - (cond (cmp@0 @01 @02) @3 zerop) - (cond (icmp@4 @01 @02) @5 zerop)) - (if (INTEGRAL_TYPE_P (type) - && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp - /* The scalar version has to be canonicalized after vectorization - because it makes unconditional loads conditional ones, which - means we lose vectorization because the loads may trap. */ - && canonicalize_math_after_vectorization_p ()) - (cond @0 @3 @5))) + (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d. + Handle also ^ and + in replacement of `|`. */ + (for op (bit_ior bit_xor plus) + (simplify + (op + (cond (cmp@0 @01 @02) @3 zerop) + (cond (icmp@4 @01 @02) @5 zerop)) + (if (INTEGRAL_TYPE_P (type) + && invert_tree_comparison (cmp, HONOR_NANS (@01)) == icmp + /* The scalar version has to be canonicalized after vectorization + because it makes unconditional loads conditional ones, which + means we lose vectorization because the loads may trap. */ + && canonicalize_math_after_vectorization_p ()) + (cond @0 @3 @5)))) /* Vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d. and ((~(a < b) & c) | (~(a >= b) & d)) into a < b ? c : d. */ @@ -2391,13 +2393,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (vec_cond @0 @3 @2)))))) /* Scalar Vectorized Fold ((-(a < b) & c) | (-(a >= b) & d)) - into a < b ? d : c. */ - (simplify - (bit_ior - (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) - (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) - (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) - (vec_cond @0 @2 @3)))) + into a < b ? d : c. + Handle also ^ and + in replacement of `|`. 
*/ + (for op (bit_ior bit_xor plus) + (simplify + (op + (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop) + (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop)) + (if (invert_tree_comparison (cmp, HONOR_NANS (@4)) == icmp) + (vec_cond @0 @2 @3))))) /* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ (simplify diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C new file mode 100644 index 00000000000..95205c02bc3 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C @@ -0,0 +1,30 @@ +/* PR tree-optimization/103660 */ +/* Vector type version. */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ + +typedef int v4si __attribute((__vector_size__(4 * sizeof(int)))); +#define funcs(OP,n) \ +v4si min_##n(v4si a, v4si b) { \ + v4si X = a < b ? a : 0; \ + v4si Y = a >= b ? b : 0; \ + return (X OP Y); \ +} \ +v4si f_##n(v4si a, v4si b, \ + v4si c, v4si d) { \ + v4si X = a < b ? c : 0; \ + v4si Y = a >= b ? d : 0; \ + return (X OP Y); \ +} + + +funcs(^, xor) +funcs(+, plus) + +/* min_xor/min_plus should produce min<a,b> or `a < b ? a : b` depending on if the target + supports min on the vector type or not. */ +/* f_xor/f_plus should produce (a < b) ? c : d */ +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop1" } } */ +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), " 4 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 4 "forwprop1" } } */ diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C b/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C new file mode 100644 index 00000000000..0800ad8e90e --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C @@ -0,0 +1,30 @@ +/* PR tree-optimization/103660 */ +/* Vector type version. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop1-raw -Wno-psabi" } */ + +typedef int v4si __attribute((__vector_size__(4 * sizeof(int)))); +#define funcs(OP,n) \ +v4si min_##n(v4si a, v4si b) { \ + v4si X = -(a < b) * a; \ + v4si Y = -(a >= b) * b; \ + return (X OP Y); \ +} \ +v4si f_##n(v4si a, v4si b, \ + v4si c, v4si d) { \ + v4si X = -(a < b) * c; \ + v4si Y = -(a >= b) * d; \ + return (X OP Y); \ +} + + +funcs(^, xor) +funcs(+, plus) + +/* min_xor/min_plus should produce min<a,b> or `a < b ? a : b` depending on if the target + supports min on the vector type or not. */ +/* f_xor/f_plus should produce (a < b) ? c : d */ +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop1" } } */ +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:lt_expr|min_expr), " 4 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "(?:vec_cond_expr|min_expr), " 4 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c new file mode 100644 index 00000000000..ce4da00a888 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c @@ -0,0 +1,45 @@ +/* PR tree-optimization/103660 */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fgimple -fdump-tree-forwprop4-raw" } */ + +#define funcs(OP,n) \ +__GIMPLE() \ +int min_##n(int a, int b) { \ + _Bool X; \ + _Bool Y; \ + int t; \ + int t1; \ + int t2; \ + X = a < b; \ + Y = a >= b; \ + t1 = X ? a : 0; \ + t2 = Y ? b : 0; \ + t = t1 OP t2; \ + return t; \ +} \ +__GIMPLE() \ +int f_##n(int a, int b, int c, \ + int d) { \ + _Bool X; \ + _Bool Y; \ + int t; \ + int t1; \ + int t2; \ + X = a < b; \ + Y = a >= b; \ + t1 = X ? c : 0; \ + t2 = Y ? d : 0; \ + t = t1 OP t2; \ + return t; \ +} + +funcs(^, xor) +funcs(+, plus) + +/* min_xor/min_plus should produce min<a,b> */ +/* f_xor/f_plus should produce (a < b) ? 
c : d */ +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop4" } } */ +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop4" } } */ +/* { dg-final { scan-tree-dump-times "min_expr, " 2 "forwprop4" } } */ +/* { dg-final { scan-tree-dump-times "lt_expr, " 2 "forwprop4" } } */ +/* { dg-final { scan-tree-dump-times "cond_expr, " 2 "forwprop4" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c new file mode 100644 index 00000000000..bd770b1b6d7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c @@ -0,0 +1,35 @@ +/* PR tree-optimization/103660 */ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-forwprop4-raw" } */ + +#define funcs(OP,n) \ +int min_##n(int a, int b) { \ + int t; \ + int t1; \ + int t2; \ + t1 = (a < b) * a; \ + t2 = (a >= b) * b; \ + t = t1 OP t2; \ + return t; \ +} \ +int f_##n(int a, int b, int c, \ + int d) { \ + int t; \ + int t1; \ + int t2; \ + t1 = (a < b) * c; \ + t2 = (a >= b) * d; \ + t = t1 OP t2; \ + return t; \ +} + +funcs(^, xor) +funcs(+, plus) + +/* min_xor/min_plus should produce min<a,b> */ +/* f_xor/f_plus should produce (a < b) ? c : d */ +/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop4" } } */ +/* { dg-final { scan-tree-dump-not "plus_expr, " "forwprop4" } } */ +/* { dg-final { scan-tree-dump-times "min_expr, " 2 "forwprop4" } } */ +/* { dg-final { scan-tree-dump-times "lt_expr, " 2 "forwprop4" } } */ +/* { dg-final { scan-tree-dump-times "cond_expr, " 2 "forwprop4" } } */
r13-4620-g4d9db4bdd458 added a few patterns, some of which can be extended to support XOR and PLUS. This patch extends those patterns to handle XOR and PLUS in addition to IOR. Bootstrapped and tested on x86_64-linux-gnu. PR tree-optimization/103660 gcc/ChangeLog: * match.pd (`((a CMP b) ? c : 0) | ((a CMP' b) ? d : 0)`): Extend to support XOR and PLUS. gcc/testsuite/ChangeLog: * g++.dg/tree-ssa/pr103660-2.C: New test. * g++.dg/tree-ssa/pr103660-3.C: New test. * gcc.dg/tree-ssa/pr103660-2.c: New test. * gcc.dg/tree-ssa/pr103660-3.c: New test. Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com> --- gcc/match.pd | 42 +++++++++++--------- gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C | 30 +++++++++++++++ gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C | 30 +++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c | 45 ++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c | 35 +++++++++++++++++ 5 files changed, 163 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr103660-2.C create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr103660-3.C create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103660-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103660-3.c