Message ID | patch-16259-tamar@arm.com |
---|---|
State | New |
Headers | show |
Series | middle-end Recognize more conditional comparisons idioms. | expand |
On Fri, 23 Sep 2022, Tamar Christina wrote: > Hi All, > > GCC currently recognizes some of these for signed but not unsigned types. > It also has trouble dealing with casts in between because these are handled > by the fold machinery. > > This moves the pattern detection to match.pd instead. where's the other copy and is it possible to remove it with this patch? > We fold e.g.: > > uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > uint32_t result; > uint32_t m = (a >= b) - 1; > result = (c & m) | (d & ~m); > return result; > } > > into a >= b ? c : d for all integral types. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * match.pd: New cond select pattern. > > gcc/testsuite/ChangeLog: > > * gcc.dg/select_cond_1.c: New test. > * gcc.dg/select_cond_2.c: New test. > * gcc.dg/select_cond_3.c: New test. > > --- inline copy of patch -- > diff --git a/gcc/match.pd b/gcc/match.pd > index 39da61bf117a6eb2924fc8a6473fb37ddadd60e9..7b8f50410acfd0afafc5606e972cfc4e125d3a5d 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3577,6 +3577,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1) > (max @2 @1)) > > +/* (a & ((c `op` d) - 1)) | (b & ~((c `op` d) - 1)) -> c `op` d ? a : b. */ > +(for op (simple_comparison) > + (simplify > + (bit_xor:c > + (convert2? @0) > + (bit_and:c > + (convert2? (bit_xor:c @1 @0)) > + (convert3? (negate (convert? (op@4 @2 @3)))))) I have a hard time matching this up with the comment above. It looks like (T2)@0 ^ ((T2)(@1 ^ @0) & (T3)(-(T)(@2 <=> @3)) I suppose we've canonicalized (a & x) | (b & ~x) somehow. It also looks like we move coversions inside a negate but not outside or into a bit_and? Can we do a better job here to avoid the explosions in the number of patterns? > + /* Alternative form, where some canonicalization were not done due to the > + arguments being signed. */ these two comment lines belong ... > + (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4)) > + (convert:type (cond @4 @1 @0)))) ... here? > + (simplify > + (bit_ior:c > + (mult:c @0 (convert (convert2? (op@4 @2 @3)))) > + (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 @2 @3)))))) can you add a comment with what you match here as well? You are using (op@4 @2 @3) twice, the point of the @4 capture is that the second occurance could be just @4. I wonder how we arrived at the multiplication here? That seems somewhat premature :/ Thanks, Richard. > + (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4)) > + (cond @4 @0 @1)))) > + > /* Simplifications of shift and rotates. */ > > (for rotate (lrotate rrotate) > diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c > new file mode 100644 > index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/select_cond_1.c > @@ -0,0 +1,97 @@ > +/* { dg-do run } */ > +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ > + > +#include <stdint.h> > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a <= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a > b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a < b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a == b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a != b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */ > + > +extern void abort (); > + > +int main () { > + > + if (min1_32u (3, 5, 7 , 8) != 7) > + abort (); > + > + if (max1_32u (3, 5, 7 , 8) != 8) > + abort (); > + > + if (min1_32u (5, 3, 7 , 8) != 8) > + abort (); > + > + if (max1_32u (5, 3, 7 , 8) != 7) > + abort (); > + > + if (min2_32u (3, 5, 7 , 8) != 7) > + abort (); > + > + if (max2_32u (3, 5, 7 , 8) != 8) > + abort (); > + > + if (min2_32u (5, 3, 7 , 8) != 8) > + abort (); > + > + if (max2_32u (5, 3, 7 , 8) != 7) > + abort (); > + > + if (min3_32u (3, 5, 7 , 8) != 7) > + abort (); > + > + if (max3_32u (3, 5, 7 , 8) != 8) > + abort (); > + > + if (min3_32u (5, 3, 7 , 8) != 7) > + abort (); > + > + if (max3_32u (5, 3, 7 , 8) != 8) > + abort (); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c > new file mode 100644 > index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/select_cond_2.c > @@ -0,0 +1,99 @@ > +/* { dg-do run } */ > +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ > + > +#include <stdint.h> > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max1_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a <= b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min2_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a > b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max2_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a < b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min3_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a == b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max3_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a != b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */ > + > +extern void abort (); > + > +int main () { > + > + if (min1_32u (7 , 8) != 7) > + abort (); > + > + if (max1_32u (7 , 8) != 8) > + abort (); > + > + if (min1_32u (7 , 8) != 7) > + abort (); > + > + if (max1_32u (7, 8) != 8) > + abort (); > + > + if (min2_32u (7 , 8) != 7) > + abort (); > + > + if (max2_32u (7 , 8) != 8) > + abort (); > + > + if (min2_32u (7 , 8) != 7) > + abort (); > + > + if (max2_32u (7 , 8) != 8) > + abort (); > + > + if (min3_32u (7 , 8) != 7) > + abort (); > + > + if (max3_32u (7 , 8) != 8) > + abort (); > + > + if (min3_32u (7 , 8) != 7) > + abort (); > + > + if (max3_32u (7 , 8) != 8) > + abort (); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c > new file mode 100644 > index 0000000000000000000000000000000000000000..754e73b121ce360564481b1fa7e1dd4fb27642a3 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/select_cond_3.c > @@ -0,0 +1,101 @@ > +/* { dg-do run } */ > +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ > + > +#include <stdint.h> > + > +__attribute__((noipa, noinline)) > +int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + int32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + int32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + int32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c[^ ]+ : d[^ ]+;} 8 "optimized" } } */ > + > +extern void abort (); > + > +int main () { > + > + if (min1_32u1 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u2 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u3 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u4 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u5 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u6 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u7 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u8 (3, 5, 7 , 8) != 7) > + abort (); > + > + return 0; > +} > > > > >
> > This moves the pattern detection to match.pd instead. > > where's the other copy and is it possible to remove it with this patch? > It looks like it's spread over various passes. Starting with forwardprop. > > > + (simplify > > + (bit_ior:c > > + (mult:c @0 (convert (convert2? (op@4 @2 @3)))) > > + (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 > > + @2 @3)))))) > > can you add a comment with what you match here as well? You are using > (op@4 @2 @3) twice, the point of the @4 capture is that the second > occurance could be just @4. I wonder how we arrived at the multiplication > here? That seems somewhat premature :/ That's being done by forwardprop. To remove the various converts I broke down the operations into several Smaller steps that on their own are sound optimizations. When taken all together it reduced the versions with casts down to the final optimized version. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * match.pd: New cond select pattern. (inverted_simple_comparison): New. gcc/testsuite/ChangeLog: * gcc.dg/select_cond_1.c: New test. * gcc.dg/select_cond_2.c: New test. * gcc.dg/select_cond_3.c: New test. --- inline copy of patch --- diff --git a/gcc/match.pd b/gcc/match.pd index 45f8941396ffd843f1bb23ab638c3b8c6d0d54b6..84d931c5d559ef1a605ea1121fbe655fcf870195 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -51,8 +51,9 @@ along with GCC; see the file COPYING3. If not see unge ungt ne eq unlt unle ordered unordered ge gt le lt ltgt uneq) (define_operator_list swapped_tcc_comparison gt ge eq ne le lt unordered ordered ungt unge unlt unle uneq ltgt) -(define_operator_list simple_comparison lt le eq ne ge gt) -(define_operator_list swapped_simple_comparison gt ge eq ne le lt) +(define_operator_list simple_comparison lt le eq ne ge gt) +(define_operator_list inverted_simple_comparison ge gt ne eq lt le) +(define_operator_list swapped_simple_comparison gt ge eq ne le lt) #include "cfn-operators.pd" @@ -2081,7 +2082,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (!canonicalize_math_p ()) (for cmp (gt lt ge le) (simplify - (mult (convert (cmp @0 @1)) @2) + (mult:c (convert (cmp @0 @1)) @2) (cond (cmp @0 @1) @2 { build_zero_cst (type); })))) /* For integral types with undefined overflow and C != 0 fold @@ -3551,6 +3552,63 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1) (max @2 @1)) +/* (a & ((c `cmp` d) - 1)) | (b & ~((c `cmp` d) - 1)) -> c `cmp` d ? a : b. + by matching the canonicanized xor form. This relies on: 0 ^ a == a and + (a ^ b) ^ a == b. So (((a ^ b) & -((unsigned T)(c `cmp` d))) ^ b) into + c `cmp` d ? a : b. */ +(for cmp (simple_comparison) + icmp (inverted_simple_comparison) + (simplify + (bit_xor:c + @0 + (bit_and:c + (bit_xor:c @1 @0) + (negate (cmp@4 @2 @3)))) + (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4)) + (convert:type (cond @4 @1 @0)))) + /* ((unsigned T)(((signed T) (a `cmp` b)) + -1)) -> a `icmp` b ? -1 : 0. */ + (simplify + (convert (plus:c integer_minus_onep (convert2?@1 (cmp@2 @3 @4)))) + (if (tree_zero_one_valued_p (@2) + && INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (type) + && !TYPE_UNSIGNED (TREE_TYPE (@1)) + && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@1)), type)) + (cond (icmp @3 @4) { build_all_ones_cst (type); } + { build_zero_cst (type); }))) + /* ((-(unsigned T)(a `cmp` b)) & c) -> a `cmp` b ? c : 0. */ + (simplify + (bit_and:c + (convert2?@5 (negate@4 (convert (cmp@0 @1 @2)))) @3) + (if (tree_zero_one_valued_p (@0) + && INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@4)) + && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@5)), + TREE_TYPE (@4))) + (cond (cmp @1 @2) @3 { build_zero_cst (type); }))) + /* (a `cmp` b) ? d : (a `icmp` b) ? c : e -> a `cmp` b ? d : c. */ + (simplify + (cond (cmp@0 @1 @2) @4 + (cond (icmp @1 @2) @3 @5)) + (cond @0 @4 @3)) + + /* (a `cmp` b) ? ((a `icmp` b) ? <dead> : c) : d -> a `cmp` b ? c : d. */ + (simplify + (cond (cmp@0 @1 @2) + (cond (icmp @1 @2) @3 @4) @5) + (cond @0 @4 @5))) + +(for op (bit_ior bit_and bit_xor) + /* (a `op` (b ? 0 : c) -> b ? a `op` 0 : a `op` c + and (a `op` (b ? c : 0) -> b ? a `op` c : a `op` 0 + we only fold 0 because one operand is guaranteed to simplify. */ + (simplify + (op:c (cond @0 zerop@3 @1) @2) + (cond @0 (op @3 @2) (op @1 @2))) + (simplify + (op:c (cond @0 @1 zerop@3) @2) + (cond @0 (op @1 @2) (op @3 @2)))) + /* Simplifications of shift and rotates. */ (for rotate (lrotate rrotate) diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c new file mode 100644 index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_1.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include <stdint.h> + +__attribute__((noipa, noinline)) +uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a <= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a > b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a < b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a == b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a != b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max1_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min1_32u (5, 3, 7 , 8) != 8) + abort (); + + if (max1_32u (5, 3, 7 , 8) != 7) + abort (); + + if (min2_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max2_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min2_32u (5, 3, 7 , 8) != 8) + abort (); + + if (max2_32u (5, 3, 7 , 8) != 7) + abort (); + + if (min3_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max3_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min3_32u (5, 3, 7 , 8) != 7) + abort (); + + if (max3_32u (5, 3, 7 , 8) != 8) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c new file mode 100644 index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_2.c @@ -0,0 +1,99 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include <stdint.h> + +__attribute__((noipa, noinline)) +uint32_t min1_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max1_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a <= b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min2_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a > b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max2_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a < b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min3_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a == b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max3_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a != b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */ +/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u (7 , 8) != 7) + abort (); + + if (max1_32u (7 , 8) != 8) + abort (); + + if (min1_32u (7 , 8) != 7) + abort (); + + if (max1_32u (7, 8) != 8) + abort (); + + if (min2_32u (7 , 8) != 7) + abort (); + + if (max2_32u (7 , 8) != 8) + abort (); + + if (min2_32u (7 , 8) != 7) + abort (); + + if (max2_32u (7 , 8) != 8) + abort (); + + if (min3_32u (7 , 8) != 7) + abort (); + + if (max3_32u (7 , 8) != 8) + abort (); + + if (min3_32u (7 , 8) != 7) + abort (); + + if (max3_32u (7 , 8) != 8) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c new file mode 100644 index 0000000000000000000000000000000000000000..80087d4a0bd37066dbd44335aa3d1bf13fe3fa95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_3.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include <stdint.h> + +__attribute__((noipa, noinline)) +int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? [^ ]+ : [^ ]+;} 8 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u1 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u2 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u3 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u4 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u5 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u6 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u7 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u8 (3, 5, 7 , 8) != 7) + abort (); + + return 0; +}
On Mon, 31 Oct 2022, Tamar Christina wrote: > > > This moves the pattern detection to match.pd instead. > > > > where's the other copy and is it possible to remove it with this patch? > > > > It looks like it's spread over various passes. Starting with forwardprop. Can you be more specific? If forwprop contains special pattern matching for these please extend that instead. > > > > > + (simplify > > > + (bit_ior:c > > > + (mult:c @0 (convert (convert2? (op@4 @2 @3)))) > > > + (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 > > > + @2 @3)))))) > > > > can you add a comment with what you match here as well? You are using > > (op@4 @2 @3) twice, the point of the @4 capture is that the second > > occurance could be just @4. I wonder how we arrived at the multiplication > > here? That seems somewhat premature :/ > > That's being done by forwardprop. That's maybe the pass where the multiplication comes from but it's likely some match.pd pattern that triggers instead? > To remove the various converts I broke down the operations into several > Smaller steps that on their own are sound optimizations. When taken all > together it reduced the versions with casts down to the final optimized version. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * match.pd: New cond select pattern. > (inverted_simple_comparison): New. > > gcc/testsuite/ChangeLog: > > * gcc.dg/select_cond_1.c: New test. > * gcc.dg/select_cond_2.c: New test. > * gcc.dg/select_cond_3.c: New test. > > --- inline copy of patch --- > > diff --git a/gcc/match.pd b/gcc/match.pd > index 45f8941396ffd843f1bb23ab638c3b8c6d0d54b6..84d931c5d559ef1a605ea1121fbe655fcf870195 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -51,8 +51,9 @@ along with GCC; see the file COPYING3. If not see > unge ungt ne eq unlt unle ordered unordered ge gt le lt ltgt uneq) > (define_operator_list swapped_tcc_comparison > gt ge eq ne le lt unordered ordered ungt unge unlt unle uneq ltgt) > -(define_operator_list simple_comparison lt le eq ne ge gt) > -(define_operator_list swapped_simple_comparison gt ge eq ne le lt) > +(define_operator_list simple_comparison lt le eq ne ge gt) > +(define_operator_list inverted_simple_comparison ge gt ne eq lt le) > +(define_operator_list swapped_simple_comparison gt ge eq ne le lt) > > #include "cfn-operators.pd" > > @@ -2081,7 +2082,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (!canonicalize_math_p ()) > (for cmp (gt lt ge le) > (simplify > - (mult (convert (cmp @0 @1)) @2) > + (mult:c (convert (cmp @0 @1)) @2) > (cond (cmp @0 @1) @2 { build_zero_cst (type); })))) That looks OK. > /* For integral types with undefined overflow and C != 0 fold > @@ -3551,6 +3552,63 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1) > (max @2 @1)) > > +/* (a & ((c `cmp` d) - 1)) | (b & ~((c `cmp` d) - 1)) -> c `cmp` d ? a : b. > + by matching the canonicanized xor form. This relies on: 0 ^ a == a and > + (a ^ b) ^ a == b. So (((a ^ b) & -((unsigned T)(c `cmp` d))) ^ b) into > + c `cmp` d ? a : b. */ > +(for cmp (simple_comparison) > + icmp (inverted_simple_comparison) > + (simplify > + (bit_xor:c > + @0 > + (bit_and:c > + (bit_xor:c @1 @0) > + (negate (cmp@4 @2 @3)))) > + (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4)) > + (convert:type (cond @4 @1 @0)))) > + /* ((unsigned T)(((signed T) (a `cmp` b)) + -1)) -> a `icmp` b ? -1 : 0. */ > + (simplify > + (convert (plus:c integer_minus_onep (convert2?@1 (cmp@2 @3 @4)))) > + (if (tree_zero_one_valued_p (@2) > + && INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (type) > + && !TYPE_UNSIGNED (TREE_TYPE (@1)) > + && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@1)), type)) > + (cond (icmp @3 @4) { build_all_ones_cst (type); } > + { build_zero_cst (type); }))) > + /* ((-(unsigned T)(a `cmp` b)) & c) -> a `cmp` b ? c : 0. */ > + (simplify > + (bit_and:c > + (convert2?@5 (negate@4 (convert (cmp@0 @1 @2)))) @3) > + (if (tree_zero_one_valued_p (@0) > + && INTEGRAL_TYPE_P (type) > + && TYPE_UNSIGNED (TREE_TYPE (@4)) > + && useless_type_conversion_p (unsigned_type_for (TREE_TYPE (@5)), > + TREE_TYPE (@4))) > + (cond (cmp @1 @2) @3 { build_zero_cst (type); }))) Can you split out the above three patterns? > + /* (a `cmp` b) ? d : (a `icmp` b) ? c : e -> a `cmp` b ? d : c. */ > + (simplify > + (cond (cmp@0 @1 @2) @4 > + (cond (icmp @1 @2) @3 @5)) > + (cond @0 @4 @3)) > + > + /* (a `cmp` b) ? ((a `icmp` b) ? <dead> : c) : d -> a `cmp` b ? c : d. */ > + (simplify > + (cond (cmp@0 @1 @2) > + (cond (icmp @1 @2) @3 @4) @5) > + (cond @0 @4 @5))) Can you put the two above next to the set of related (for cnd (cond vec_cond) /* A ? B : (A ? X : C) -> A ? B : C. */ (simplify (cnd @0 (cnd @0 @1 @2) @3) (cnd @0 @1 @3)) (simplify (cnd @0 @1 (cnd @0 @2 @3)) (cnd @0 @1 @3)) ... please? I wonder if these aren't already handled by /* A ? B : (!A ? C : X) -> A ? B : C. */ /* ??? This matches embedded conditions open-coded because genmatch would generate matching code for conditions in separate stmts only. The following is still important to merge then and else arm cases from if-conversion. */ (simplify (cnd @0 @1 (cnd @2 @3 @4)) (if (inverse_conditions_p (@0, @2)) (cnd @0 @1 @3))) (simplify (cnd @0 (cnd @1 @2 @3) @4) (if (inverse_conditions_p (@0, @1)) (cnd @0 @3 @4))) more specifically if not inverse_conditions_p should be enhanced to follow the SSA def chain here? > +(for op (bit_ior bit_and bit_xor) > + /* (a `op` (b ? 0 : c) -> b ? a `op` 0 : a `op` c > + and (a `op` (b ? c : 0) -> b ? a `op` c : a `op` 0 > + we only fold 0 because one operand is guaranteed to simplify. */ > + (simplify > + (op:c (cond @0 zerop@3 @1) @2) > + (cond @0 (op @3 @2) (op @1 @2))) > + (simplify > + (op:c (cond @0 @1 zerop@3) @2) > + (cond @0 (op @1 @2) (op @3 @2)))) > + Not sure if I said it before but we have fold_binary_op_with_conditional_arg doing this on GENERIC so the above should be #if GIMPLE and possibly next to #if GIMPLE /* From fold_binary_op_with_conditional_arg handle the case of rewriting (a ? b : c) > d to a ? (b > d) : (c > d) when the compares simplify. */ (for cmp (simple_comparison) (simplify (cmp:c (cond @0 @1 @2) @3) /* Do not move possibly trapping operations into the conditional as this pessimizes code and causes gimplification issues when applied late. */ (if (!FLOAT_TYPE_P (TREE_TYPE (@3)) || !operation_could_trap_p (cmp, true, false, @3)) (cond @0 (cmp! @1 @3) (cmp! @2 @3))))) #endif covering fold_binary_op_with_conditional_arg in a more general way would be nice but it's hard (or rather expensive) to express in match.pd terms since you have to enumerate each and every supported outer operation. Thanks, Richard. > /* Simplifications of shift and rotates. */ > > (for rotate (lrotate rrotate) > diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c > new file mode 100644 > index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/select_cond_1.c > @@ -0,0 +1,97 @@ > +/* { dg-do run } */ > +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ > + > +#include <stdint.h> > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a <= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a > b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a < b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a == b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a != b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */ > + > +extern void abort (); > + > +int main () { > + > + if (min1_32u (3, 5, 7 , 8) != 7) > + abort (); > + > + if (max1_32u (3, 5, 7 , 8) != 8) > + abort (); > + > + if (min1_32u (5, 3, 7 , 8) != 8) > + abort (); > + > + if (max1_32u (5, 3, 7 , 8) != 7) > + abort (); > + > + if (min2_32u (3, 5, 7 , 8) != 7) > + abort (); > + > + if (max2_32u (3, 5, 7 , 8) != 8) > + abort (); > + > + if (min2_32u (5, 3, 7 , 8) != 8) > + abort (); > + > + if (max2_32u (5, 3, 7 , 8) != 7) > + abort (); > + > + if (min3_32u (3, 5, 7 , 8) != 7) > + abort (); > + > + if (max3_32u (3, 5, 7 , 8) != 8) > + abort (); > + > + if (min3_32u (5, 3, 7 , 8) != 7) > + abort (); > + > + if (max3_32u (5, 3, 7 , 8) != 8) > + abort (); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c > new file mode 100644 > index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/select_cond_2.c > @@ -0,0 +1,99 @@ > +/* { dg-do run } */ > +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ > + > +#include <stdint.h> > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max1_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a <= b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min2_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a > b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max2_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a < b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min3_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a == b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t max3_32u(uint32_t a, uint32_t b) { > + uint32_t result; > + uint32_t m = (a != b) - 1; > + result = (a & m) | (b & ~m); > + return result; > +} > + > +/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */ > + > +extern void abort (); > + > +int main () { > + > + if (min1_32u (7 , 8) != 7) > + abort (); > + > + if (max1_32u (7 , 8) != 8) > + abort (); > + > + if (min1_32u (7 , 8) != 7) > + abort (); > + > + if (max1_32u (7, 8) != 8) > + abort (); > + > + if (min2_32u (7 , 8) != 7) > + abort (); > + > + if (max2_32u (7 , 8) != 8) > + abort (); > + > + if (min2_32u (7 , 8) != 7) > + abort (); > + > + if (max2_32u (7 , 8) != 8) > + abort (); > + > + if (min3_32u (7 , 8) != 7) > + abort (); > + > + if (max3_32u (7 , 8) != 8) > + abort (); > + > + if (min3_32u (7 , 8) != 7) > + abort (); > + > + if (max3_32u (7 , 8) != 8) > + abort (); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c > new file mode 100644 > index 0000000000000000000000000000000000000000..80087d4a0bd37066dbd44335aa3d1bf13fe3fa95 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/select_cond_3.c > @@ -0,0 +1,101 @@ > +/* { dg-do run } */ > +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ > + > +#include <stdint.h> > + > +__attribute__((noipa, noinline)) > +int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + uint32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + int32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + int32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +__attribute__((noipa, noinline)) > +uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) { > + uint32_t result; > + int32_t m = (a >= b) - 1; > + result = (c & m) | (d & ~m); > + return result; > +} > + > +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? [^ ]+ : [^ ]+;} 8 "optimized" } } */ > + > +extern void abort (); > + > +int main () { > + > + if (min1_32u1 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u2 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u3 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u4 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u5 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u6 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u7 (3, 5, 7 , 8) != 7) > + abort (); > + > + if (min1_32u8 (3, 5, 7 , 8) != 7) > + abort (); > + > + return 0; > +} >
--- a/gcc/match.pd +++ b/gcc/match.pd @@ -3577,6 +3577,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond (le @0 integer_zerop@1) (negate@2 @0) integer_zerop@1) (max @2 @1)) +/* (a & ((c `op` d) - 1)) | (b & ~((c `op` d) - 1)) -> c `op` d ? a : b. */ +(for op (simple_comparison) + (simplify + (bit_xor:c + (convert2? @0) + (bit_and:c + (convert2? (bit_xor:c @1 @0)) + (convert3? (negate (convert? (op@4 @2 @3)))))) + /* Alternative form, where some canonicalization were not done due to the + arguments being signed. */ + (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4)) + (convert:type (cond @4 @1 @0)))) + (simplify + (bit_ior:c + (mult:c @0 (convert (convert2? (op@4 @2 @3)))) + (bit_and:c @1 (convert (plus:c integer_minus_onep (convert (op@4 @2 @3)))))) + (if (INTEGRAL_TYPE_P (type) && tree_zero_one_valued_p (@4)) + (cond @4 @0 @1)))) + /* Simplifications of shift and rotates. */ (for rotate (lrotate rrotate) diff --git a/gcc/testsuite/gcc.dg/select_cond_1.c b/gcc/testsuite/gcc.dg/select_cond_1.c new file mode 100644 index 0000000000000000000000000000000000000000..9eb9959baafe5fffeec24e4e3ae656f8fcfe943c --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_1.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include <stdint.h> + +__attribute__((noipa, noinline)) +uint32_t min1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max1_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a <= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a > b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max2_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a < b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a == b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max3_32u(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a != b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} 6 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max1_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min1_32u (5, 3, 7 , 8) != 8) + abort (); + + if (max1_32u (5, 3, 7 , 8) != 7) + abort (); + + if (min2_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max2_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min2_32u (5, 3, 7 , 8) != 8) + abort (); + + if (max2_32u (5, 3, 7 , 8) != 7) + abort (); + + if (min3_32u (3, 5, 7 , 8) != 7) + abort (); + + if (max3_32u (3, 5, 7 , 8) != 8) + abort (); + + if (min3_32u (5, 3, 7 , 8) != 7) + abort (); + + if (max3_32u (5, 3, 7 , 8) != 8) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/select_cond_2.c b/gcc/testsuite/gcc.dg/select_cond_2.c new file mode 100644 index 0000000000000000000000000000000000000000..623b2272ee7b4b00130d5e9fb8c781dbc5d4189e --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_2.c @@ -0,0 +1,99 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include <stdint.h> + +__attribute__((noipa, noinline)) +uint32_t min1_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max1_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a <= b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min2_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a > b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max2_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a < b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min3_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a == b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t max3_32u(uint32_t a, uint32_t b) { + uint32_t result; + uint32_t m = (a != b) - 1; + result = (a & m) | (b & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-not {_[0-9]+ \? c_[0-9]+\(D\) : d_[0-9]+\(D\)} "optimized" } } */ +/* { dg-final { scan-tree-dump-times {MIN_EXPR} 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times {MAX_EXPR} 2 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u (7 , 8) != 7) + abort (); + + if (max1_32u (7 , 8) != 8) + abort (); + + if (min1_32u (7 , 8) != 7) + abort (); + + if (max1_32u (7, 8) != 8) + abort (); + + if (min2_32u (7 , 8) != 7) + abort (); + + if (max2_32u (7 , 8) != 8) + abort (); + + if (min2_32u (7 , 8) != 7) + abort (); + + if (max2_32u (7 , 8) != 8) + abort (); + + if (min3_32u (7 , 8) != 7) + abort (); + + if (max3_32u (7 , 8) != 8) + abort (); + + if (min3_32u (7 , 8) != 7) + abort (); + + if (max3_32u (7 , 8) != 8) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/select_cond_3.c b/gcc/testsuite/gcc.dg/select_cond_3.c new file mode 100644 index 0000000000000000000000000000000000000000..754e73b121ce360564481b1fa7e1dd4fb27642a3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/select_cond_3.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -std=c99 -fdump-tree-optimized -save-temps" } */ + +#include <stdint.h> + +__attribute__((noipa, noinline)) +int32_t min1_32u1(int32_t a, int32_t b, int32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u2(uint32_t a, uint32_t b, int32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u3(uint32_t a, uint32_t b, uint32_t c, int32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u4(uint32_t a, uint32_t b, int32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u5(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + uint32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u7(uint8_t a, uint16_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +__attribute__((noipa, noinline)) +uint32_t min1_32u8(uint32_t a, uint64_t b, uint32_t c, uint32_t d) { + uint32_t result; + int32_t m = (a >= b) - 1; + result = (c & m) | (d & ~m); + return result; +} + +/* { dg-final { scan-tree-dump-times {_[0-9]+ \? c[^ ]+ : d[^ ]+;} 8 "optimized" } } */ + +extern void abort (); + +int main () { + + if (min1_32u1 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u2 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u3 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u4 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u5 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u6 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u7 (3, 5, 7 , 8) != 7) + abort (); + + if (min1_32u8 (3, 5, 7 , 8) != 7) + abort (); + + return 0; +}