Message ID | alpine.DEB.2.02.1605100810480.7805@laptop-mg.saclay.inria.fr |
---|---|
State | New |
Headers | show |
On Tue, May 10, 2016 at 8:11 AM, Marc Glisse <marc.glisse@inria.fr> wrote: > On Fri, 6 May 2016, Marc Glisse wrote: > >> Here they are. I did (X&Y)&X and (X&Y)&(X&Z). The next one would be >> ((X&Y)&Z)&X, but at some point we have to defer to reassoc. >> >> I didn't add the convert?+tree_nop_conversion_p to the existing transform >> I modified. I guess at some point we should make a pass and add them to all >> the transformations on bit operations... >> >> For (X & Y) & Y, I believe that any conversion is fine. For the others, >> tree_nop_conversion_p is probably too strict (narrowing should be fine for >> all), but I was too lazy to look for tighter conditions. >> >> (X ^ Y) ^ Y -> X should probably have (non_lvalue ...) on its output, but >> in a simple test it didn't seem to matter. Is non_lvalue still needed? >> >> >> Bootstrap+regtest on powerpc64le-unknown-linux-gnu. >> >> 2016-05-06 Marc Glisse <marc.glisse@inria.fr> >> >> gcc/ >> * fold-const.c (fold_binary_loc) [(X ^ Y) & Y]: Remove and merge >> with... >> * match.pd ((X & Y) ^ Y): ... this. >> ((X & Y) & Y, (X | Y) | Y, (X ^ Y) ^ Y, (X & Y) & (X & Z), (X | Y) >> | (X | Z), (X ^ Y) ^ (X ^ Z)): New transformations. >> >> gcc/testsuite/ >> * gcc.dg/tree-ssa/bit-assoc.c: New testcase. >> * gcc.dg/tree-ssa/pr69270.c: Adjust. >> * gcc.dg/tree-ssa/vrp59.c: Disable forwprop. > > > Here it is again, I just replaced convert with convert[12] in the last 2 > transforms. This should matter for (unsigned)(si & 42) & (ui & 42u). I > didn't change it in the other transform, because it would only matter in the > case (T)(X & CST) & CST, which I think would be better served by extending > the transform that currently handles (X & CST1) & CST2 (not done in this > patch). Ok. Thanks! Richard. > -- > Marc Glisse > Index: gcc/fold-const.c > =================================================================== > --- gcc/fold-const.c (revision 236047) > +++ gcc/fold-const.c (working copy) > @@ -10064,59 +10064,20 @@ fold_binary_loc (location_t loc, > } > /* Fold !X & 1 as X == 0. */ > if (TREE_CODE (arg0) == TRUTH_NOT_EXPR > && integer_onep (arg1)) > { > tem = TREE_OPERAND (arg0, 0); > return fold_build2_loc (loc, EQ_EXPR, type, tem, > build_zero_cst (TREE_TYPE (tem))); > } > > - /* Fold (X ^ Y) & Y as ~X & Y. */ > - if (TREE_CODE (arg0) == BIT_XOR_EXPR > - && operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0)) > - { > - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); > - return fold_build2_loc (loc, BIT_AND_EXPR, type, > - fold_build1_loc (loc, BIT_NOT_EXPR, type, > tem), > - fold_convert_loc (loc, type, arg1)); > - } > - /* Fold (X ^ Y) & X as ~Y & X. */ > - if (TREE_CODE (arg0) == BIT_XOR_EXPR > - && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0) > - && reorder_operands_p (TREE_OPERAND (arg0, 1), arg1)) > - { > - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); > - return fold_build2_loc (loc, BIT_AND_EXPR, type, > - fold_build1_loc (loc, BIT_NOT_EXPR, type, > tem), > - fold_convert_loc (loc, type, arg1)); > - } > - /* Fold X & (X ^ Y) as X & ~Y. */ > - if (TREE_CODE (arg1) == BIT_XOR_EXPR > - && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0)) > - { > - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); > - return fold_build2_loc (loc, BIT_AND_EXPR, type, > - fold_convert_loc (loc, type, arg0), > - fold_build1_loc (loc, BIT_NOT_EXPR, type, > tem)); > - } > - /* Fold X & (Y ^ X) as ~Y & X. */ > - if (TREE_CODE (arg1) == BIT_XOR_EXPR > - && operand_equal_p (arg0, TREE_OPERAND (arg1, 1), 0) > - && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) > - { > - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); > - return fold_build2_loc (loc, BIT_AND_EXPR, type, > - fold_build1_loc (loc, BIT_NOT_EXPR, type, > tem), > - fold_convert_loc (loc, type, arg0)); > - } > - > /* Fold (X * Y) & -(1 << CST) to X * Y if Y is a constant > multiple of 1 << CST. */ > if (TREE_CODE (arg1) == INTEGER_CST) > { > wide_int cst1 = arg1; > wide_int ncst1 = -cst1; > if ((cst1 & ncst1) == ncst1 > && multiple_of_p (type, arg0, > wide_int_to_tree (TREE_TYPE (arg1), ncst1))) > return fold_convert_loc (loc, type, arg0); > Index: gcc/match.pd > =================================================================== > --- gcc/match.pd (revision 236047) > +++ gcc/match.pd (working copy) > @@ -667,39 +667,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (tree_nop_conversion_p (type, TREE_TYPE (@0)) > && tree_nop_conversion_p (type, TREE_TYPE (@1))) > (bit_xor (convert @0) (convert @1)))) > > /* Convert ~X ^ C to X ^ ~C. */ > (simplify > (bit_xor (convert? (bit_not @0)) INTEGER_CST@1) > (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) > (bit_xor (convert @0) (bit_not @1)))) > > -/* Fold (X & Y) ^ Y as ~X & Y. */ > -(simplify > - (bit_xor:c (bit_and:c @0 @1) @1) > - (bit_and (bit_not @0) @1)) > +/* Fold (X & Y) ^ Y and (X ^ Y) & Y as ~X & Y. */ > +(for opo (bit_and bit_xor) > + opi (bit_xor bit_and) > + (simplify > + (opo:c (opi:c @0 @1) @1) > + (bit_and (bit_not @0) @1))) > > /* Given a bit-wise operation CODE applied to ARG0 and ARG1, see if both > operands are another bit-wise operation with a common input. If so, > distribute the bit operations to save an operation and possibly two if > constants are involved. For example, convert > (A | B) & (A | C) into A | (B & C) > Further simplification will occur if B and C are constants. */ > (for op (bit_and bit_ior bit_xor) > rop (bit_ior bit_and bit_and) > (simplify > (op (convert? (rop:c @0 @1)) (convert? (rop:c @0 @2))) > (if (tree_nop_conversion_p (type, TREE_TYPE (@1)) > && tree_nop_conversion_p (type, TREE_TYPE (@2))) > (rop (convert @0) (op (convert @1) (convert @2)))))) > > +/* Some simple reassociation for bit operations, also handled in reassoc. > */ > +/* (X & Y) & Y -> X & Y > + (X | Y) | Y -> X | Y */ > +(for op (bit_and bit_ior) > + (simplify > + (op:c (convert?@2 (op:c @0 @1)) (convert? @1)) > + @2)) > +/* (X ^ Y) ^ Y -> X */ > +(simplify > + (bit_xor:c (convert? (bit_xor:c @0 @1)) (convert? @1)) > + (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) > + (convert @0))) > +/* (X & Y) & (X & Z) -> (X & Y) & Z > + (X | Y) | (X | Z) -> (X | Y) | Z */ > +(for op (bit_and bit_ior) > + (simplify > + (op:c (convert1?@3 (op:c@4 @0 @1)) (convert2?@5 (op:c@6 @0 @2))) > + (if (tree_nop_conversion_p (type, TREE_TYPE (@1)) > + && tree_nop_conversion_p (type, TREE_TYPE (@2))) > + (if (single_use (@5) && single_use (@6)) > + (op @3 (convert @2)) > + (if (single_use (@3) && single_use (@4)) > + (op (convert @1) @5)))))) > +/* (X ^ Y) ^ (X ^ Z) -> Y ^ Z */ > +(simplify > + (bit_xor (convert1? (bit_xor:c @0 @1)) (convert2? (bit_xor:c @0 @2))) > + (if (tree_nop_conversion_p (type, TREE_TYPE (@1)) > + && tree_nop_conversion_p (type, TREE_TYPE (@2))) > + (convert (bit_xor @1 @2)))) > > (simplify > (abs (abs@1 @0)) > @1) > (simplify > (abs (negate @0)) > (abs @0)) > (simplify > (abs tree_expr_nonnegative_p@0) > @0) > Index: gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c (revision 0) > +++ gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c (working copy) > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fdump-tree-forwprop1-details -fdump-tree-ccp1-details" > } */ > + > +int f1(int a, int b){ > + int c = a & b; > + return c & b; > +} > +int f2(int a, int b){ > + int c = a | b; > + return b | c; > +} > +int g1(int a, int b, int c){ > + int d = a & b; > + int e = b & c; > + return d & e; > +} > +int g2(int a, int b, int c){ > + int d = a | b; > + int e = c | b; > + return d | e; > +} > +int g3(int a, int b, int c){ > + int d = a ^ b; > + int e = b ^ c; > + return e ^ d; > +} > + > +/* { dg-final { scan-tree-dump-times "Match-and-simplified" 2 "ccp1" } } */ > +/* { dg-final { scan-tree-dump-times "gimple_simplified" 3 "forwprop1" } } > */ > Index: gcc/testsuite/gcc.dg/tree-ssa/pr69270.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/pr69270.c (revision 236047) > +++ gcc/testsuite/gcc.dg/tree-ssa/pr69270.c (working copy) > @@ -1,21 +1,23 @@ > /* { dg-do compile } */ > /* { dg-options "-O2 -fsplit-paths -fdump-tree-dom3-details" } */ > > /* There should be two references to bufferstep that turn into > constants. */ > /* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with > constant .0." 1 "dom3"} } */ > /* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with > constant .1." 1 "dom3"} } */ > > /* And some assignments ought to fold down to constants. */ > -/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 2 "dom3"} > } */ > -/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 2 "dom3"} > } */ > +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = -1;" 1 "dom3"} > } */ > +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = -2;" 1 "dom3"} > } */ > +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 1 "dom3"} > } */ > +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 1 "dom3"} > } */ > > /* The XOR operations should have been optimized to constants. */ > /* { dg-final { scan-tree-dump-not "bit_xor" "dom3"} } */ > > > extern int *stepsizeTable; > > void > adpcm_coder (signed char *outdata, int len) > { > Index: gcc/testsuite/gcc.dg/tree-ssa/vrp59.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/vrp59.c (revision 236047) > +++ gcc/testsuite/gcc.dg/tree-ssa/vrp59.c (working copy) > @@ -1,12 +1,12 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fno-tree-ccp -fdump-tree-vrp1" } */ > +/* { dg-options "-O2 -fno-tree-ccp -fno-tree-forwprop -fdump-tree-vrp1" } > */ > > int f(int x) > { > if (x >= 0 && x <= 3) > { > x = x ^ 3; > x = x & 3; > } > return x; > } >
On Mon, May 9, 2016 at 11:11 PM, Marc Glisse <marc.glisse@inria.fr> wrote: > On Fri, 6 May 2016, Marc Glisse wrote: > >> Here they are. I did (X&Y)&X and (X&Y)&(X&Z). The next one would be >> ((X&Y)&Z)&X, but at some point we have to defer to reassoc. >> >> I didn't add the convert?+tree_nop_conversion_p to the existing transform >> I modified. I guess at some point we should make a pass and add them to all >> the transformations on bit operations... >> >> For (X & Y) & Y, I believe that any conversion is fine. For the others, >> tree_nop_conversion_p is probably too strict (narrowing should be fine for >> all), but I was too lazy to look for tighter conditions. >> >> (X ^ Y) ^ Y -> X should probably have (non_lvalue ...) on its output, but >> in a simple test it didn't seem to matter. Is non_lvalue still needed? >> >> >> Bootstrap+regtest on powerpc64le-unknown-linux-gnu. >> >> 2016-05-06 Marc Glisse <marc.glisse@inria.fr> >> >> gcc/ >> * fold-const.c (fold_binary_loc) [(X ^ Y) & Y]: Remove and merge >> with... >> * match.pd ((X & Y) ^ Y): ... this. >> ((X & Y) & Y, (X | Y) | Y, (X ^ Y) ^ Y, (X & Y) & (X & Z), (X | Y) >> | (X | Z), (X ^ Y) ^ (X ^ Z)): New transformations. >> >> gcc/testsuite/ >> * gcc.dg/tree-ssa/bit-assoc.c: New testcase. >> * gcc.dg/tree-ssa/pr69270.c: Adjust. >> * gcc.dg/tree-ssa/vrp59.c: Disable forwprop. > > > Here it is again, I just replaced convert with convert[12] in the last 2 > transforms. This should matter for (unsigned)(si & 42) & (ui & 42u). I > didn't change it in the other transform, because it would only matter in the > case (T)(X & CST) & CST, which I think would be better served by extending > the transform that currently handles (X & CST1) & CST2 (not done in this > patch). It caused: FAIL: gcc.dg/tree-ssa/vrp47.c scan-tree-dump-times vrp2 " & 1;" 0 on x86. H.J.
On Wed, 11 May 2016, H.J. Lu wrote: >>> * fold-const.c (fold_binary_loc) [(X ^ Y) & Y]: Remove and merge with... >>> * match.pd ((X & Y) ^ Y): ... this. > It caused: > > FAIL: gcc.dg/tree-ssa/vrp47.c scan-tree-dump-times vrp2 " & 1;" 0 > > on x86. Ah, yes, logical_op_short_circuit so I didn't test it. Hmm, doesn't seem so easy. We want to compute (int)(x==0) in a branch where x is known to be in [0, 1]. VRP1 gives (int)(_Bool)(x^1). forwprop3 handles the double conversion, which gives (x^1)&1. Here the new transform applies and gives (~x)&1. VRP2 comes, and with (x^1)&1 it used to notice that this is just x^1 (remember that x is in [0, 1] in this branch), while it doesn't know what to do with (~x)&1. In the end, we get notl %eax andl $1, %eax instead of xorl $1, %eax (andn doesn't seem to have a version with an immediate) The transformation seems right to me, but we are then missing another transformation like ~X & Y -> X ^ Y when we know that X & ~Y is 0 (the 1 bits of X are included in those of Y). That may not be easy with the limited bit tracking we have. A version limited to constant Y of the form 2^n-1 (i.e. 0...01...1) where X has a range included in [0, Y] may be simpler. We could also simplify (int)(_Bool)x to x using VRP information that x is in [0, 1], but apparently when VRP replaces x==0 with y=x^1,(_Bool)y, it does not compute a range for the new variable y, and by the time the next VRP pass comes, it is too late. In the mean time, I propose xfailing this test...
On 05/11/2016 10:17 AM, Marc Glisse wrote: > The transformation seems right to me, but we are then missing another > transformation like ~X & Y -> X ^ Y when we know that X & ~Y is 0 (the 1 > bits of X are included in those of Y). That may not be easy with the > limited bit tracking we have. A version limited to constant Y of the > form 2^n-1 (i.e. 0...01...1) where X has a range included in [0, Y] may > be simpler. While we don't have strong bit tracking, we do track [0..1] ranges reasonably well, so it may be worth doing. > We could also simplify (int)(_Bool)x to x using VRP information that x > is in [0, 1], but apparently when VRP replaces x==0 with y=x^1,(_Bool)y, > it does not compute a range for the new variable y, and by the time the > next VRP pass comes, it is too late. Seems like a clear oversight. jeff
On Wed, 11 May 2016, Jeff Law wrote: >> We could also simplify (int)(_Bool)x to x using VRP information that x >> is in [0, 1], but apparently when VRP replaces x==0 with y=x^1,(_Bool)y, >> it does not compute a range for the new variable y, and by the time the >> next VRP pass comes, it is too late. > Seems like a clear oversight. In get_value_range, there is: /* If we query the range for a new SSA name return an unmodifiable VARYING. We should get here at most from the substitute-and-fold stage which will never try to change values. */ so this is a known limitation. We could try to change that (XRESIZEVEC, memset(0) on the new elements, update num_vr_values to the new num_ssa_names, at this point vr_value should be replaced with a vector). We could also use set_range_info and make simplify_conversion_using_ranges use get_range_info instead of get_value_range. Might even move the whole function to match.pd then ;-)
On Wed, 11 May 2016, Jeff Law wrote: > On 05/11/2016 10:17 AM, Marc Glisse wrote: >> The transformation seems right to me, but we are then missing another >> transformation like ~X & Y -> X ^ Y when we know that X & ~Y is 0 (the 1 >> bits of X are included in those of Y). That may not be easy with the >> limited bit tracking we have. A version limited to constant Y of the >> form 2^n-1 (i.e. 0...01...1) where X has a range included in [0, Y] may >> be simpler. > While we don't have strong bit tracking, we do track [0..1] ranges reasonably > well, so it may be worth doing. I had started writing +/* Simplify (~X & Y) to X ^ Y if we know that (X & ~Y) is 0. */ +#if GIMPLE +(simplify + (bit_and (bit_not SSA_NAME@0) INTEGER_CST@1) + (if ((get_nonzero_bits (@0) & wi::bit_not (@1)) == 0) + (bit_xor @0 @1))) +#endif but then I realized that VRP does not call the simplify machinery, so this would have to be rewritten in simplify_bit_ops_using_ranges. The good point is that we could then compute: may_be_nonzero_X & ~must_be_nonzero_Y instead of assuming that Y is a constant (not that it would change anything in practice).
On Wed, May 11, 2016 at 7:56 PM, Marc Glisse <marc.glisse@inria.fr> wrote: > On Wed, 11 May 2016, Jeff Law wrote: > >>> We could also simplify (int)(_Bool)x to x using VRP information that x >>> is in [0, 1], but apparently when VRP replaces x==0 with y=x^1,(_Bool)y, >>> it does not compute a range for the new variable y, and by the time the >>> next VRP pass comes, it is too late. >> >> Seems like a clear oversight. > > > In get_value_range, there is: > /* If we query the range for a new SSA name return an unmodifiable > VARYING. > We should get here at most from the substitute-and-fold stage which > will never try to change values. */ > so this is a known limitation. > > We could try to change that (XRESIZEVEC, memset(0) on the new elements, > update num_vr_values to the new num_ssa_names, at this point vr_value should > be replaced with a vector). > > We could also use set_range_info and make simplify_conversion_using_ranges > use get_range_info instead of get_value_range. Might even move the whole > function to match.pd then ;-) Yeah - note that VRP already calls set_range_info before simplifying stmts. It's just that substitute_and_fold doesn't apply fold_stmt (and thus match.pd) to all stmts but it only applies the pass specific "fold" (vrp_fold_stmt) to all stmts. Richard. > -- > Marc Glisse
On Thu, 12 May 2016, Richard Biener wrote: > Yeah - note that VRP already calls set_range_info before simplifying > stmts. It's just that substitute_and_fold doesn't apply fold_stmt (and > thus match.pd) to all stmts but it only applies the pass specific "fold" > (vrp_fold_stmt) to all stmts. Just to be sure: is the fact that VRP doesn't apply fold_stmt on purpose? The restriction makes sense, it is just that it may yield a bit of duplication. We already indirectly use get_range_info in match.pd and may miss out on opportunities that only occur in branches during the VRP pass.
On May 12, 2016 6:02:47 PM GMT+02:00, Marc Glisse <marc.glisse@inria.fr> wrote: >On Thu, 12 May 2016, Richard Biener wrote: > >> Yeah - note that VRP already calls set_range_info before simplifying >> stmts. It's just that substitute_and_fold doesn't apply fold_stmt >(and >> thus match.pd) to all stmts but it only applies the pass specific >"fold" >> (vrp_fold_stmt) to all stmts. > >Just to be sure: is the fact that VRP doesn't apply fold_stmt on >purpose? The propagator only folds stmts that had operands replaced (that doesn't enable all simplifications as match.PD patterns cover more than one statement). >The restriction makes sense, it is just that it may yield a bit of >duplication. We already indirectly use get_range_info in match.pd and >may >miss out on opportunities that only occur in branches during the VRP >pass. Yes. The pass specific fold is called on each stmt. Maybe we can omit the propagators folding if the pass specific folding applied. Richard.
Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c (revision 236047) +++ gcc/fold-const.c (working copy) @@ -10064,59 +10064,20 @@ fold_binary_loc (location_t loc, } /* Fold !X & 1 as X == 0. */ if (TREE_CODE (arg0) == TRUTH_NOT_EXPR && integer_onep (arg1)) { tem = TREE_OPERAND (arg0, 0); return fold_build2_loc (loc, EQ_EXPR, type, tem, build_zero_cst (TREE_TYPE (tem))); } - /* Fold (X ^ Y) & Y as ~X & Y. */ - if (TREE_CODE (arg0) == BIT_XOR_EXPR - && operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0)) - { - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 0)); - return fold_build2_loc (loc, BIT_AND_EXPR, type, - fold_build1_loc (loc, BIT_NOT_EXPR, type, tem), - fold_convert_loc (loc, type, arg1)); - } - /* Fold (X ^ Y) & X as ~Y & X. */ - if (TREE_CODE (arg0) == BIT_XOR_EXPR - && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0) - && reorder_operands_p (TREE_OPERAND (arg0, 1), arg1)) - { - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg0, 1)); - return fold_build2_loc (loc, BIT_AND_EXPR, type, - fold_build1_loc (loc, BIT_NOT_EXPR, type, tem), - fold_convert_loc (loc, type, arg1)); - } - /* Fold X & (X ^ Y) as X & ~Y. */ - if (TREE_CODE (arg1) == BIT_XOR_EXPR - && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0)) - { - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 1)); - return fold_build2_loc (loc, BIT_AND_EXPR, type, - fold_convert_loc (loc, type, arg0), - fold_build1_loc (loc, BIT_NOT_EXPR, type, tem)); - } - /* Fold X & (Y ^ X) as ~Y & X. */ - if (TREE_CODE (arg1) == BIT_XOR_EXPR - && operand_equal_p (arg0, TREE_OPERAND (arg1, 1), 0) - && reorder_operands_p (arg0, TREE_OPERAND (arg1, 0))) - { - tem = fold_convert_loc (loc, type, TREE_OPERAND (arg1, 0)); - return fold_build2_loc (loc, BIT_AND_EXPR, type, - fold_build1_loc (loc, BIT_NOT_EXPR, type, tem), - fold_convert_loc (loc, type, arg0)); - } - /* Fold (X * Y) & -(1 << CST) to X * Y if Y is a constant multiple of 1 << CST. */ if (TREE_CODE (arg1) == INTEGER_CST) { wide_int cst1 = arg1; wide_int ncst1 = -cst1; if ((cst1 & ncst1) == ncst1 && multiple_of_p (type, arg0, wide_int_to_tree (TREE_TYPE (arg1), ncst1))) return fold_convert_loc (loc, type, arg0); Index: gcc/match.pd =================================================================== --- gcc/match.pd (revision 236047) +++ gcc/match.pd (working copy) @@ -667,39 +667,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (tree_nop_conversion_p (type, TREE_TYPE (@0)) && tree_nop_conversion_p (type, TREE_TYPE (@1))) (bit_xor (convert @0) (convert @1)))) /* Convert ~X ^ C to X ^ ~C. */ (simplify (bit_xor (convert? (bit_not @0)) INTEGER_CST@1) (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) (bit_xor (convert @0) (bit_not @1)))) -/* Fold (X & Y) ^ Y as ~X & Y. */ -(simplify - (bit_xor:c (bit_and:c @0 @1) @1) - (bit_and (bit_not @0) @1)) +/* Fold (X & Y) ^ Y and (X ^ Y) & Y as ~X & Y. */ +(for opo (bit_and bit_xor) + opi (bit_xor bit_and) + (simplify + (opo:c (opi:c @0 @1) @1) + (bit_and (bit_not @0) @1))) /* Given a bit-wise operation CODE applied to ARG0 and ARG1, see if both operands are another bit-wise operation with a common input. If so, distribute the bit operations to save an operation and possibly two if constants are involved. For example, convert (A | B) & (A | C) into A | (B & C) Further simplification will occur if B and C are constants. */ (for op (bit_and bit_ior bit_xor) rop (bit_ior bit_and bit_and) (simplify (op (convert? (rop:c @0 @1)) (convert? (rop:c @0 @2))) (if (tree_nop_conversion_p (type, TREE_TYPE (@1)) && tree_nop_conversion_p (type, TREE_TYPE (@2))) (rop (convert @0) (op (convert @1) (convert @2)))))) +/* Some simple reassociation for bit operations, also handled in reassoc. */ +/* (X & Y) & Y -> X & Y + (X | Y) | Y -> X | Y */ +(for op (bit_and bit_ior) + (simplify + (op:c (convert?@2 (op:c @0 @1)) (convert? @1)) + @2)) +/* (X ^ Y) ^ Y -> X */ +(simplify + (bit_xor:c (convert? (bit_xor:c @0 @1)) (convert? @1)) + (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) + (convert @0))) +/* (X & Y) & (X & Z) -> (X & Y) & Z + (X | Y) | (X | Z) -> (X | Y) | Z */ +(for op (bit_and bit_ior) + (simplify + (op:c (convert1?@3 (op:c@4 @0 @1)) (convert2?@5 (op:c@6 @0 @2))) + (if (tree_nop_conversion_p (type, TREE_TYPE (@1)) + && tree_nop_conversion_p (type, TREE_TYPE (@2))) + (if (single_use (@5) && single_use (@6)) + (op @3 (convert @2)) + (if (single_use (@3) && single_use (@4)) + (op (convert @1) @5)))))) +/* (X ^ Y) ^ (X ^ Z) -> Y ^ Z */ +(simplify + (bit_xor (convert1? (bit_xor:c @0 @1)) (convert2? (bit_xor:c @0 @2))) + (if (tree_nop_conversion_p (type, TREE_TYPE (@1)) + && tree_nop_conversion_p (type, TREE_TYPE (@2))) + (convert (bit_xor @1 @2)))) (simplify (abs (abs@1 @0)) @1) (simplify (abs (negate @0)) (abs @0)) (simplify (abs tree_expr_nonnegative_p@0) @0) Index: gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/bit-assoc.c (working copy) @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-forwprop1-details -fdump-tree-ccp1-details" } */ + +int f1(int a, int b){ + int c = a & b; + return c & b; +} +int f2(int a, int b){ + int c = a | b; + return b | c; +} +int g1(int a, int b, int c){ + int d = a & b; + int e = b & c; + return d & e; +} +int g2(int a, int b, int c){ + int d = a | b; + int e = c | b; + return d | e; +} +int g3(int a, int b, int c){ + int d = a ^ b; + int e = b ^ c; + return e ^ d; +} + +/* { dg-final { scan-tree-dump-times "Match-and-simplified" 2 "ccp1" } } */ +/* { dg-final { scan-tree-dump-times "gimple_simplified" 3 "forwprop1" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/pr69270.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/pr69270.c (revision 236047) +++ gcc/testsuite/gcc.dg/tree-ssa/pr69270.c (working copy) @@ -1,21 +1,23 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fsplit-paths -fdump-tree-dom3-details" } */ /* There should be two references to bufferstep that turn into constants. */ /* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .0." 1 "dom3"} } */ /* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .1." 1 "dom3"} } */ /* And some assignments ought to fold down to constants. */ -/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 2 "dom3"} } */ -/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 2 "dom3"} } */ +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = -1;" 1 "dom3"} } */ +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = -2;" 1 "dom3"} } */ +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 1 "dom3"} } */ +/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 1 "dom3"} } */ /* The XOR operations should have been optimized to constants. */ /* { dg-final { scan-tree-dump-not "bit_xor" "dom3"} } */ extern int *stepsizeTable; void adpcm_coder (signed char *outdata, int len) { Index: gcc/testsuite/gcc.dg/tree-ssa/vrp59.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp59.c (revision 236047) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp59.c (working copy) @@ -1,12 +1,12 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-ccp -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fno-tree-ccp -fno-tree-forwprop -fdump-tree-vrp1" } */ int f(int x) { if (x >= 0 && x <= 3) { x = x ^ 3; x = x & 3; } return x; }