Message ID: alpine.DEB.2.02.1508300944340.4656@laptop-mg.saclay.inria.fr
State: New
On Sun, Aug 30, 2015 at 9:57 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> just trying to shrink fold-const.c a bit more.
>
> initializer_zerop is close to what I was looking for with zerop, but I
> wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
> STRING_CST). At some point I tried using sign_bit_p, but using the return
> of that function in the simplification confused the machinery too much. I
> added an "overload" of element_precision like the one in element_mode, for
> convenience.
>
> Bootstrap+testsuite on ppc64le-redhat-linux.

Ok.

Thanks,
Richard.

> 2015-08-31  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>     * tree.h (zerop): New function.
>     * tree.c (zerop): Likewise.
>     (element_precision): Handle expressions.
>     * match.pd (define_predicates): Add zerop.
>     (x <= +Inf): Fix comment.
>     (abs (x) == 0, A & C == C, A & C != 0): Converted from ...
>     * fold-const.c (fold_binary_loc): ... here. Remove.
>
> gcc/testsuite/
>     * gcc.dg/tree-ssa/cmp-1.c: New file.
>
> --
> Marc Glisse
>
> [quoted patch snipped; the full diff appears at the end of this page]
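For readers following along, here is a minimal C sketch of the source-level
comparisons the three moved simplifications target. This is illustrative
only and not part of the patch; the function names are invented.

/* abs(x) == 0 folds to x == 0; this holds even for -0.0 and NaN, since
   fabs changes neither the "is zero" nor the "is NaN" answer.  */
int f1 (double x) { return __builtin_fabs (x) == 0.0; }

/* (a & C) == C with C a power of 2 becomes (a & C) != 0, because
   a & 4u can only evaluate to 0 or 4.  */
int f2 (unsigned a) { return (a & 4u) == 4u; }

/* (a & sign bit) != 0 becomes a < 0; INT_MIN is spelled as in the new
   testcase so this works for any width of int.  */
int f3 (int a) { return (a & (-__INT_MAX__ - 1)) != 0; }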
Hi Marc,

On 30/08/15 08:57, Marc Glisse wrote:
> Hello,
>
> just trying to shrink fold-const.c a bit more.
>
> initializer_zerop is close to what I was looking for with zerop, but I
> wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
> STRING_CST). At some point I tried using sign_bit_p, but using the return
> of that function in the simplification confused the machinery too much. I
> added an "overload" of element_precision like the one in element_mode, for
> convenience.
>
> Bootstrap+testsuite on ppc64le-redhat-linux.
>
>
> 2015-08-31  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>     * tree.h (zerop): New function.
>     * tree.c (zerop): Likewise.
>     (element_precision): Handle expressions.
>     * match.pd (define_predicates): Add zerop.
>     (x <= +Inf): Fix comment.
>     (abs (x) == 0, A & C == C, A & C != 0): Converted from ...
>     * fold-const.c (fold_binary_loc): ... here. Remove.
>
> gcc/testsuite/
>     * gcc.dg/tree-ssa/cmp-1.c: New file.
>

+/* If we have (A & C) != 0 where C is the sign bit of A, convert
+   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
+(for cmp (eq ne)
+     ncmp (ge lt)
+ (simplify
+  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && (TYPE_PRECISION (TREE_TYPE (@0))
+           == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
+       && element_precision (@2) >= element_precision (@0)
+       && wi::only_sign_bit_p (@1, element_precision (@0)))
+   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
+

With this patch, and this pattern in particular, I've seen some code
quality regressions on aarch64. I'm still trying to reduce a testcase to
demonstrate the issue, but it seems to involve introducing extra
conversions from unsigned to signed values. If I gate this pattern on
!TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve.

Any thoughts?

Thanks,
Kyrill
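Kyrill's reduced testcase is not in the thread; the following is only a
hypothetical sketch of the kind of input where this pattern introduces the
unsigned-to-signed conversion he describes, since the rewrite always goes
through signed_type_for:

#include <stdint.h>

/* Hypothetical example: the pattern would rewrite this sign-bit test
   to (int32_t) a < 0, introducing a signed conversion.  */
int sign_set (uint32_t a)
{
  return (a & 0x80000000u) != 0;
}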
On Tue, 27 Oct 2015, Kyrill Tkachov wrote:

> Hi Marc,
>
> On 30/08/15 08:57, Marc Glisse wrote:
>> [...]
>
> +/* If we have (A & C) != 0 where C is the sign bit of A, convert
> +   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
> +(for cmp (eq ne)
> +     ncmp (ge lt)
> + (simplify
> +  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +       && (TYPE_PRECISION (TREE_TYPE (@0))
> +           == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
> +       && element_precision (@2) >= element_precision (@0)
> +       && wi::only_sign_bit_p (@1, element_precision (@0)))

This condition is a bit strict when @0 is signed; for an int32_t i,
(i & (5LL << 42)) != 0 would work as well thanks to sign extension.

> +   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
> +    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
> +
>
> With this patch, and this pattern in particular, I've seen some code
> quality regressions on aarch64.
> I'm still trying to reduce a testcase to demonstrate the issue, but it
> seems to involve introducing extra conversions from unsigned to signed
> values. If I gate this pattern on !TYPE_UNSIGNED (TREE_TYPE (@0)) the
> codegen seems to improve.
>
> Any thoughts?

Hmm, my first thoughts would be:
* conversion from unsigned to signed is a NOP,
* if a & signbit != 0 is faster to compute than a < 0, that's how a < 0
  should be expanded by the target,
* so the problem is probably something else: maybe the bit_and combined
  better with another operation, or the cast obfuscates things enough to
  confuse a later pass. Or maybe something related to @2.

An example would help understand what we are talking about...
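Marc's int32_t example, expanded into a self-contained sketch (illustrative
only; the function name is invented):

#include <stdint.h>

/* Sign extension copies bit 31 of i into bits 32..63 of (long long) i,
   so any nonzero mask whose set bits all lie at or above bit 31 tests
   exactly i < 0, even though wi::only_sign_bit_p rejects such masks.  */
int strict_case (int32_t i)
{
  return (i & (5LL << 42)) != 0;  /* equivalent to i < 0 */
}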
On 27/10/15 15:57, Marc Glisse wrote:
> On Tue, 27 Oct 2015, Kyrill Tkachov wrote:
>
>> Hi Marc,
>>
>> On 30/08/15 08:57, Marc Glisse wrote:
>>> [...]
>>
>> +/* If we have (A & C) != 0 where C is the sign bit of A, convert
>> +   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
>> +(for cmp (eq ne)
>> +     ncmp (ge lt)
>> + (simplify
>> +  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
>> +  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
>> +       && (TYPE_PRECISION (TREE_TYPE (@0))
>> +           == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
>> +       && element_precision (@2) >= element_precision (@0)
>> +       && wi::only_sign_bit_p (@1, element_precision (@0)))
>
> This condition is a bit strict when @0 is signed; for an int32_t i,
> (i & (5LL << 42)) != 0 would work as well thanks to sign extension.
>
>> +   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
>> +    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
>> +
>>
>> With this patch, and this pattern in particular, I've seen some code
>> quality regressions on aarch64.
>> I'm still trying to reduce a testcase to demonstrate the issue, but it
>> seems to involve introducing extra conversions from unsigned to signed
>> values. If I gate this pattern on !TYPE_UNSIGNED (TREE_TYPE (@0)) the
>> codegen seems to improve.
>>
>> Any thoughts?
>
> Hmm, my first thoughts would be:
> * conversion from unsigned to signed is a NOP,
> * if a & signbit != 0 is faster to compute than a < 0, that's how a < 0
>   should be expanded by the target,
> * so the problem is probably something else: maybe the bit_and combined
>   better with another operation, or the cast obfuscates things enough to
>   confuse a later pass. Or maybe something related to @2.

Thanks,

So here the types are shorts and unsigned shorts. On aarch64 these are
HImode values and there are no direct arithmetic operations on them, so
they have to be extended to SImode and truncated back.

> An example would help understand what we are talking about...

Working on it. I'm trying to introduce gcc_unreachable calls into the
compiler when the bad situation happens and reducing the original file,
but I think I'm not capturing the conditions that trigger this behaviour
exactly right :(

Kyrill
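A hypothetical sketch of the situation described above (not the reduced
testcase Kyrill is still working on): with a short operand the bit_and
happens in the promoted type, so @2 is wider than @0 and the rewrite
converts back down to the signed 16-bit type.

/* x is promoted to int, so the pattern sees ((int) x & 0x8000) != 0
   with @0 = x and @2 = (int) x, and rewrites it to (short) x < 0;
   on aarch64 that is where the extra HImode/SImode conversions that
   Kyrill mentions would come from.  */
int sign_set_short (unsigned short x)
{
  return (x & 0x8000) != 0;
}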
On Tue, 27 Oct 2015, Kyrill Tkachov wrote:

> So here the types are shorts and unsigned shorts. On aarch64 these are
> HImode values and there are no direct arithmetic operations on them, so
> they have to be extended to SImode and truncated back.

Ah ok, that makes sense. I expect it is in the case where @0 is shorter
than a word and @2 is longer than @0. I wouldn't expect the signedness
to matter that much, but maybe one of sign/zero-extension simplifies
better than the other.

This problem would happen because we are still missing a pass that
handles type promotion. It means we could consider gating the
transformation on the existence of lt in the mode of @0, but I don't see
any query of optabs in match.pd yet... Still, a testcase would be
necessary to go with whatever patch, and I may be completely on the
wrong track.
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c    (revision 227316)
+++ gcc/fold-const.c    (working copy)
@@ -10761,25 +10761,20 @@ fold_binary_loc (location_t loc,
                                                          1)),
                               arg1, 0)
           && wi::extract_uhwi (TREE_OPERAND (arg0, 0), 0, 1) == 1)
         {
           return omit_two_operands_loc (loc, type,
                                         code == NE_EXPR
                                         ? boolean_true_node
                                         : boolean_false_node,
                                         TREE_OPERAND (arg0, 1), arg1);
         }
 
-      /* Convert ABS_EXPR<x> == 0 or ABS_EXPR<x> != 0 to x == 0 or x != 0.  */
-      if (TREE_CODE (arg0) == ABS_EXPR
-          && (integer_zerop (arg1) || real_zerop (arg1)))
-        return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), arg1);
-
       /* If this is an EQ or NE comparison with zero and ARG0 is
          (1 << foo) & bar, convert it to (bar >> foo) & 1.  Both require
          two operations, but the latter can be done in one less insn
          on machines that have only two-operand insns or on which a
          constant cannot be the first operand.  */
       if (TREE_CODE (arg0) == BIT_AND_EXPR
           && integer_zerop (arg1))
         {
           tree arg00 = TREE_OPERAND (arg0, 0);
           tree arg01 = TREE_OPERAND (arg0, 1);
@@ -10868,35 +10863,20 @@ fold_binary_loc (location_t loc,
                  ((X >> C1) & C2) != 0 is rewritten as (X,false), and
                  ((X >> C1) & C2) == 0 is rewritten as (X,true).  */
               else
                 return omit_one_operand_loc (loc, type,
                                              code == EQ_EXPR ? integer_one_node
                                              : integer_zero_node,
                                              arg000);
             }
         }
 
-      /* If we have (A & C) == C where C is a power of 2, convert this into
-         (A & C) != 0.  Similarly for NE_EXPR.  */
-      if (TREE_CODE (arg0) == BIT_AND_EXPR
-          && integer_pow2p (TREE_OPERAND (arg0, 1))
-          && operand_equal_p (TREE_OPERAND (arg0, 1), arg1, 0))
-        return fold_build2_loc (loc, code == EQ_EXPR ? NE_EXPR : EQ_EXPR, type,
-                                arg0, fold_convert_loc (loc, TREE_TYPE (arg0),
-                                                        integer_zero_node));
-
-      /* If we have (A & C) != 0 or (A & C) == 0 and C is the sign
-         bit, then fold the expression into A < 0 or A >= 0.  */
-      tem = fold_single_bit_test_into_sign_test (loc, code, arg0, arg1, type);
-      if (tem)
-        return tem;
-
       /* If we have (A & C) == D where D & ~C != 0, convert this into 0.
          Similarly for NE_EXPR.  */
       if (TREE_CODE (arg0) == BIT_AND_EXPR
           && TREE_CODE (arg1) == INTEGER_CST
           && TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST)
         {
           tree notc = fold_build1_loc (loc, BIT_NOT_EXPR,
                                        TREE_TYPE (TREE_OPERAND (arg0, 1)),
                                        TREE_OPERAND (arg0, 1));
           tree dandnotc
Index: gcc/match.pd
===================================================================
--- gcc/match.pd    (revision 227316)
+++ gcc/match.pd    (working copy)
@@ -21,20 +21,21 @@ for more details.
 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 
 /* Generic tree predicates we inherit.  */
 (define_predicates
    integer_onep integer_zerop integer_all_onesp integer_minus_onep
    integer_each_onep integer_truep integer_nonzerop
    real_zerop real_onep real_minus_onep
+   zerop
    CONSTANT_CLASS_P
    tree_expr_nonnegative_p
    integer_pow2p
    HONOR_NANS)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
   lt le eq ne ge gt unordered ordered unlt unle ungt unge uneq ltgt)
 (define_operator_list inverted_tcc_comparison
   ge gt ne eq lt le ordered unordered ge gt le lt ltgt uneq)
@@ -1570,21 +1571,21 @@ along with GCC; see the file COPYING3.
 }
 (switch
  /* x > +Inf is always false, if with ignore sNANs.  */
  (if (code == GT_EXPR
      && ! HONOR_SNANS (@0))
   { constant_boolean_node (false, type); })
  (if (code == LE_EXPR)
   /* x <= +Inf is always true, if we don't case about NaNs.  */
   (if (! HONOR_NANS (@0))
    { constant_boolean_node (true, type); }
-   /* x <= +Inf is the same as x == x, i.e. isfinite(x).  */
+   /* x <= +Inf is the same as x == x, i.e. !isnan(x).  */
    (eq @0 @0)))
  /* x == +Inf and x >= +Inf are always equal to x > DBL_MAX.  */
  (if (code == EQ_EXPR || code == GE_EXPR)
   (with { real_maxval (&max, neg, TYPE_MODE (TREE_TYPE (@0))); }
    (if (neg)
     (lt @0 { build_real (TREE_TYPE (@0), max); })
     (gt @0 { build_real (TREE_TYPE (@0), max); }))))
  /* x < +Inf is always equal to x <= DBL_MAX.  */
  (if (code == LT_EXPR)
   (with { real_maxval (&max, neg, TYPE_MODE (TREE_TYPE (@0))); }
@@ -1727,20 +1728,26 @@ along with GCC; see the file COPYING3.
    (scmp @0 @1)))
 (simplify
  (cmp (negate @0) CONSTANT_CLASS_P@1)
  (if (FLOAT_TYPE_P (TREE_TYPE (@0))
      || (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
          && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))))
   (with { tree tem = fold_unary (NEGATE_EXPR, TREE_TYPE (@0), @1); }
    (if (tem && !TREE_OVERFLOW (tem))
     (scmp @0 { tem; }))))))
 
+/* Convert ABS_EXPR<x> == 0 or ABS_EXPR<x> != 0 to x == 0 or x != 0.  */
+(for op (eq ne)
+ (simplify
+  (op (abs @0) zerop@1)
+  (op @0 @1)))
+
 /* From fold_sign_changed_comparison and fold_widened_comparison.  */
 (for cmp (simple_comparison)
  (simplify
   (cmp (convert@0 @00) (convert?@1 @10))
   (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
       /* Disable this optimization if we're casting a function pointer
          type on targets that require function pointer canonicalization.  */
       && !(targetm.have_canonicalize_funcptr_for_compare ()
            && TREE_CODE (TREE_TYPE (@00)) == POINTER_TYPE
            && TREE_CODE (TREE_TYPE (TREE_TYPE (@00))) == FUNCTION_TYPE)
@@ -1833,20 +1840,42 @@ along with GCC; see the file COPYING3.
 (simplify
  (cmp (convert?@3 (bit_xor @0 INTEGER_CST@1)) INTEGER_CST@2)
  (if (tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0)))
   (cmp @0 (bit_xor @1 (convert @2)))))
 
 (simplify
  (cmp (convert? addr@0) integer_zerop)
  (if (tree_single_nonzero_warnv_p (@0, NULL))
   { constant_boolean_node (cmp == NE_EXPR, type); })))
 
+/* If we have (A & C) == C where C is a power of 2, convert this into
+   (A & C) != 0.  Similarly for NE_EXPR.  */
+(for cmp (eq ne)
+     icmp (ne eq)
+ (simplify
+  (cmp (bit_and@2 @0 integer_pow2p@1) @1)
+  (icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
+
+/* If we have (A & C) != 0 where C is the sign bit of A, convert
+   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
+(for cmp (eq ne)
+     ncmp (ge lt)
+ (simplify
+  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && (TYPE_PRECISION (TREE_TYPE (@0))
+           == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
+       && element_precision (@2) >= element_precision (@0)
+       && wi::only_sign_bit_p (@1, element_precision (@0)))
+   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+    (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
+
 /* When the addresses are not directly of decls compare base and offset.
    This implements some remaining parts of fold_comparison address
    comparisons but still no complete part of it.  Still it is good
    enough to make fold_stmt not regress when not dispatching to
    fold_binary.  */
 (for cmp (simple_comparison)
  (simplify
   (cmp (convert1?@2 addr@0) (convert2? addr@1))
   (with
    {
      HOST_WIDE_INT off0, off1;
Index: gcc/testsuite/gcc.dg/tree-ssa/cmp-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/cmp-1.c    (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/cmp-1.c    (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-gimple -fdump-tree-optimized" } */
+
+int f(int a){
+  int b = -__INT_MAX__-1;
+  a &= b;
+  return a == b;
+}
+int g(int x){
+  x = x < 0 ? -x : x;
+  return x == 0;
+}
+
+/* This should work even if int is not 32 bits, it is just not meaningful in
+   that case.  */
+/* { dg-final { scan-tree-dump-not "-2147483648" "optimized"} } */
+/* { dg-final { scan-tree-dump " < 0" "optimized"} } */
+/* { dg-final { scan-tree-dump "ABS_EXPR" "gimple"} } */
+/* { dg-final { scan-tree-dump-not "ABS_EXPR" "optimized"} } */
Index: gcc/tree.c
===================================================================
--- gcc/tree.c    (revision 227316)
+++ gcc/tree.c    (working copy)
@@ -2208,20 +2208,31 @@ grow_tree_vec_stat (tree v, int len MEM_
 
   record_node_allocation_statistics (TREE_VEC, length - oldlength);
 
   v = (tree) ggc_realloc (v, length PASS_MEM_STAT);
 
   TREE_VEC_LENGTH (v) = len;
 
   return v;
 }
 
+/* Return 1 if EXPR is the constant zero, whether it is integral, float or
+   fixed, and scalar, complex or vector.  */
+
+int
+zerop (const_tree expr)
+{
+  return (integer_zerop (expr)
+          || real_zerop (expr)
+          || fixed_zerop (expr));
+}
+
 /* Return 1 if EXPR is the integer constant zero or a complex constant
    of zero.  */
 
 int
 integer_zerop (const_tree expr)
 {
   STRIP_NOPS (expr);
 
   switch (TREE_CODE (expr))
     {
@@ -7505,20 +7516,22 @@ valid_constant_size_p (const_tree size)
     return false;
   return true;
 }
 
 /* Return the precision of the type, or for a complex or vector type the
    precision of the type of its elements.  */
 
 unsigned int
 element_precision (const_tree type)
 {
+  if (!TYPE_P (type))
+    type = TREE_TYPE (type);
   enum tree_code code = TREE_CODE (type);
   if (code == COMPLEX_TYPE || code == VECTOR_TYPE)
     type = TREE_TYPE (type);
 
   return TYPE_PRECISION (type);
 }
 
 /* Return true if CODE represents an associative tree code.  Otherwise
    return false.  */
 bool
Index: gcc/tree.h
===================================================================
--- gcc/tree.h    (revision 227316)
+++ gcc/tree.h    (working copy)
@@ -4102,20 +4102,24 @@ extern bool initializer_zerop (const_tre
 
 /* Given a vector VEC, return its first element if all elements are
    the same.  Otherwise return NULL_TREE.  */
 
 extern tree uniform_vector_p (const_tree);
 
 /* Given a CONSTRUCTOR CTOR, return the element values as a vector.  */
 
 extern vec<tree, va_gc> *ctor_to_vec (tree);
 
+/* zerop (tree x) is nonzero if X is a constant of value 0.  */
+
+extern int zerop (const_tree);
+
 /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0.  */
 
 extern int integer_zerop (const_tree);
 
 /* integer_onep (tree x) is nonzero if X is an integer constant of value 1.  */
 
 extern int integer_onep (const_tree);
 
 /* integer_onep (tree x) is nonzero if X is an integer constant of value 1, or
    a vector or complex where each part is 1.  */