Message ID | alpine.DEB.2.02.1505020023190.30644@stedding.saclay.inria.fr |
---|---|
State | New |
Headers | show |
On Sat, May 2, 2015 at 12:46 AM, Marc Glisse <marc.glisse@inria.fr> wrote: > Hello, > > this patch tries to tighten a bit the range estimate for x%y. slp-perm-7.c > started failing by vectorizing more than expected, I assumed it was a good > thing and updated the test. I am less conservative than Jakub with division > by 0, but I still don't really understand how empty ranges are supposed to > be represented in VRP. > > Bootstrap+testsuite on x86_64-linux-gnu. Hmm, so I don't like how you (continute to) use trees for the constant computations. wide-ints would be a better fit today. I also notice that fold_unary_to_constant can return NULL_TREE and neither the old nor your code handles that. "empty" ranges are basically UNDEFINED. Aren't you pessimizing the case where the old code used value_range_nonnegative_p() by just using TYPE_UNSIGNED? Thanks, Richard. > 2015-05-02 Marc Glisse <marc.glisse@inria.fr> > > PR tree-optimization/64454 > gcc/ > * tree-vrp.c (extract_range_from_binary_expr_1) <TRUNC_MOD_EXPR>: > Rewrite. > gcc/testsuite/ > * gcc.dg/tree-ssa/vrp97.c: New file. > * gcc.dg/vect/slp-perm-7.c: Update. > > -- > Marc Glisse > Index: gcc/testsuite/gcc.dg/tree-ssa/vrp97.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/vrp97.c (revision 0) > +++ gcc/testsuite/gcc.dg/tree-ssa/vrp97.c (working copy) > @@ -0,0 +1,13 @@ > +/* PR tree-optimization/64454 */ > +/* { dg-options "-O2 -fdump-tree-vrp1" } */ > + > +int f(int a, int b) > +{ > + if (a < -3 || a > 13) __builtin_unreachable(); > + if (b < -6 || b > 9) __builtin_unreachable(); > + int c = a % b; > + return c >= -3 && c <= 8; > +} > + > +/* { dg-final { scan-tree-dump "return 1;" "vrp1" } } */ > +/* { dg-final { cleanup-tree-dump "vrp1" } } */ > Index: gcc/testsuite/gcc.dg/vect/slp-perm-7.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/slp-perm-7.c (revision 222708) > +++ gcc/testsuite/gcc.dg/vect/slp-perm-7.c (working copy) > @@ -63,15 +63,15 @@ int main (int argc, const char* argv[]) > > foo (input, output, input2, output2); > > for (i = 0; i < N; i++) > if (output[i] != check_results[i] || output2[i] != check_results2[i]) > abort (); > > return 0; > } > > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { > target vect_perm } } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { > target vect_perm } } } */ > /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" > { target vect_perm } } } */ > /* { dg-final { cleanup-tree-dump "vect" } } */ > > > Index: gcc/tree-vrp.c > =================================================================== > --- gcc/tree-vrp.c (revision 222708) > +++ gcc/tree-vrp.c (working copy) > @@ -3189,40 +3189,83 @@ extract_range_from_binary_expr_1 (value_ > } > } > else > { > extract_range_from_multiplicative_op_1 (vr, code, &vr0, &vr1); > return; > } > } > else if (code == TRUNC_MOD_EXPR) > { > - if (vr1.type != VR_RANGE > - || range_includes_zero_p (vr1.min, vr1.max) != 0 > - || vrp_val_is_min (vr1.min)) > + if (range_is_null (&vr1)) > + { > + set_value_range_to_undefined (vr); > + return; > + } > + // Some propagation of symbolic ranges should be possible > + // at least in the unsigned case. > + bool has_vr0 = vr0.type == VR_RANGE && !symbolic_range_p (&vr0); > + bool has_vr1 = vr1.type == VR_RANGE && !symbolic_range_p (&vr1); > + if (!has_vr0 && !has_vr1) > { > set_value_range_to_varying (vr); > return; > } > type = VR_RANGE; > - /* Compute MAX <|vr1.min|, |vr1.max|> - 1. */ > - max = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min); > - if (tree_int_cst_lt (max, vr1.max)) > - max = vr1.max; > - max = int_const_binop (MINUS_EXPR, max, build_int_cst (TREE_TYPE > (max), 1)); > - /* If the dividend is non-negative the modulus will be > - non-negative as well. */ > - if (TYPE_UNSIGNED (expr_type) > - || value_range_nonnegative_p (&vr0)) > - min = build_int_cst (TREE_TYPE (max), 0); > + if (TYPE_UNSIGNED (expr_type)) > + { > + // A % B is at most A and smaller than B. > + min = build_int_cst (expr_type, 0); > + if (has_vr0 && (!has_vr1 || tree_int_cst_lt (vr0.max, vr1.max))) > + max = vr0.max; > + else > + max = int_const_binop (MINUS_EXPR, vr1.max, > + build_int_cst (expr_type, 1)); > + } > else > - min = fold_unary_to_constant (NEGATE_EXPR, expr_type, max); > + { > + tree min1 = NULL_TREE; > + tree max1 = NULL_TREE; > + if (has_vr1) > + { > + // ABS (A % B) < ABS (B) > + max1 = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min); > + if (tree_int_cst_lt (max1, vr1.max)) > + max1 = vr1.max; > + max1 = int_const_binop (MINUS_EXPR, max1, > + build_int_cst (expr_type, 1)); > + min1 = fold_unary_to_constant (NEGATE_EXPR, expr_type, max1); > + } > + if (has_vr0) > + { > + // Either 0 <= A % B <= A or A <= A % B <= 0. > + max = vr0.max; > + min = vr0.min; > + tree zero = build_int_cst (expr_type, 0); > + if (tree_int_cst_lt (zero, min)) > + min = zero; > + if (tree_int_cst_lt (max, zero)) > + max = zero; > + if (has_vr1) > + { > + if (tree_int_cst_lt (min, min1)) > + min = min1; > + if (tree_int_cst_lt (max1, max)) > + max = max1; > + } > + } > + else > + { > + min = min1; > + max = max1; > + } > + } > } > else if (code == BIT_AND_EXPR || code == BIT_IOR_EXPR || code == > BIT_XOR_EXPR) > { > bool int_cst_range0, int_cst_range1; > wide_int may_be_nonzero0, may_be_nonzero1; > wide_int must_be_nonzero0, must_be_nonzero1; > > int_cst_range0 = zero_nonzero_bits_from_vr (expr_type, &vr0, > &may_be_nonzero0, > &must_be_nonzero0); >
On Mon, 4 May 2015, Richard Biener wrote: > On Sat, May 2, 2015 at 12:46 AM, Marc Glisse <marc.glisse@inria.fr> wrote: >> Hello, >> >> this patch tries to tighten a bit the range estimate for x%y. slp-perm-7.c >> started failing by vectorizing more than expected, I assumed it was a good >> thing and updated the test. I am less conservative than Jakub with division >> by 0, but I still don't really understand how empty ranges are supposed to >> be represented in VRP. >> >> Bootstrap+testsuite on x86_64-linux-gnu. > > Hmm, so I don't like how you (continute to) use trees for the constant > computations. wide-ints would be a better fit today. I also notice that > fold_unary_to_constant can return NULL_TREE and neither the old nor your > code handles that. You are right. I was lazy and tried to keep this part of the old code, I shouldn't have... > "empty" ranges are basically UNDEFINED. Cool, that's what I did. But I don't see code adding calls to __builtin_unreachable() when an empty range is detected. Maybe that almost never happens? > Aren't you pessimizing the case where the old code used > value_range_nonnegative_p() by just using TYPE_UNSIGNED? I don't think so. The old code only handled signed types in the positive case, while I have a more complete handling of signed types, which should do at least as good as the old one even in the positive case.
On Mon, May 4, 2015 at 3:57 PM, Marc Glisse <marc.glisse@inria.fr> wrote: > On Mon, 4 May 2015, Richard Biener wrote: > >> On Sat, May 2, 2015 at 12:46 AM, Marc Glisse <marc.glisse@inria.fr> wrote: >>> >>> Hello, >>> >>> this patch tries to tighten a bit the range estimate for x%y. >>> slp-perm-7.c >>> started failing by vectorizing more than expected, I assumed it was a >>> good >>> thing and updated the test. I am less conservative than Jakub with >>> division >>> by 0, but I still don't really understand how empty ranges are supposed >>> to >>> be represented in VRP. >>> >>> Bootstrap+testsuite on x86_64-linux-gnu. >> >> >> Hmm, so I don't like how you (continute to) use trees for the constant >> computations. wide-ints would be a better fit today. I also notice that >> fold_unary_to_constant can return NULL_TREE and neither the old nor your >> code handles that. > > > You are right. I was lazy and tried to keep this part of the old code, I > shouldn't have... > >> "empty" ranges are basically UNDEFINED. > > > Cool, that's what I did. But I don't see code adding calls to > __builtin_unreachable() when an empty range is detected. Maybe that almost > never happens? No, it's just nobody bothered to implement it. You also have to be careful as you can't replace reproducers of UNDEFINED but only uses that still result in UNDEFINED result (for example 0 * UNDEFINED is defined again). What I'd like to do at some point is have some common code that you can query what OP1 tree_code OP2 evaluates to if either op is UNDEFINED. For example UNDEF + X == UNDEF but UNDEF * X == 0 (as optimistical result, of course - the only not undefined case is for X == 0). UNDEF << X == 0 but UNDEF >> X == signed(x) ? UNDEF : 0. We have multiple passes that duplicate only parts of those "optimizations". Having a central place implementing this correctly would be nice. >> Aren't you pessimizing the case where the old code used >> value_range_nonnegative_p() by just using TYPE_UNSIGNED? > > > I don't think so. The old code only handled signed types in the positive > case, while I have a more complete handling of signed types, which should do > at least as good as the old one even in the positive case. Ok, I see. Richard. > -- > Marc Glisse
Index: gcc/testsuite/gcc.dg/tree-ssa/vrp97.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/vrp97.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp97.c (working copy) @@ -0,0 +1,13 @@ +/* PR tree-optimization/64454 */ +/* { dg-options "-O2 -fdump-tree-vrp1" } */ + +int f(int a, int b) +{ + if (a < -3 || a > 13) __builtin_unreachable(); + if (b < -6 || b > 9) __builtin_unreachable(); + int c = a % b; + return c >= -3 && c <= 8; +} + +/* { dg-final { scan-tree-dump "return 1;" "vrp1" } } */ +/* { dg-final { cleanup-tree-dump "vrp1" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-perm-7.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-perm-7.c (revision 222708) +++ gcc/testsuite/gcc.dg/vect/slp-perm-7.c (working copy) @@ -63,15 +63,15 @@ int main (int argc, const char* argv[]) foo (input, output, input2, output2); for (i = 0; i < N; i++) if (output[i] != check_results[i] || output2[i] != check_results2[i]) abort (); return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/tree-vrp.c =================================================================== --- gcc/tree-vrp.c (revision 222708) +++ gcc/tree-vrp.c (working copy) @@ -3189,40 +3189,83 @@ extract_range_from_binary_expr_1 (value_ } } else { extract_range_from_multiplicative_op_1 (vr, code, &vr0, &vr1); return; } } else if (code == TRUNC_MOD_EXPR) { - if (vr1.type != VR_RANGE - || range_includes_zero_p (vr1.min, vr1.max) != 0 - || vrp_val_is_min (vr1.min)) + if (range_is_null (&vr1)) + { + set_value_range_to_undefined (vr); + return; + } + // Some propagation of symbolic ranges should be possible + // at least in the unsigned case. + bool has_vr0 = vr0.type == VR_RANGE && !symbolic_range_p (&vr0); + bool has_vr1 = vr1.type == VR_RANGE && !symbolic_range_p (&vr1); + if (!has_vr0 && !has_vr1) { set_value_range_to_varying (vr); return; } type = VR_RANGE; - /* Compute MAX <|vr1.min|, |vr1.max|> - 1. */ - max = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min); - if (tree_int_cst_lt (max, vr1.max)) - max = vr1.max; - max = int_const_binop (MINUS_EXPR, max, build_int_cst (TREE_TYPE (max), 1)); - /* If the dividend is non-negative the modulus will be - non-negative as well. */ - if (TYPE_UNSIGNED (expr_type) - || value_range_nonnegative_p (&vr0)) - min = build_int_cst (TREE_TYPE (max), 0); + if (TYPE_UNSIGNED (expr_type)) + { + // A % B is at most A and smaller than B. + min = build_int_cst (expr_type, 0); + if (has_vr0 && (!has_vr1 || tree_int_cst_lt (vr0.max, vr1.max))) + max = vr0.max; + else + max = int_const_binop (MINUS_EXPR, vr1.max, + build_int_cst (expr_type, 1)); + } else - min = fold_unary_to_constant (NEGATE_EXPR, expr_type, max); + { + tree min1 = NULL_TREE; + tree max1 = NULL_TREE; + if (has_vr1) + { + // ABS (A % B) < ABS (B) + max1 = fold_unary_to_constant (ABS_EXPR, expr_type, vr1.min); + if (tree_int_cst_lt (max1, vr1.max)) + max1 = vr1.max; + max1 = int_const_binop (MINUS_EXPR, max1, + build_int_cst (expr_type, 1)); + min1 = fold_unary_to_constant (NEGATE_EXPR, expr_type, max1); + } + if (has_vr0) + { + // Either 0 <= A % B <= A or A <= A % B <= 0. + max = vr0.max; + min = vr0.min; + tree zero = build_int_cst (expr_type, 0); + if (tree_int_cst_lt (zero, min)) + min = zero; + if (tree_int_cst_lt (max, zero)) + max = zero; + if (has_vr1) + { + if (tree_int_cst_lt (min, min1)) + min = min1; + if (tree_int_cst_lt (max1, max)) + max = max1; + } + } + else + { + min = min1; + max = max1; + } + } } else if (code == BIT_AND_EXPR || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR) { bool int_cst_range0, int_cst_range1; wide_int may_be_nonzero0, may_be_nonzero1; wide_int must_be_nonzero0, must_be_nonzero1; int_cst_range0 = zero_nonzero_bits_from_vr (expr_type, &vr0, &may_be_nonzero0, &must_be_nonzero0);