Message ID | 20210707124720.16330-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v2] Analyze niter for until-wrap condition [PR101145] | expand |
Hi, I would like to have an early ping on this with more mail addresses. BR, Jiufu. On 2021-07-07 20:47, Jiufu Guo wrote: > Changes since v1: > * Update assumptions for niter, add more test cases check > * Use widest_int/wide_int instead mpz to do +-/ > * Move some early check for quick return > > For code like: > unsigned foo(unsigned val, unsigned start) > { > unsigned cnt = 0; > for (unsigned i = start; i > val; ++i) > cnt++; > return cnt; > } > > The number of iterations should be about UINT_MAX - start. > > There is function adjust_cond_for_loop_until_wrap which > handles similar work for const bases. > Like adjust_cond_for_loop_until_wrap, this patch enhance > function number_of_iterations_cond/number_of_iterations_lt > to analyze number of iterations for this kind of loop. > > Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. > Is this ok for trunk? > > gcc/ChangeLog: > > 2021-07-07 Jiufu Guo <guojiufu@linux.ibm.com> > > PR tree-optimization/101145 > * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): > New function. > (number_of_iterations_lt): Invoke above function. > (adjust_cond_for_loop_until_wrap): > Merge to number_of_iterations_until_wrap. > (number_of_iterations_cond): Update invokes for > adjust_cond_for_loop_until_wrap and number_of_iterations_lt. > > gcc/testsuite/ChangeLog: > > 2021-07-07 Jiufu Guo <guojiufu@linux.ibm.com> > > PR tree-optimization/101145 > * gcc.dg/vect/pr101145.c: New test. > * gcc.dg/vect/pr101145.inc: New test. > * gcc.dg/vect/pr101145_1.c: New test. > * gcc.dg/vect/pr101145_2.c: New test. > * gcc.dg/vect/pr101145_3.c: New test. > * gcc.dg/vect/pr101145inf.c: New test. > * gcc.dg/vect/pr101145inf.inc: New test. > * gcc.dg/vect/pr101145inf_1.c: New test. > --- > gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++++++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 ++++++++ > gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++ > gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++ > gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++ > gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ > gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 ++++ > gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ > gcc/tree-ssa-loop-niter.c | 157 ++++++++++-------- > 9 files changed, 463 insertions(+), 65 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c > b/gcc/testsuite/gcc.dg/vect/pr101145.c > new file mode 100644 > index 00000000000..74031b031cf > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c > @@ -0,0 +1,187 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#include <limits.h> > + > +unsigned __attribute__ ((noinline)) > +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) > +{ > + while (UINT_MAX - 64 < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > n) > +{ > + l = UINT_MAX - 32; > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > n) > +{ > + while (n <= ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > n) > +{ // infininate > + while (0 <= ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > n) > +{ > + //no loop > + l = UINT_MAX; > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) > +{ > + while (--l < n) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) > +{ > + while (--l < 64) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > n) > +{ > + l = 32; > + while (--l < n) > + *a++ = *b++ + 1; > + return l; > +} > + > + > +int a[3200], b[3200]; > +int fail; > + > +int > +main () > +{ > + unsigned l, n; > + unsigned res; > + /* l > n*/ > + n = UINT_MAX - 64; > + l = n + 32; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 1; > + res = foo (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + l = n - 32; > + res = foo (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + l = UINT_MAX; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n + 32; > + res = foo_1 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n + 32; > + res = foo_2 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n; > + res = foo_3 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 1; > + res = foo_3 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 2; > + res = foo_3 (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + res = foo_5 (a, b, l, n); > + if (res != 0) > + fail++; > + > + n = 64; > + l = n - 32; > + res = bar (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + l = n; > + res = bar (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + l = n + 1; > + res = bar (a, b, l, n); > + res++; > + if (res != l) > + fail++; > + > + l = 0; > + res = bar (a, b, l, n); > + res++; > + if (res != l) > + fail++; > + > + l = 32; > + res = bar_1 (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + res = bar_1 (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + if (fail) > + __builtin_abort (); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } > */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc > b/gcc/testsuite/gcc.dg/vect/pr101145.inc > new file mode 100644 > index 00000000000..6eed3fa8aca > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc > @@ -0,0 +1,63 @@ > +TYPE __attribute__ ((noinline)) > +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > +{ > + for (l = L_BASE; n < l; l += C) > + *a++ = *b++ + 1; > + return l; > +} > + > +TYPE __attribute__ ((noinline)) > +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > +{ > + for (l = L_BASE_DOWN; l < n; l -= C) > + *a++ = *b++ + 1; > + return l; > +} > + > +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } > + > +int a[1000], b[1000]; > +int fail; > + > +int > +main () > +{ > + TYPE res; > + TYPE l; > + TYPE n; > + n = N_BASE; > + l = n - C; > + res = foo_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n; > + res = foo_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n + C; > + res = foo_sign (a, b, l, n); > + if (neq ((res - MIN) / C, 0)) > + fail++; > + > + n = N_BASE_DOWN; > + l = n - C; > + res = bar_sign (a, b, l, n); > + if (neq ((MAX - res) / C, 0)) > + fail++; > + > + l = n; > + res = bar_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n + C; > + res = bar_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + if (fail) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c > b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > new file mode 100644 > index 00000000000..94f6b99b893 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > @@ -0,0 +1,15 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE signed char > +#define MIN -128 > +#define MAX 127 > +#define N_BASE (MAX - 32) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 3 > +#define L_BASE l > +#define L_BASE_DOWN l > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } > */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c > b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > new file mode 100644 > index 00000000000..d3cfc9e01e1 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > @@ -0,0 +1,15 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE unsigned char > +#define MIN 0 > +#define MAX 255 > +#define N_BASE (MAX - 32 + 1) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 2 > +#define L_BASE l > +#define L_BASE_DOWN l > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } > */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > new file mode 100644 > index 00000000000..9d5863726a5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > @@ -0,0 +1,15 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE int * > +#define MIN ((TYPE)0) > +#define MAX ((TYPE)((long long)-1)) > +#define N_BASE (MIN - 32) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 1 > +#define L_BASE l > +#define L_BASE_DOWN l > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } > */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c > b/gcc/testsuite/gcc.dg/vect/pr101145inf.c > new file mode 100644 > index 00000000000..ed49f5670b5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c > @@ -0,0 +1,25 @@ > +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ > +/* { dg-options "-O3" } */ > +#include <limits.h> > +#include "pr101145inf.inc" > + > +__attribute__ ((noinline)) > +unsigned foo(unsigned val, unsigned start) > +{ > + unsigned cnt = 0; > + for (unsigned i = start; val <= i; i+=16) > + cnt++; > + return cnt; > +} > + > +void test_finite () > +{ > + unsigned n = foo (16, UINT_MAX - 32); > + if (n != 3) > + __builtin_abort (); > +} > + > +void test_infinite () > +{ > + foo (15, UINT_MAX - 32); > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > new file mode 100644 > index 00000000000..4aa3d049187 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > @@ -0,0 +1,28 @@ > +#include <unistd.h> > +#include <signal.h> > +#include <stdlib.h> > + > +void test_finite (); > +void test_infinite (); > + > +void do_exit (int i) > +{ > + exit (0); > +} > + > +int main(void) > +{ > + test_finite (); > + struct sigaction s; > + sigemptyset (&s.sa_mask); > + s.sa_handler = do_exit; > + s.sa_flags = 0; > + sigaction (SIGALRM, &s, NULL); > + alarm (1); > + > + test_infinite (); > + > + __builtin_abort (); > + return 1; > +} > + > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > new file mode 100644 > index 00000000000..4ee3e31c7fe > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > @@ -0,0 +1,23 @@ > +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ > +/* { dg-options "-O3" } */ > +#include <limits.h> > +#include "pr101145inf.inc" > + > +__attribute__ ((noinline)) > +unsigned foo(unsigned val, unsigned start) > +{ > + unsigned cnt = 0; > + for (unsigned i = start; i < val; i-=16) > + cnt++; > + return cnt; > +} > + > +void test_finite () > +{ > + foo (UINT_MAX - 15, 32); > +} > + > +void test_infinite () > +{ > + foo (UINT_MAX - 14, 32); > +} > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c > index b5add827018..85c179be68d 100644 > --- a/gcc/tree-ssa-loop-niter.c > +++ b/gcc/tree-ssa-loop-niter.c > @@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, affine_iv > *iv0, affine_iv *iv1, > } > } > > +/* Determines number of iterations of loop whose ending condition > + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}. > + The number of iterations is stored to NITER. */ > + > +static bool > +number_of_iterations_until_wrap (class loop *, tree type, affine_iv > *iv0, > + affine_iv *iv1, class tree_niter_desc *niter) > +{ > + tree niter_type = unsigned_type_for (type); > + tree step, num, assumptions, may_be_zero; > + wide_int high, low, max, min; > + > + may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, > iv0->base); > + if (integer_onep (may_be_zero)) > + return false; > + > + int prec = TYPE_PRECISION (type); > + signop sgn = TYPE_SIGN (type); > + min = wi::min_value (prec, sgn); > + max = wi::max_value (prec, sgn); > + > + /* n < {base, C}. */ > + if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step)) > + { > + step = iv1->step; > + /* MIN + C - 1 <= n. */ > + tree last = wide_int_to_tree (type, min + wi::to_wide (step) - > 1); > + assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, > iv0->base); > + if (integer_zerop (assumptions)) > + return false; > + > + num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree > (type, max), > + iv1->base); > + high = max; > + if (TREE_CODE (iv1->base) == INTEGER_CST) > + low = wi::to_wide (iv1->base) - 1; > + else if (TREE_CODE (iv0->base) == INTEGER_CST) > + low = wi::to_wide (iv0->base); > + else > + low = min; > + } > + /* {base, -C} < n. */ > + else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop > (iv1->step)) > + { > + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), > iv0->step); > + /* MAX - C + 1 >= n. */ > + tree last = wide_int_to_tree (type, max - wi::to_wide (step) + > 1); > + assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, > iv1->base); > + if (integer_zerop (assumptions)) > + return false; > + > + num = fold_build2 (MINUS_EXPR, niter_type, iv0->base, > + wide_int_to_tree (type, min)); > + low = min; > + if (TREE_CODE (iv0->base) == INTEGER_CST) > + high = wi::to_wide (iv0->base) + 1; > + else if (TREE_CODE (iv1->base) == INTEGER_CST) > + high = wi::to_wide (iv1->base); > + else > + high = max; > + } > + else > + return false; > + > + /* (delta + step - 1) / step */ > + step = fold_convert (niter_type, step); > + num = fold_convert (niter_type, num); > + num = fold_build2 (PLUS_EXPR, niter_type, num, step); > + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step); > + > + widest_int delta, s; > + delta = widest_int::from (high, sgn) - widest_int::from (low, sgn); > + s = wi::to_widest (step); > + delta = delta + s - 1; > + niter->max = wi::udiv_floor (delta, s); > + > + niter->may_be_zero = may_be_zero; > + > + if (!integer_nonzerop (assumptions)) > + niter->assumptions = fold_build2 (TRUTH_AND_EXPR, > boolean_type_node, > + niter->assumptions, assumptions); > + > + niter->control.no_overflow = false; > + > + return true; > +} > + > /* Determines number of iterations of loop whose ending condition > is IV0 < IV1. TYPE is the type of the iv. The number of > iterations is stored to NITER. BNDS bounds the difference > @@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree > type, affine_iv *iv0, > niter->bound = iv0->base; > } > > + /* {base, -C} < n, or n < {base, C} */ > + if (tree_int_cst_sign_bit (iv0->step) > + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit > (iv1->step))) > + return number_of_iterations_until_wrap (loop, type, iv0, iv1, > niter); > + > delta = fold_build2 (MINUS_EXPR, niter_type, > fold_convert (niter_type, iv1->base), > fold_convert (niter_type, iv0->base)); > @@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv) > } > } > > -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts > - the condition for loop-until-wrap cases. For example: > - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 > - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 > - Return true if condition is successfully adjusted. */ > - > -static bool > -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code > *code, > - affine_iv *iv1) > -{ > - /* Only support simple cases for the moment. */ > - if (TREE_CODE (iv0->base) != INTEGER_CST > - || TREE_CODE (iv1->base) != INTEGER_CST) > - return false; > - > - tree niter_type = unsigned_type_for (type), high, low; > - /* Case: i-- < 10. */ > - if (integer_zerop (iv1->step)) > - { > - /* TODO: Should handle case in which abs(step) != 1. */ > - if (!integer_minus_onep (iv0->step)) > - return false; > - /* Give up on infinite loop. */ > - if (*code == LE_EXPR > - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type))) > - return false; > - high = fold_build2 (PLUS_EXPR, niter_type, > - fold_convert (niter_type, iv0->base), > - build_int_cst (niter_type, 1)); > - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); > - } > - else if (integer_zerop (iv0->step)) > - { > - /* TODO: Should handle case in which abs(step) != 1. */ > - if (!integer_onep (iv1->step)) > - return false; > - /* Give up on infinite loop. */ > - if (*code == LE_EXPR > - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type))) > - return false; > - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); > - low = fold_build2 (MINUS_EXPR, niter_type, > - fold_convert (niter_type, iv1->base), > - build_int_cst (niter_type, 1)); > - } > - else > - gcc_unreachable (); > - > - iv0->base = low; > - iv0->step = fold_convert (niter_type, integer_one_node); > - iv1->base = high; > - iv1->step = build_int_cst (niter_type, 0); > - *code = NE_EXPR; > - return true; > -} > - > /* Determine the number of iterations according to condition (for > staying > inside loop) which compares two induction variables using > comparison > operator CODE. The induction variable on left side of the > comparison > @@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop, > return true; > } > > - /* Handle special case loops: while (i-- < 10) and while (10 < i++) > by > - adjusting iv0, iv1 and code. */ > - if (code != NE_EXPR > - && (tree_int_cst_sign_bit (iv0->step) > - || (!integer_zerop (iv1->step) > - && !tree_int_cst_sign_bit (iv1->step))) > - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1)) > - return false; > - > /* OK, now we know we have a senseful loop. Handle several cases, > depending > on what comparison operator is used. */ > bound_difference (loop, iv1->base, iv0->base, &bnds);
Hi, I would like to have a ping on this. https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574596.html BR, Jiufu On 2021-07-15 08:17, guojiufu via Gcc-patches wrote: > Hi, > > I would like to have an early ping on this with more mail addresses. > > BR, > Jiufu. > > On 2021-07-07 20:47, Jiufu Guo wrote: >> Changes since v1: >> * Update assumptions for niter, add more test cases check >> * Use widest_int/wide_int instead mpz to do +-/ >> * Move some early check for quick return >> >> For code like: >> unsigned foo(unsigned val, unsigned start) >> { >> unsigned cnt = 0; >> for (unsigned i = start; i > val; ++i) >> cnt++; >> return cnt; >> } >> >> The number of iterations should be about UINT_MAX - start. >> >> There is function adjust_cond_for_loop_until_wrap which >> handles similar work for const bases. >> Like adjust_cond_for_loop_until_wrap, this patch enhance >> function number_of_iterations_cond/number_of_iterations_lt >> to analyze number of iterations for this kind of loop. >> >> Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. >> Is this ok for trunk? >> >> gcc/ChangeLog: >> >> 2021-07-07 Jiufu Guo <guojiufu@linux.ibm.com> >> >> PR tree-optimization/101145 >> * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): >> New function. >> (number_of_iterations_lt): Invoke above function. >> (adjust_cond_for_loop_until_wrap): >> Merge to number_of_iterations_until_wrap. >> (number_of_iterations_cond): Update invokes for >> adjust_cond_for_loop_until_wrap and number_of_iterations_lt. >> >> gcc/testsuite/ChangeLog: >> >> 2021-07-07 Jiufu Guo <guojiufu@linux.ibm.com> >> >> PR tree-optimization/101145 >> * gcc.dg/vect/pr101145.c: New test. >> * gcc.dg/vect/pr101145.inc: New test. >> * gcc.dg/vect/pr101145_1.c: New test. >> * gcc.dg/vect/pr101145_2.c: New test. >> * gcc.dg/vect/pr101145_3.c: New test. >> * gcc.dg/vect/pr101145inf.c: New test. >> * gcc.dg/vect/pr101145inf.inc: New test. >> * gcc.dg/vect/pr101145inf_1.c: New test. >> --- >> gcc/testsuite/gcc.dg/vect/pr101145.c | 187 >> ++++++++++++++++++++++ >> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 ++++++++ >> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++ >> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++ >> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++ >> gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ >> gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 ++++ >> gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ >> gcc/tree-ssa-loop-niter.c | 157 ++++++++++-------- >> 9 files changed, 463 insertions(+), 65 deletions(-) >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c >> b/gcc/testsuite/gcc.dg/vect/pr101145.c >> new file mode 100644 >> index 00000000000..74031b031cf >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c >> @@ -0,0 +1,187 @@ >> +/* { dg-require-effective-target vect_int } */ >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ >> +#include <limits.h> >> + >> +unsigned __attribute__ ((noinline)) >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned >> n) >> +{ >> + while (n < ++l) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, >> unsigned) >> +{ >> + while (UINT_MAX - 64 < ++l) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned >> n) >> +{ >> + l = UINT_MAX - 32; >> + while (n < ++l) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned >> n) >> +{ >> + while (n <= ++l) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned >> n) >> +{ // infininate >> + while (0 <= ++l) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned >> n) >> +{ >> + //no loop >> + l = UINT_MAX; >> + while (n < ++l) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned >> n) >> +{ >> + while (--l < n) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, >> unsigned) >> +{ >> + while (--l < 64) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +unsigned __attribute__ ((noinline)) >> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned >> n) >> +{ >> + l = 32; >> + while (--l < n) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> + >> +int a[3200], b[3200]; >> +int fail; >> + >> +int >> +main () >> +{ >> + unsigned l, n; >> + unsigned res; >> + /* l > n*/ >> + n = UINT_MAX - 64; >> + l = n + 32; >> + res = foo (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + l = n; >> + res = foo (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + l = n - 1; >> + res = foo (a, b, l, n); >> + if (res != l + 1) >> + fail++; >> + >> + l = n - 32; >> + res = foo (a, b, l, n); >> + if (res != l + 1) >> + fail++; >> + >> + l = UINT_MAX; >> + res = foo (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + l = n + 32; >> + res = foo_1 (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + l = n + 32; >> + res = foo_2 (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + l = n; >> + res = foo_3 (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + l = n - 1; >> + res = foo_3 (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + l = n - 2; >> + res = foo_3 (a, b, l, n); >> + if (res != l + 1) >> + fail++; >> + >> + res = foo_5 (a, b, l, n); >> + if (res != 0) >> + fail++; >> + >> + n = 64; >> + l = n - 32; >> + res = bar (a, b, l, n); >> + res++; >> + if (res != 0) >> + fail++; >> + >> + l = n; >> + res = bar (a, b, l, n); >> + res++; >> + if (res != 0) >> + fail++; >> + >> + l = n + 1; >> + res = bar (a, b, l, n); >> + res++; >> + if (res != l) >> + fail++; >> + >> + l = 0; >> + res = bar (a, b, l, n); >> + res++; >> + if (res != l) >> + fail++; >> + >> + l = 32; >> + res = bar_1 (a, b, l, n); >> + res++; >> + if (res != 0) >> + fail++; >> + >> + res = bar_1 (a, b, l, n); >> + res++; >> + if (res != 0) >> + fail++; >> + >> + if (fail) >> + __builtin_abort (); >> + return 0; >> +} >> + >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } >> } */ >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc >> new file mode 100644 >> index 00000000000..6eed3fa8aca >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc >> @@ -0,0 +1,63 @@ >> +TYPE __attribute__ ((noinline)) >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) >> +{ >> + for (l = L_BASE; n < l; l += C) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +TYPE __attribute__ ((noinline)) >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) >> +{ >> + for (l = L_BASE_DOWN; l < n; l -= C) >> + *a++ = *b++ + 1; >> + return l; >> +} >> + >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } >> + >> +int a[1000], b[1000]; >> +int fail; >> + >> +int >> +main () >> +{ >> + TYPE res; >> + TYPE l; >> + TYPE n; >> + n = N_BASE; >> + l = n - C; >> + res = foo_sign (a, b, l, n); >> + if (res != l) >> + fail++; >> + >> + l = n; >> + res = foo_sign (a, b, l, n); >> + if (res != l) >> + fail++; >> + >> + l = n + C; >> + res = foo_sign (a, b, l, n); >> + if (neq ((res - MIN) / C, 0)) >> + fail++; >> + >> + n = N_BASE_DOWN; >> + l = n - C; >> + res = bar_sign (a, b, l, n); >> + if (neq ((MAX - res) / C, 0)) >> + fail++; >> + >> + l = n; >> + res = bar_sign (a, b, l, n); >> + if (res != l) >> + fail++; >> + >> + l = n + C; >> + res = bar_sign (a, b, l, n); >> + if (res != l) >> + fail++; >> + >> + if (fail) >> + __builtin_abort (); >> + return 0; >> +} >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> new file mode 100644 >> index 00000000000..94f6b99b893 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> @@ -0,0 +1,15 @@ >> +/* { dg-require-effective-target vect_int } */ >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ >> +#define TYPE signed char >> +#define MIN -128 >> +#define MAX 127 >> +#define N_BASE (MAX - 32) >> +#define N_BASE_DOWN (MIN + 32) >> + >> +#define C 3 >> +#define L_BASE l >> +#define L_BASE_DOWN l >> + >> +#include "pr101145.inc" >> + >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } >> } */ >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c >> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c >> new file mode 100644 >> index 00000000000..d3cfc9e01e1 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c >> @@ -0,0 +1,15 @@ >> +/* { dg-require-effective-target vect_int } */ >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ >> +#define TYPE unsigned char >> +#define MIN 0 >> +#define MAX 255 >> +#define N_BASE (MAX - 32 + 1) >> +#define N_BASE_DOWN (MIN + 32) >> + >> +#define C 2 >> +#define L_BASE l >> +#define L_BASE_DOWN l >> + >> +#include "pr101145.inc" >> + >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } >> } */ >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c >> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c >> new file mode 100644 >> index 00000000000..9d5863726a5 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c >> @@ -0,0 +1,15 @@ >> +/* { dg-require-effective-target vect_int } */ >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ >> +#define TYPE int * >> +#define MIN ((TYPE)0) >> +#define MAX ((TYPE)((long long)-1)) >> +#define N_BASE (MIN - 32) >> +#define N_BASE_DOWN (MIN + 32) >> + >> +#define C 1 >> +#define L_BASE l >> +#define L_BASE_DOWN l >> + >> +#include "pr101145.inc" >> + >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } >> } */ >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c >> b/gcc/testsuite/gcc.dg/vect/pr101145inf.c >> new file mode 100644 >> index 00000000000..ed49f5670b5 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c >> @@ -0,0 +1,25 @@ >> +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ >> +/* { dg-options "-O3" } */ >> +#include <limits.h> >> +#include "pr101145inf.inc" >> + >> +__attribute__ ((noinline)) >> +unsigned foo(unsigned val, unsigned start) >> +{ >> + unsigned cnt = 0; >> + for (unsigned i = start; val <= i; i+=16) >> + cnt++; >> + return cnt; >> +} >> + >> +void test_finite () >> +{ >> + unsigned n = foo (16, UINT_MAX - 32); >> + if (n != 3) >> + __builtin_abort (); >> +} >> + >> +void test_infinite () >> +{ >> + foo (15, UINT_MAX - 32); >> +} >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc >> b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc >> new file mode 100644 >> index 00000000000..4aa3d049187 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc >> @@ -0,0 +1,28 @@ >> +#include <unistd.h> >> +#include <signal.h> >> +#include <stdlib.h> >> + >> +void test_finite (); >> +void test_infinite (); >> + >> +void do_exit (int i) >> +{ >> + exit (0); >> +} >> + >> +int main(void) >> +{ >> + test_finite (); >> + struct sigaction s; >> + sigemptyset (&s.sa_mask); >> + s.sa_handler = do_exit; >> + s.sa_flags = 0; >> + sigaction (SIGALRM, &s, NULL); >> + alarm (1); >> + >> + test_infinite (); >> + >> + __builtin_abort (); >> + return 1; >> +} >> + >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c >> b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c >> new file mode 100644 >> index 00000000000..4ee3e31c7fe >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c >> @@ -0,0 +1,23 @@ >> +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ >> +/* { dg-options "-O3" } */ >> +#include <limits.h> >> +#include "pr101145inf.inc" >> + >> +__attribute__ ((noinline)) >> +unsigned foo(unsigned val, unsigned start) >> +{ >> + unsigned cnt = 0; >> + for (unsigned i = start; i < val; i-=16) >> + cnt++; >> + return cnt; >> +} >> + >> +void test_finite () >> +{ >> + foo (UINT_MAX - 15, 32); >> +} >> + >> +void test_infinite () >> +{ >> + foo (UINT_MAX - 14, 32); >> +} >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c >> index b5add827018..85c179be68d 100644 >> --- a/gcc/tree-ssa-loop-niter.c >> +++ b/gcc/tree-ssa-loop-niter.c >> @@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, affine_iv >> *iv0, affine_iv *iv1, >> } >> } >> >> +/* Determines number of iterations of loop whose ending condition >> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}. >> + The number of iterations is stored to NITER. */ >> + >> +static bool >> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv >> *iv0, >> + affine_iv *iv1, class tree_niter_desc *niter) >> +{ >> + tree niter_type = unsigned_type_for (type); >> + tree step, num, assumptions, may_be_zero; >> + wide_int high, low, max, min; >> + >> + may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, >> iv0->base); >> + if (integer_onep (may_be_zero)) >> + return false; >> + >> + int prec = TYPE_PRECISION (type); >> + signop sgn = TYPE_SIGN (type); >> + min = wi::min_value (prec, sgn); >> + max = wi::max_value (prec, sgn); >> + >> + /* n < {base, C}. */ >> + if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit >> (iv1->step)) >> + { >> + step = iv1->step; >> + /* MIN + C - 1 <= n. */ >> + tree last = wide_int_to_tree (type, min + wi::to_wide (step) - >> 1); >> + assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, >> iv0->base); >> + if (integer_zerop (assumptions)) >> + return false; >> + >> + num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree >> (type, max), >> + iv1->base); >> + high = max; >> + if (TREE_CODE (iv1->base) == INTEGER_CST) >> + low = wi::to_wide (iv1->base) - 1; >> + else if (TREE_CODE (iv0->base) == INTEGER_CST) >> + low = wi::to_wide (iv0->base); >> + else >> + low = min; >> + } >> + /* {base, -C} < n. */ >> + else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop >> (iv1->step)) >> + { >> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), >> iv0->step); >> + /* MAX - C + 1 >= n. */ >> + tree last = wide_int_to_tree (type, max - wi::to_wide (step) + >> 1); >> + assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, >> iv1->base); >> + if (integer_zerop (assumptions)) >> + return false; >> + >> + num = fold_build2 (MINUS_EXPR, niter_type, iv0->base, >> + wide_int_to_tree (type, min)); >> + low = min; >> + if (TREE_CODE (iv0->base) == INTEGER_CST) >> + high = wi::to_wide (iv0->base) + 1; >> + else if (TREE_CODE (iv1->base) == INTEGER_CST) >> + high = wi::to_wide (iv1->base); >> + else >> + high = max; >> + } >> + else >> + return false; >> + >> + /* (delta + step - 1) / step */ >> + step = fold_convert (niter_type, step); >> + num = fold_convert (niter_type, num); >> + num = fold_build2 (PLUS_EXPR, niter_type, num, step); >> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step); >> + >> + widest_int delta, s; >> + delta = widest_int::from (high, sgn) - widest_int::from (low, sgn); >> + s = wi::to_widest (step); >> + delta = delta + s - 1; >> + niter->max = wi::udiv_floor (delta, s); >> + >> + niter->may_be_zero = may_be_zero; >> + >> + if (!integer_nonzerop (assumptions)) >> + niter->assumptions = fold_build2 (TRUTH_AND_EXPR, >> boolean_type_node, >> + niter->assumptions, assumptions); >> + >> + niter->control.no_overflow = false; >> + >> + return true; >> +} >> + >> /* Determines number of iterations of loop whose ending condition >> is IV0 < IV1. TYPE is the type of the iv. The number of >> iterations is stored to NITER. BNDS bounds the difference >> @@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree >> type, affine_iv *iv0, >> niter->bound = iv0->base; >> } >> >> + /* {base, -C} < n, or n < {base, C} */ >> + if (tree_int_cst_sign_bit (iv0->step) >> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit >> (iv1->step))) >> + return number_of_iterations_until_wrap (loop, type, iv0, iv1, >> niter); >> + >> delta = fold_build2 (MINUS_EXPR, niter_type, >> fold_convert (niter_type, iv1->base), >> fold_convert (niter_type, iv0->base)); >> @@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv) >> } >> } >> >> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts >> - the condition for loop-until-wrap cases. For example: >> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 >> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 >> - Return true if condition is successfully adjusted. */ >> - >> -static bool >> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code >> *code, >> - affine_iv *iv1) >> -{ >> - /* Only support simple cases for the moment. */ >> - if (TREE_CODE (iv0->base) != INTEGER_CST >> - || TREE_CODE (iv1->base) != INTEGER_CST) >> - return false; >> - >> - tree niter_type = unsigned_type_for (type), high, low; >> - /* Case: i-- < 10. */ >> - if (integer_zerop (iv1->step)) >> - { >> - /* TODO: Should handle case in which abs(step) != 1. */ >> - if (!integer_minus_onep (iv0->step)) >> - return false; >> - /* Give up on infinite loop. */ >> - if (*code == LE_EXPR >> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type))) >> - return false; >> - high = fold_build2 (PLUS_EXPR, niter_type, >> - fold_convert (niter_type, iv0->base), >> - build_int_cst (niter_type, 1)); >> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); >> - } >> - else if (integer_zerop (iv0->step)) >> - { >> - /* TODO: Should handle case in which abs(step) != 1. */ >> - if (!integer_onep (iv1->step)) >> - return false; >> - /* Give up on infinite loop. */ >> - if (*code == LE_EXPR >> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type))) >> - return false; >> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); >> - low = fold_build2 (MINUS_EXPR, niter_type, >> - fold_convert (niter_type, iv1->base), >> - build_int_cst (niter_type, 1)); >> - } >> - else >> - gcc_unreachable (); >> - >> - iv0->base = low; >> - iv0->step = fold_convert (niter_type, integer_one_node); >> - iv1->base = high; >> - iv1->step = build_int_cst (niter_type, 0); >> - *code = NE_EXPR; >> - return true; >> -} >> - >> /* Determine the number of iterations according to condition (for >> staying >> inside loop) which compares two induction variables using >> comparison >> operator CODE. The induction variable on left side of the >> comparison >> @@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop, >> return true; >> } >> >> - /* Handle special case loops: while (i-- < 10) and while (10 < i++) >> by >> - adjusting iv0, iv1 and code. */ >> - if (code != NE_EXPR >> - && (tree_int_cst_sign_bit (iv0->step) >> - || (!integer_zerop (iv1->step) >> - && !tree_int_cst_sign_bit (iv1->step))) >> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1)) >> - return false; >> - >> /* OK, now we know we have a senseful loop. Handle several cases, >> depending >> on what comparison operator is used. */ >> bound_difference (loop, iv1->base, iv0->base, &bnds);
On Wed, Aug 4, 2021 at 10:42 AM guojiufu <guojiufu@linux.ibm.com> wrote: > > Hi, > > I would like to have a ping on this. > > https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574596.html Sorry for being late in replying. > > BR, > Jiufu > > On 2021-07-15 08:17, guojiufu via Gcc-patches wrote: > > Hi, > > > > I would like to have an early ping on this with more mail addresses. > > > > BR, > > Jiufu. > > > > On 2021-07-07 20:47, Jiufu Guo wrote: > >> Changes since v1: > >> * Update assumptions for niter, add more test cases check > >> * Use widest_int/wide_int instead mpz to do +-/ > >> * Move some early check for quick return > >> > >> For code like: > >> unsigned foo(unsigned val, unsigned start) > >> { > >> unsigned cnt = 0; > >> for (unsigned i = start; i > val; ++i) > >> cnt++; > >> return cnt; > >> } > >> > >> The number of iterations should be about UINT_MAX - start. > >> > >> There is function adjust_cond_for_loop_until_wrap which > >> handles similar work for const bases. > >> Like adjust_cond_for_loop_until_wrap, this patch enhance > >> function number_of_iterations_cond/number_of_iterations_lt > >> to analyze number of iterations for this kind of loop. > >> > >> Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. > >> Is this ok for trunk? > >> > >> gcc/ChangeLog: > >> > >> 2021-07-07 Jiufu Guo <guojiufu@linux.ibm.com> > >> > >> PR tree-optimization/101145 > >> * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): > >> New function. > >> (number_of_iterations_lt): Invoke above function. > >> (adjust_cond_for_loop_until_wrap): > >> Merge to number_of_iterations_until_wrap. > >> (number_of_iterations_cond): Update invokes for > >> adjust_cond_for_loop_until_wrap and number_of_iterations_lt. > >> > >> gcc/testsuite/ChangeLog: > >> > >> 2021-07-07 Jiufu Guo <guojiufu@linux.ibm.com> > >> > >> PR tree-optimization/101145 > >> * gcc.dg/vect/pr101145.c: New test. > >> * gcc.dg/vect/pr101145.inc: New test. > >> * gcc.dg/vect/pr101145_1.c: New test. > >> * gcc.dg/vect/pr101145_2.c: New test. > >> * gcc.dg/vect/pr101145_3.c: New test. > >> * gcc.dg/vect/pr101145inf.c: New test. > >> * gcc.dg/vect/pr101145inf.inc: New test. > >> * gcc.dg/vect/pr101145inf_1.c: New test. > >> --- > >> gcc/testsuite/gcc.dg/vect/pr101145.c | 187 > >> ++++++++++++++++++++++ > >> gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 ++++++++ > >> gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++ > >> gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++ > >> gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++ > >> gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ > >> gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 ++++ > >> gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ > >> gcc/tree-ssa-loop-niter.c | 157 ++++++++++-------- > >> 9 files changed, 463 insertions(+), 65 deletions(-) > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc > >> create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > >> > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c > >> b/gcc/testsuite/gcc.dg/vect/pr101145.c > >> new file mode 100644 > >> index 00000000000..74031b031cf > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c > >> @@ -0,0 +1,187 @@ > >> +/* { dg-require-effective-target vect_int } */ > >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > >> +#include <limits.h> > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + while (n < ++l) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, > >> unsigned) > >> +{ > >> + while (UINT_MAX - 64 < ++l) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + l = UINT_MAX - 32; > >> + while (n < ++l) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + while (n <= ++l) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ // infininate > >> + while (0 <= ++l) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + //no loop > >> + l = UINT_MAX; > >> + while (n < ++l) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + while (--l < n) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, > >> unsigned) > >> +{ > >> + while (--l < 64) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +unsigned __attribute__ ((noinline)) > >> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned > >> n) > >> +{ > >> + l = 32; > >> + while (--l < n) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> + > >> +int a[3200], b[3200]; > >> +int fail; > >> + > >> +int > >> +main () > >> +{ > >> + unsigned l, n; > >> + unsigned res; > >> + /* l > n*/ > >> + n = UINT_MAX - 64; > >> + l = n + 32; > >> + res = foo (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n; > >> + res = foo (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n - 1; > >> + res = foo (a, b, l, n); > >> + if (res != l + 1) > >> + fail++; > >> + > >> + l = n - 32; > >> + res = foo (a, b, l, n); > >> + if (res != l + 1) > >> + fail++; > >> + > >> + l = UINT_MAX; > >> + res = foo (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n + 32; > >> + res = foo_1 (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n + 32; > >> + res = foo_2 (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n; > >> + res = foo_3 (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n - 1; > >> + res = foo_3 (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n - 2; > >> + res = foo_3 (a, b, l, n); > >> + if (res != l + 1) > >> + fail++; > >> + > >> + res = foo_5 (a, b, l, n); > >> + if (res != 0) > >> + fail++; > >> + > >> + n = 64; > >> + l = n - 32; > >> + res = bar (a, b, l, n); > >> + res++; > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n; > >> + res = bar (a, b, l, n); > >> + res++; > >> + if (res != 0) > >> + fail++; > >> + > >> + l = n + 1; > >> + res = bar (a, b, l, n); > >> + res++; > >> + if (res != l) > >> + fail++; > >> + > >> + l = 0; > >> + res = bar (a, b, l, n); > >> + res++; > >> + if (res != l) > >> + fail++; > >> + > >> + l = 32; > >> + res = bar_1 (a, b, l, n); > >> + res++; > >> + if (res != 0) > >> + fail++; > >> + > >> + res = bar_1 (a, b, l, n); > >> + res++; > >> + if (res != 0) > >> + fail++; > >> + > >> + if (fail) > >> + __builtin_abort (); > >> + return 0; > >> +} > >> + > >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } > >> } */ > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> new file mode 100644 > >> index 00000000000..6eed3fa8aca > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> @@ -0,0 +1,63 @@ > >> +TYPE __attribute__ ((noinline)) > >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > >> +{ > >> + for (l = L_BASE; n < l; l += C) > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +TYPE __attribute__ ((noinline)) > >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > >> +{ > >> + for (l = L_BASE_DOWN; l < n; l -= C) I noticed that both L_BASE and L_BASE_DOWN are defined as l, which makes this test a bit confusing. Could you clean the use of l, for example, by using an auto var for the loop index invariable? Otherwise the patch looks good to me. Thanks very much for the work. Thanks, bin > >> + *a++ = *b++ + 1; > >> + return l; > >> +} > >> + > >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } > >> + > >> +int a[1000], b[1000]; > >> +int fail; > >> + > >> +int > >> +main () > >> +{ > >> + TYPE res; > >> + TYPE l; > >> + TYPE n; > >> + n = N_BASE; > >> + l = n - C; > >> + res = foo_sign (a, b, l, n); > >> + if (res != l) > >> + fail++; > >> + > >> + l = n; > >> + res = foo_sign (a, b, l, n); > >> + if (res != l) > >> + fail++; > >> + > >> + l = n + C; > >> + res = foo_sign (a, b, l, n); > >> + if (neq ((res - MIN) / C, 0)) > >> + fail++; > >> + > >> + n = N_BASE_DOWN; > >> + l = n - C; > >> + res = bar_sign (a, b, l, n); > >> + if (neq ((MAX - res) / C, 0)) > >> + fail++; > >> + > >> + l = n; > >> + res = bar_sign (a, b, l, n); > >> + if (res != l) > >> + fail++; > >> + > >> + l = n + C; > >> + res = bar_sign (a, b, l, n); > >> + if (res != l) > >> + fail++; > >> + > >> + if (fail) > >> + __builtin_abort (); > >> + return 0; > >> +} > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> new file mode 100644 > >> index 00000000000..94f6b99b893 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> @@ -0,0 +1,15 @@ > >> +/* { dg-require-effective-target vect_int } */ > >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > >> +#define TYPE signed char > >> +#define MIN -128 > >> +#define MAX 127 > >> +#define N_BASE (MAX - 32) > >> +#define N_BASE_DOWN (MIN + 32) > >> + > >> +#define C 3 > >> +#define L_BASE l > >> +#define L_BASE_DOWN l > >> + > >> +#include "pr101145.inc" > >> + > >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } > >> } */ > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c > >> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > >> new file mode 100644 > >> index 00000000000..d3cfc9e01e1 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > >> @@ -0,0 +1,15 @@ > >> +/* { dg-require-effective-target vect_int } */ > >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > >> +#define TYPE unsigned char > >> +#define MIN 0 > >> +#define MAX 255 > >> +#define N_BASE (MAX - 32 + 1) > >> +#define N_BASE_DOWN (MIN + 32) > >> + > >> +#define C 2 > >> +#define L_BASE l > >> +#define L_BASE_DOWN l > >> + > >> +#include "pr101145.inc" > >> + > >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } > >> } */ > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c > >> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > >> new file mode 100644 > >> index 00000000000..9d5863726a5 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > >> @@ -0,0 +1,15 @@ > >> +/* { dg-require-effective-target vect_int } */ > >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > >> +#define TYPE int * > >> +#define MIN ((TYPE)0) > >> +#define MAX ((TYPE)((long long)-1)) > >> +#define N_BASE (MIN - 32) > >> +#define N_BASE_DOWN (MIN + 32) > >> + > >> +#define C 1 > >> +#define L_BASE l > >> +#define L_BASE_DOWN l > >> + > >> +#include "pr101145.inc" > >> + > >> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } > >> } */ > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c > >> b/gcc/testsuite/gcc.dg/vect/pr101145inf.c > >> new file mode 100644 > >> index 00000000000..ed49f5670b5 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c > >> @@ -0,0 +1,25 @@ > >> +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ > >> +/* { dg-options "-O3" } */ > >> +#include <limits.h> > >> +#include "pr101145inf.inc" > >> + > >> +__attribute__ ((noinline)) > >> +unsigned foo(unsigned val, unsigned start) > >> +{ > >> + unsigned cnt = 0; > >> + for (unsigned i = start; val <= i; i+=16) > >> + cnt++; > >> + return cnt; > >> +} > >> + > >> +void test_finite () > >> +{ > >> + unsigned n = foo (16, UINT_MAX - 32); > >> + if (n != 3) > >> + __builtin_abort (); > >> +} > >> + > >> +void test_infinite () > >> +{ > >> + foo (15, UINT_MAX - 32); > >> +} > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > >> b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > >> new file mode 100644 > >> index 00000000000..4aa3d049187 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > >> @@ -0,0 +1,28 @@ > >> +#include <unistd.h> > >> +#include <signal.h> > >> +#include <stdlib.h> > >> + > >> +void test_finite (); > >> +void test_infinite (); > >> + > >> +void do_exit (int i) > >> +{ > >> + exit (0); > >> +} > >> + > >> +int main(void) > >> +{ > >> + test_finite (); > >> + struct sigaction s; > >> + sigemptyset (&s.sa_mask); > >> + s.sa_handler = do_exit; > >> + s.sa_flags = 0; > >> + sigaction (SIGALRM, &s, NULL); > >> + alarm (1); > >> + > >> + test_infinite (); > >> + > >> + __builtin_abort (); > >> + return 1; > >> +} > >> + > >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > >> b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > >> new file mode 100644 > >> index 00000000000..4ee3e31c7fe > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > >> @@ -0,0 +1,23 @@ > >> +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ > >> +/* { dg-options "-O3" } */ > >> +#include <limits.h> > >> +#include "pr101145inf.inc" > >> + > >> +__attribute__ ((noinline)) > >> +unsigned foo(unsigned val, unsigned start) > >> +{ > >> + unsigned cnt = 0; > >> + for (unsigned i = start; i < val; i-=16) > >> + cnt++; > >> + return cnt; > >> +} > >> + > >> +void test_finite () > >> +{ > >> + foo (UINT_MAX - 15, 32); > >> +} > >> + > >> +void test_infinite () > >> +{ > >> + foo (UINT_MAX - 14, 32); > >> +} > >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c > >> index b5add827018..85c179be68d 100644 > >> --- a/gcc/tree-ssa-loop-niter.c > >> +++ b/gcc/tree-ssa-loop-niter.c > >> @@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, affine_iv > >> *iv0, affine_iv *iv1, > >> } > >> } > >> > >> +/* Determines number of iterations of loop whose ending condition > >> + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}. > >> + The number of iterations is stored to NITER. */ > >> + > >> +static bool > >> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv > >> *iv0, > >> + affine_iv *iv1, class tree_niter_desc *niter) > >> +{ > >> + tree niter_type = unsigned_type_for (type); > >> + tree step, num, assumptions, may_be_zero; > >> + wide_int high, low, max, min; > >> + > >> + may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, > >> iv0->base); > >> + if (integer_onep (may_be_zero)) > >> + return false; > >> + > >> + int prec = TYPE_PRECISION (type); > >> + signop sgn = TYPE_SIGN (type); > >> + min = wi::min_value (prec, sgn); > >> + max = wi::max_value (prec, sgn); > >> + > >> + /* n < {base, C}. */ > >> + if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit > >> (iv1->step)) > >> + { > >> + step = iv1->step; > >> + /* MIN + C - 1 <= n. */ > >> + tree last = wide_int_to_tree (type, min + wi::to_wide (step) - > >> 1); > >> + assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, > >> iv0->base); > >> + if (integer_zerop (assumptions)) > >> + return false; > >> + > >> + num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree > >> (type, max), > >> + iv1->base); > >> + high = max; > >> + if (TREE_CODE (iv1->base) == INTEGER_CST) > >> + low = wi::to_wide (iv1->base) - 1; > >> + else if (TREE_CODE (iv0->base) == INTEGER_CST) > >> + low = wi::to_wide (iv0->base); > >> + else > >> + low = min; > >> + } > >> + /* {base, -C} < n. */ > >> + else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop > >> (iv1->step)) > >> + { > >> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), > >> iv0->step); > >> + /* MAX - C + 1 >= n. */ > >> + tree last = wide_int_to_tree (type, max - wi::to_wide (step) + > >> 1); > >> + assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, > >> iv1->base); > >> + if (integer_zerop (assumptions)) > >> + return false; > >> + > >> + num = fold_build2 (MINUS_EXPR, niter_type, iv0->base, > >> + wide_int_to_tree (type, min)); > >> + low = min; > >> + if (TREE_CODE (iv0->base) == INTEGER_CST) > >> + high = wi::to_wide (iv0->base) + 1; > >> + else if (TREE_CODE (iv1->base) == INTEGER_CST) > >> + high = wi::to_wide (iv1->base); > >> + else > >> + high = max; > >> + } > >> + else > >> + return false; > >> + > >> + /* (delta + step - 1) / step */ > >> + step = fold_convert (niter_type, step); > >> + num = fold_convert (niter_type, num); > >> + num = fold_build2 (PLUS_EXPR, niter_type, num, step); > >> + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step); > >> + > >> + widest_int delta, s; > >> + delta = widest_int::from (high, sgn) - widest_int::from (low, sgn); > >> + s = wi::to_widest (step); > >> + delta = delta + s - 1; > >> + niter->max = wi::udiv_floor (delta, s); > >> + > >> + niter->may_be_zero = may_be_zero; > >> + > >> + if (!integer_nonzerop (assumptions)) > >> + niter->assumptions = fold_build2 (TRUTH_AND_EXPR, > >> boolean_type_node, > >> + niter->assumptions, assumptions); > >> + > >> + niter->control.no_overflow = false; > >> + > >> + return true; > >> +} > >> + > >> /* Determines number of iterations of loop whose ending condition > >> is IV0 < IV1. TYPE is the type of the iv. The number of > >> iterations is stored to NITER. BNDS bounds the difference > >> @@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree > >> type, affine_iv *iv0, > >> niter->bound = iv0->base; > >> } > >> > >> + /* {base, -C} < n, or n < {base, C} */ > >> + if (tree_int_cst_sign_bit (iv0->step) > >> + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit > >> (iv1->step))) > >> + return number_of_iterations_until_wrap (loop, type, iv0, iv1, > >> niter); > >> + > >> delta = fold_build2 (MINUS_EXPR, niter_type, > >> fold_convert (niter_type, iv1->base), > >> fold_convert (niter_type, iv0->base)); > >> @@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv) > >> } > >> } > >> > >> -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts > >> - the condition for loop-until-wrap cases. For example: > >> - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 > >> - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 > >> - Return true if condition is successfully adjusted. */ > >> - > >> -static bool > >> -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code > >> *code, > >> - affine_iv *iv1) > >> -{ > >> - /* Only support simple cases for the moment. */ > >> - if (TREE_CODE (iv0->base) != INTEGER_CST > >> - || TREE_CODE (iv1->base) != INTEGER_CST) > >> - return false; > >> - > >> - tree niter_type = unsigned_type_for (type), high, low; > >> - /* Case: i-- < 10. */ > >> - if (integer_zerop (iv1->step)) > >> - { > >> - /* TODO: Should handle case in which abs(step) != 1. */ > >> - if (!integer_minus_onep (iv0->step)) > >> - return false; > >> - /* Give up on infinite loop. */ > >> - if (*code == LE_EXPR > >> - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type))) > >> - return false; > >> - high = fold_build2 (PLUS_EXPR, niter_type, > >> - fold_convert (niter_type, iv0->base), > >> - build_int_cst (niter_type, 1)); > >> - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); > >> - } > >> - else if (integer_zerop (iv0->step)) > >> - { > >> - /* TODO: Should handle case in which abs(step) != 1. */ > >> - if (!integer_onep (iv1->step)) > >> - return false; > >> - /* Give up on infinite loop. */ > >> - if (*code == LE_EXPR > >> - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type))) > >> - return false; > >> - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); > >> - low = fold_build2 (MINUS_EXPR, niter_type, > >> - fold_convert (niter_type, iv1->base), > >> - build_int_cst (niter_type, 1)); > >> - } > >> - else > >> - gcc_unreachable (); > >> - > >> - iv0->base = low; > >> - iv0->step = fold_convert (niter_type, integer_one_node); > >> - iv1->base = high; > >> - iv1->step = build_int_cst (niter_type, 0); > >> - *code = NE_EXPR; > >> - return true; > >> -} > >> - > >> /* Determine the number of iterations according to condition (for > >> staying > >> inside loop) which compares two induction variables using > >> comparison > >> operator CODE. The induction variable on left side of the > >> comparison > >> @@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop, > >> return true; > >> } > >> > >> - /* Handle special case loops: while (i-- < 10) and while (10 < i++) > >> by > >> - adjusting iv0, iv1 and code. */ > >> - if (code != NE_EXPR > >> - && (tree_int_cst_sign_bit (iv0->step) > >> - || (!integer_zerop (iv1->step) > >> - && !tree_int_cst_sign_bit (iv1->step))) > >> - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1)) > >> - return false; > >> - > >> /* OK, now we know we have a senseful loop. Handle several cases, > >> depending > >> on what comparison operator is used. */ > >> bound_difference (loop, iv1->base, iv0->base, &bnds);
"Bin.Cheng" <amker.cheng@gmail.com> writes: > On Wed, Aug 4, 2021 at 10:42 AM guojiufu > <guojiufu@linux.ibm.com> wrote: >> >> Hi, >> cut... >> >> @@ -0,0 +1,63 @@ >> >> +TYPE __attribute__ ((noinline)) >> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, >> >> TYPE n) >> >> +{ >> >> + for (l = L_BASE; n < l; l += C) >> >> + *a++ = *b++ + 1; >> >> + return l; >> >> +} >> >> + >> >> +TYPE __attribute__ ((noinline)) >> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, >> >> TYPE n) >> >> +{ >> >> + for (l = L_BASE_DOWN; l < n; l -= C) > I noticed that both L_BASE and L_BASE_DOWN are defined as l, > which > makes this test a bit confusing. Could you clean the use of l, > for > example, by using an auto var for the loop index invariable? > Otherwise the patch looks good to me. Thanks very much for the > work. Thanks a lot for your help to review! L_BASE.. are not needed. Updated the patch which use a new index var 'i' for loop instead param 'l': TYPE i; for (i = l; n < i; i += C) I updated the patch as below. Bootstrap & regress pass on powerpc64 and powerpc64le. For code like: unsigned foo(unsigned val, unsigned start) { unsigned cnt = 0; for (unsigned i = start; i > val; ++i) cnt++; return cnt; } The number of iterations should be about UINT_MAX - start. There is function adjust_cond_for_loop_until_wrap which handles similar work for const bases. Like adjust_cond_for_loop_until_wrap, this patch enhance function number_of_iterations_cond/number_of_iterations_lt to analyze number of iterations for this kind of loop. Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. Is this ok for trunk? gcc/ChangeLog: 2021-08-16 Jiufu Guo <guojiufu@linux.ibm.com> PR tree-optimization/101145 * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): New function. (number_of_iterations_lt): Invoke above function. (adjust_cond_for_loop_until_wrap): Merge to number_of_iterations_until_wrap. (number_of_iterations_cond): Update invokes for adjust_cond_for_loop_until_wrap and number_of_iterations_lt. gcc/testsuite/ChangeLog: 2021-08-16 Jiufu Guo <guojiufu@linux.ibm.com> PR tree-optimization/101145 * gcc.dg/vect/pr101145.c: New test. * gcc.dg/vect/pr101145.inc: New test. * gcc.dg/vect/pr101145_1.c: New test. * gcc.dg/vect/pr101145_2.c: New test. * gcc.dg/vect/pr101145_3.c: New test. * gcc.dg/vect/pr101145inf.c: New test. * gcc.dg/vect/pr101145inf.inc: New test. * gcc.dg/vect/pr101145inf_1.c: New test. --- gcc/testsuite/gcc.dg/vect/pr101145.c | 187 ++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr101145.inc | 65 ++++++++ gcc/testsuite/gcc.dg/vect/pr101145_1.c | 13 ++ gcc/testsuite/gcc.dg/vect/pr101145_2.c | 13 ++ gcc/testsuite/gcc.dg/vect/pr101145_3.c | 13 ++ gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 ++++ gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ gcc/tree-ssa-loop-niter.c | 157 ++++++++++-------- 9 files changed, 459 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 00000000000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include <limits.h> + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (--l < n) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (--l < 64) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = 32; + while (--l < n) + *a++ = *b++ + 1; + return l; +} + + +int a[3200], b[3200]; +int fail; + +int +main () +{ + unsigned l, n; + unsigned res; + /* l > n*/ + n = UINT_MAX - 64; + l = n + 32; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n - 1; + res = foo (a, b, l, n); + if (res != l + 1) + fail++; + + l = n - 32; + res = foo (a, b, l, n); + if (res != l + 1) + fail++; + + l = UINT_MAX; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n + 32; + res = foo_1 (a, b, l, n); + if (res != 0) + fail++; + + l = n + 32; + res = foo_2 (a, b, l, n); + if (res != 0) + fail++; + + l = n; + res = foo_3 (a, b, l, n); + if (res != 0) + fail++; + + l = n - 1; + res = foo_3 (a, b, l, n); + if (res != 0) + fail++; + + l = n - 2; + res = foo_3 (a, b, l, n); + if (res != l + 1) + fail++; + + res = foo_5 (a, b, l, n); + if (res != 0) + fail++; + + n = 64; + l = n - 32; + res = bar (a, b, l, n); + res++; + if (res != 0) + fail++; + + l = n; + res = bar (a, b, l, n); + res++; + if (res != 0) + fail++; + + l = n + 1; + res = bar (a, b, l, n); + res++; + if (res != l) + fail++; + + l = 0; + res = bar (a, b, l, n); + res++; + if (res != l) + fail++; + + l = 32; + res = bar_1 (a, b, l, n); + res++; + if (res != 0) + fail++; + + res = bar_1 (a, b, l, n); + res++; + if (res != 0) + fail++; + + if (fail) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc new file mode 100644 index 00000000000..615d2e5e552 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc @@ -0,0 +1,65 @@ +TYPE __attribute__ ((noinline)) +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) +{ + TYPE i; + for (i = l; n < i; i += C) + *a++ = *b++ + 1; + return i; +} + +TYPE __attribute__ ((noinline)) +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) +{ + TYPE i; + for (i = l; i < n; i -= C) + *a++ = *b++ + 1; + return i; +} + +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } + +int a[1000], b[1000]; +int fail; + +int +main () +{ + TYPE res; + TYPE l; + TYPE n; + n = N_BASE; + l = n - C; + res = foo_sign (a, b, l, n); + if (res != l) + fail++; + + l = n; + res = foo_sign (a, b, l, n); + if (res != l) + fail++; + + l = n + C; + res = foo_sign (a, b, l, n); + if (neq ((res - MIN) / C, 0)) + fail++; + + n = N_BASE_DOWN; + l = n - C; + res = bar_sign (a, b, l, n); + if (neq ((MAX - res) / C, 0)) + fail++; + + l = n; + res = bar_sign (a, b, l, n); + if (res != l) + fail++; + + l = n + C; + res = bar_sign (a, b, l, n); + if (res != l) + fail++; + + if (fail) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c new file mode 100644 index 00000000000..8bc26e2436e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE signed char +#define MIN -128 +#define MAX 127 +#define N_BASE (MAX - 32) +#define N_BASE_DOWN (MIN + 32) + +#define C 3 + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c new file mode 100644 index 00000000000..b14c4b4b519 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE unsigned char +#define MIN 0 +#define MAX 255 +#define N_BASE (MAX - 32 + 1) +#define N_BASE_DOWN (MIN + 32) + +#define C 2 + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c new file mode 100644 index 00000000000..99289afec0b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE int * +#define MIN ((TYPE)0) +#define MAX ((TYPE)((long long)-1)) +#define N_BASE (MIN - 32) +#define N_BASE_DOWN (MIN + 32) + +#define C 1 + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c b/gcc/testsuite/gcc.dg/vect/pr101145inf.c new file mode 100644 index 00000000000..ed49f5670b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c @@ -0,0 +1,25 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-options "-O3" } */ +#include <limits.h> +#include "pr101145inf.inc" + +__attribute__ ((noinline)) +unsigned foo(unsigned val, unsigned start) +{ + unsigned cnt = 0; + for (unsigned i = start; val <= i; i+=16) + cnt++; + return cnt; +} + +void test_finite () +{ + unsigned n = foo (16, UINT_MAX - 32); + if (n != 3) + __builtin_abort (); +} + +void test_infinite () +{ + foo (15, UINT_MAX - 32); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc new file mode 100644 index 00000000000..4aa3d049187 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc @@ -0,0 +1,28 @@ +#include <unistd.h> +#include <signal.h> +#include <stdlib.h> + +void test_finite (); +void test_infinite (); + +void do_exit (int i) +{ + exit (0); +} + +int main(void) +{ + test_finite (); + struct sigaction s; + sigemptyset (&s.sa_mask); + s.sa_handler = do_exit; + s.sa_flags = 0; + sigaction (SIGALRM, &s, NULL); + alarm (1); + + test_infinite (); + + __builtin_abort (); + return 1; +} + diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c new file mode 100644 index 00000000000..4ee3e31c7fe --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c @@ -0,0 +1,23 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-options "-O3" } */ +#include <limits.h> +#include "pr101145inf.inc" + +__attribute__ ((noinline)) +unsigned foo(unsigned val, unsigned start) +{ + unsigned cnt = 0; + for (unsigned i = start; i < val; i-=16) + cnt++; + return cnt; +} + +void test_finite () +{ + foo (UINT_MAX - 15, 32); +} + +void test_infinite () +{ + foo (UINT_MAX - 14, 32); +} diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 466158a5eb1..7af92d1c893 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1, } } +/* Determines number of iterations of loop whose ending condition + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}. + The number of iterations is stored to NITER. */ + +static bool +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0, + affine_iv *iv1, class tree_niter_desc *niter) +{ + tree niter_type = unsigned_type_for (type); + tree step, num, assumptions, may_be_zero; + wide_int high, low, max, min; + + may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base); + if (integer_onep (may_be_zero)) + return false; + + int prec = TYPE_PRECISION (type); + signop sgn = TYPE_SIGN (type); + min = wi::min_value (prec, sgn); + max = wi::max_value (prec, sgn); + + /* n < {base, C}. */ + if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step)) + { + step = iv1->step; + /* MIN + C - 1 <= n. */ + tree last = wide_int_to_tree (type, min + wi::to_wide (step) - 1); + assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, iv0->base); + if (integer_zerop (assumptions)) + return false; + + num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree (type, max), + iv1->base); + high = max; + if (TREE_CODE (iv1->base) == INTEGER_CST) + low = wi::to_wide (iv1->base) - 1; + else if (TREE_CODE (iv0->base) == INTEGER_CST) + low = wi::to_wide (iv0->base); + else + low = min; + } + /* {base, -C} < n. */ + else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step)) + { + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step); + /* MAX - C + 1 >= n. */ + tree last = wide_int_to_tree (type, max - wi::to_wide (step) + 1); + assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, iv1->base); + if (integer_zerop (assumptions)) + return false; + + num = fold_build2 (MINUS_EXPR, niter_type, iv0->base, + wide_int_to_tree (type, min)); + low = min; + if (TREE_CODE (iv0->base) == INTEGER_CST) + high = wi::to_wide (iv0->base) + 1; + else if (TREE_CODE (iv1->base) == INTEGER_CST) + high = wi::to_wide (iv1->base); + else + high = max; + } + else + return false; + + /* (delta + step - 1) / step */ + step = fold_convert (niter_type, step); + num = fold_convert (niter_type, num); + num = fold_build2 (PLUS_EXPR, niter_type, num, step); + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step); + + widest_int delta, s; + delta = widest_int::from (high, sgn) - widest_int::from (low, sgn); + s = wi::to_widest (step); + delta = delta + s - 1; + niter->max = wi::udiv_floor (delta, s); + + niter->may_be_zero = may_be_zero; + + if (!integer_nonzerop (assumptions)) + niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + niter->assumptions, assumptions); + + niter->control.no_overflow = false; + + return true; +} + /* Determines number of iterations of loop whose ending condition is IV0 < IV1. TYPE is the type of the iv. The number of iterations is stored to NITER. BNDS bounds the difference @@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0, niter->bound = iv0->base; } + /* {base, -C} < n, or n < {base, C} */ + if (tree_int_cst_sign_bit (iv0->step) + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))) + return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter); + delta = fold_build2 (MINUS_EXPR, niter_type, fold_convert (niter_type, iv1->base), fold_convert (niter_type, iv0->base)); @@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv) } } -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts - the condition for loop-until-wrap cases. For example: - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 - Return true if condition is successfully adjusted. */ - -static bool -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code, - affine_iv *iv1) -{ - /* Only support simple cases for the moment. */ - if (TREE_CODE (iv0->base) != INTEGER_CST - || TREE_CODE (iv1->base) != INTEGER_CST) - return false; - - tree niter_type = unsigned_type_for (type), high, low; - /* Case: i-- < 10. */ - if (integer_zerop (iv1->step)) - { - /* TODO: Should handle case in which abs(step) != 1. */ - if (!integer_minus_onep (iv0->step)) - return false; - /* Give up on infinite loop. */ - if (*code == LE_EXPR - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type))) - return false; - high = fold_build2 (PLUS_EXPR, niter_type, - fold_convert (niter_type, iv0->base), - build_int_cst (niter_type, 1)); - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); - } - else if (integer_zerop (iv0->step)) - { - /* TODO: Should handle case in which abs(step) != 1. */ - if (!integer_onep (iv1->step)) - return false; - /* Give up on infinite loop. */ - if (*code == LE_EXPR - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type))) - return false; - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); - low = fold_build2 (MINUS_EXPR, niter_type, - fold_convert (niter_type, iv1->base), - build_int_cst (niter_type, 1)); - } - else - gcc_unreachable (); - - iv0->base = low; - iv0->step = fold_convert (niter_type, integer_one_node); - iv1->base = high; - iv1->step = build_int_cst (niter_type, 0); - *code = NE_EXPR; - return true; -} - /* Determine the number of iterations according to condition (for staying inside loop) which compares two induction variables using comparison operator CODE. The induction variable on left side of the comparison @@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop, return true; } - /* Handle special case loops: while (i-- < 10) and while (10 < i++) by - adjusting iv0, iv1 and code. */ - if (code != NE_EXPR - && (tree_int_cst_sign_bit (iv0->step) - || (!integer_zerop (iv1->step) - && !tree_int_cst_sign_bit (iv1->step))) - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1)) - return false; - /* OK, now we know we have a senseful loop. Handle several cases, depending on what comparison operator is used. */ bound_difference (loop, iv1->base, iv0->base, &bnds);
Jiufu Guo <guojiufu@linux.ibm.com> writes: > "Bin.Cheng" <amker.cheng@gmail.com> writes: > >> On Wed, Aug 4, 2021 at 10:42 AM guojiufu >> <guojiufu@linux.ibm.com> wrote: >>> >>> Hi, >>> > cut... >>> >> @@ -0,0 +1,63 @@ >>> >> +TYPE __attribute__ ((noinline)) >>> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE >>> >> l, >> TYPE n) >>> >> +{ >>> >> + for (l = L_BASE; n < l; l += C) >>> >> + *a++ = *b++ + 1; >>> >> + return l; >>> >> +} >>> >> + >>> >> +TYPE __attribute__ ((noinline)) >>> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE >>> >> l, >> TYPE n) >>> >> +{ >>> >> + for (l = L_BASE_DOWN; l < n; l -= C) >> I noticed that both L_BASE and L_BASE_DOWN are defined as l, >> which >> makes this test a bit confusing. Could you clean the use of l, >> for >> example, by using an auto var for the loop index invariable? >> Otherwise the patch looks good to me. Thanks very much for the >> work. > Thanks a lot for your help to review! > L_BASE.. are not needed. Updated the patch which use > a new index var 'i' for loop instead param 'l': > > TYPE i; > for (i = l; n < i; i += C) > > I updated the patch as below. > Bootstrap & regress pass on powerpc64 and powerpc64le. I mean it also pass powerpc64(BE includes 32bit). BR, Jiufu > > For code like: > unsigned foo(unsigned val, unsigned start) > { > unsigned cnt = 0; > for (unsigned i = start; i > val; ++i) > cnt++; > return cnt; > } > > The number of iterations should be about UINT_MAX - start. > > There is function adjust_cond_for_loop_until_wrap which > handles similar work for const bases. > Like adjust_cond_for_loop_until_wrap, this patch enhance > function number_of_iterations_cond/number_of_iterations_lt > to analyze number of iterations for this kind of loop. > > Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64. > Is this ok for trunk? > > gcc/ChangeLog: > > 2021-08-16 Jiufu Guo <guojiufu@linux.ibm.com> > > PR tree-optimization/101145 > * tree-ssa-loop-niter.c (number_of_iterations_until_wrap): > New function. > (number_of_iterations_lt): Invoke above function. > (adjust_cond_for_loop_until_wrap): > Merge to number_of_iterations_until_wrap. > (number_of_iterations_cond): Update invokes for > adjust_cond_for_loop_until_wrap and > number_of_iterations_lt. > > gcc/testsuite/ChangeLog: > > 2021-08-16 Jiufu Guo <guojiufu@linux.ibm.com> > > PR tree-optimization/101145 > * gcc.dg/vect/pr101145.c: New test. > * gcc.dg/vect/pr101145.inc: New test. > * gcc.dg/vect/pr101145_1.c: New test. > * gcc.dg/vect/pr101145_2.c: New test. > * gcc.dg/vect/pr101145_3.c: New test. > * gcc.dg/vect/pr101145inf.c: New test. > * gcc.dg/vect/pr101145inf.inc: New test. > * gcc.dg/vect/pr101145inf_1.c: New test. > --- > gcc/testsuite/gcc.dg/vect/pr101145.c | 187 > ++++++++++++++++++++++ > gcc/testsuite/gcc.dg/vect/pr101145.inc | 65 ++++++++ > gcc/testsuite/gcc.dg/vect/pr101145_1.c | 13 ++ > gcc/testsuite/gcc.dg/vect/pr101145_2.c | 13 ++ > gcc/testsuite/gcc.dg/vect/pr101145_3.c | 13 ++ > gcc/testsuite/gcc.dg/vect/pr101145inf.c | 25 +++ > gcc/testsuite/gcc.dg/vect/pr101145inf.inc | 28 ++++ > gcc/testsuite/gcc.dg/vect/pr101145inf_1.c | 23 +++ > gcc/tree-ssa-loop-niter.c | 157 > ++++++++++-------- > 9 files changed, 459 insertions(+), 65 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc > create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c > b/gcc/testsuite/gcc.dg/vect/pr101145.c > new file mode 100644 > index 00000000000..74031b031cf > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c > @@ -0,0 +1,187 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#include <limits.h> > + > +unsigned __attribute__ ((noinline)) > +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned n) > +{ > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned) > +{ > + while (UINT_MAX - 64 < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned n) > +{ > + l = UINT_MAX - 32; > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned n) > +{ > + while (n <= ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned n) > +{ // infininate + while (0 <= ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned n) > +{ > + //no loop > + l = UINT_MAX; > + while (n < ++l) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned n) > +{ > + while (--l < n) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned) > +{ > + while (--l < 64) > + *a++ = *b++ + 1; > + return l; > +} > + > +unsigned __attribute__ ((noinline)) > +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, > unsigned n) > +{ > + l = 32; > + while (--l < n) > + *a++ = *b++ + 1; > + return l; > +} > + > + > +int a[3200], b[3200]; > +int fail; > + > +int > +main () > +{ > + unsigned l, n; > + unsigned res; > + /* l > n*/ > + n = UINT_MAX - 64; > + l = n + 32; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 1; > + res = foo (a, b, l, n); > + if (res != l + 1) > + fail++; > + + l = n - 32; > + res = foo (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + l = UINT_MAX; > + res = foo (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n + 32; > + res = foo_1 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n + 32; > + res = foo_2 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n; > + res = foo_3 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 1; > + res = foo_3 (a, b, l, n); > + if (res != 0) > + fail++; > + > + l = n - 2; > + res = foo_3 (a, b, l, n); > + if (res != l + 1) > + fail++; > + > + res = foo_5 (a, b, l, n); > + if (res != 0) > + fail++; > + > + n = 64; > + l = n - 32; > + res = bar (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + l = n; > + res = bar (a, b, l, n); > + res++; > + if (res != 0) > + fail++; > + > + l = n + 1; > + res = bar (a, b, l, n); > + res++; > + if (res != l) > + fail++; > + > + l = 0; > + res = bar (a, b, l, n); > + res++; > + if (res != l) > + fail++; > + > + l = 32; > + res = bar_1 (a, b, l, n); > + res++; > + if (res != 0) > + fail++; + > + res = bar_1 (a, b, l, n); > + res++; > + if (res != 0) > + fail++; + > + if (fail) > + __builtin_abort (); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 > "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc > b/gcc/testsuite/gcc.dg/vect/pr101145.inc > new file mode 100644 > index 00000000000..615d2e5e552 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc > @@ -0,0 +1,65 @@ > +TYPE __attribute__ ((noinline)) > +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, > TYPE n) > +{ > + TYPE i; > + for (i = l; n < i; i += C) > + *a++ = *b++ + 1; > + return i; > +} > + > +TYPE __attribute__ ((noinline)) > +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, > TYPE n) > +{ > + TYPE i; > + for (i = l; i < n; i -= C) > + *a++ = *b++ + 1; > + return i; > +} > + > +int __attribute__ ((noinline)) neq (int a, int b) { return a != > b; } > + > +int a[1000], b[1000]; > +int fail; > + > +int > +main () > +{ > + TYPE res; > + TYPE l; > + TYPE n; > + n = N_BASE; > + l = n - C; > + res = foo_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n; > + res = foo_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n + C; > + res = foo_sign (a, b, l, n); > + if (neq ((res - MIN) / C, 0)) > + fail++; > + > + n = N_BASE_DOWN; > + l = n - C; > + res = bar_sign (a, b, l, n); > + if (neq ((MAX - res) / C, 0)) > + fail++; > + > + l = n; > + res = bar_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + l = n + C; > + res = bar_sign (a, b, l, n); > + if (res != l) > + fail++; > + > + if (fail) > + __builtin_abort (); > + return 0; > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c > b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > new file mode 100644 > index 00000000000..8bc26e2436e > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > @@ -0,0 +1,13 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE signed char > +#define MIN -128 > +#define MAX 127 > +#define N_BASE (MAX - 32) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 3 > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 > "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c > b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > new file mode 100644 > index 00000000000..b14c4b4b519 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c > @@ -0,0 +1,13 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE unsigned char > +#define MIN 0 > +#define MAX 255 > +#define N_BASE (MAX - 32 + 1) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 2 > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 > "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c > b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > new file mode 100644 > index 00000000000..99289afec0b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c > @@ -0,0 +1,13 @@ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > +#define TYPE int * > +#define MIN ((TYPE)0) > +#define MAX ((TYPE)((long long)-1)) > +#define N_BASE (MIN - 32) > +#define N_BASE_DOWN (MIN + 32) > + > +#define C 1 > + > +#include "pr101145.inc" > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 > "vect" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c > b/gcc/testsuite/gcc.dg/vect/pr101145inf.c > new file mode 100644 > index 00000000000..ed49f5670b5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c > @@ -0,0 +1,25 @@ > +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ > +/* { dg-options "-O3" } */ > +#include <limits.h> > +#include "pr101145inf.inc" > + > +__attribute__ ((noinline)) > +unsigned foo(unsigned val, unsigned start) > +{ > + unsigned cnt = 0; > + for (unsigned i = start; val <= i; i+=16) > + cnt++; > + return cnt; > +} > + > +void test_finite () > +{ > + unsigned n = foo (16, UINT_MAX - 32); > + if (n != 3) > + __builtin_abort (); > +} > + > +void test_infinite () > +{ > + foo (15, UINT_MAX - 32); > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > new file mode 100644 > index 00000000000..4aa3d049187 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc > @@ -0,0 +1,28 @@ > +#include <unistd.h> > +#include <signal.h> > +#include <stdlib.h> > + > +void test_finite (); > +void test_infinite (); > + > +void do_exit (int i) > +{ > + exit (0); > +} > + > +int main(void) > +{ > + test_finite (); > + struct sigaction s; > + sigemptyset (&s.sa_mask); > + s.sa_handler = do_exit; > + s.sa_flags = 0; > + sigaction (SIGALRM, &s, NULL); > + alarm (1); > + > + test_infinite (); > + > + __builtin_abort (); > + return 1; > +} > + > diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > new file mode 100644 > index 00000000000..4ee3e31c7fe > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c > @@ -0,0 +1,23 @@ > +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ > +/* { dg-options "-O3" } */ > +#include <limits.h> > +#include "pr101145inf.inc" > + > +__attribute__ ((noinline)) > +unsigned foo(unsigned val, unsigned start) > +{ > + unsigned cnt = 0; > + for (unsigned i = start; i < val; i-=16) > + cnt++; > + return cnt; > +} > + > +void test_finite () > +{ > + foo (UINT_MAX - 15, 32); > +} > + > +void test_infinite () > +{ > + foo (UINT_MAX - 14, 32); > +} > diff --git a/gcc/tree-ssa-loop-niter.c > b/gcc/tree-ssa-loop-niter.c > index 466158a5eb1..7af92d1c893 100644 > --- a/gcc/tree-ssa-loop-niter.c > +++ b/gcc/tree-ssa-loop-niter.c > @@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, > affine_iv *iv0, affine_iv *iv1, > } > } > > +/* Determines number of iterations of loop whose ending > condition > + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, > C}. > + The number of iterations is stored to NITER. */ > + > +static bool > +number_of_iterations_until_wrap (class loop *, tree type, > affine_iv *iv0, > + affine_iv *iv1, class > tree_niter_desc *niter) > +{ > + tree niter_type = unsigned_type_for (type); > + tree step, num, assumptions, may_be_zero; > + wide_int high, low, max, min; > + > + may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, > iv1->base, iv0->base); > + if (integer_onep (may_be_zero)) > + return false; > + > + int prec = TYPE_PRECISION (type); > + signop sgn = TYPE_SIGN (type); > + min = wi::min_value (prec, sgn); > + max = wi::max_value (prec, sgn); > + > + /* n < {base, C}. */ > + if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit > (iv1->step)) > + { > + step = iv1->step; > + /* MIN + C - 1 <= n. */ > + tree last = wide_int_to_tree (type, min + wi::to_wide > (step) - 1); > + assumptions = fold_build2 (LE_EXPR, boolean_type_node, > last, iv0->base); > + if (integer_zerop (assumptions)) > + return false; > + > + num = fold_build2 (MINUS_EXPR, niter_type, > wide_int_to_tree (type, max), > + iv1->base); > + high = max; > + if (TREE_CODE (iv1->base) == INTEGER_CST) > + low = wi::to_wide (iv1->base) - 1; > + else if (TREE_CODE (iv0->base) == INTEGER_CST) > + low = wi::to_wide (iv0->base); > + else > + low = min; > + } > + /* {base, -C} < n. */ > + else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop > (iv1->step)) > + { > + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), > iv0->step); > + /* MAX - C + 1 >= n. */ > + tree last = wide_int_to_tree (type, max - wi::to_wide > (step) + 1); > + assumptions = fold_build2 (GE_EXPR, boolean_type_node, > last, iv1->base); > + if (integer_zerop (assumptions)) > + return false; > + > + num = fold_build2 (MINUS_EXPR, niter_type, iv0->base, > + wide_int_to_tree (type, min)); > + low = min; > + if (TREE_CODE (iv0->base) == INTEGER_CST) > + high = wi::to_wide (iv0->base) + 1; > + else if (TREE_CODE (iv1->base) == INTEGER_CST) > + high = wi::to_wide (iv1->base); > + else > + high = max; > + } > + else > + return false; > + > + /* (delta + step - 1) / step */ > + step = fold_convert (niter_type, step); > + num = fold_convert (niter_type, num); > + num = fold_build2 (PLUS_EXPR, niter_type, num, step); > + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, > step); > + > + widest_int delta, s; > + delta = widest_int::from (high, sgn) - widest_int::from (low, > sgn); > + s = wi::to_widest (step); > + delta = delta + s - 1; > + niter->max = wi::udiv_floor (delta, s); > + > + niter->may_be_zero = may_be_zero; > + > + if (!integer_nonzerop (assumptions)) > + niter->assumptions = fold_build2 (TRUTH_AND_EXPR, > boolean_type_node, > + niter->assumptions, > assumptions); > + > + niter->control.no_overflow = false; > + > + return true; > +} > + > /* Determines number of iterations of loop whose ending > condition > is IV0 < IV1. TYPE is the type of the iv. The number of > iterations is stored to NITER. BNDS bounds the difference > @@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop > *loop, tree type, affine_iv *iv0, > niter->bound = iv0->base; > } > > + /* {base, -C} < n, or n < {base, C} */ > + if (tree_int_cst_sign_bit (iv0->step) > + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit > (iv1->step))) > + return number_of_iterations_until_wrap (loop, type, iv0, > iv1, niter); > + > delta = fold_build2 (MINUS_EXPR, niter_type, > fold_convert (niter_type, iv1->base), > fold_convert (niter_type, iv0->base)); > @@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv > *iv) > } > } > > -/* Given exit condition IV0 CODE IV1 in TYPE, this function > adjusts > - the condition for loop-until-wrap cases. For example: > - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 > - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 > - Return true if condition is successfully adjusted. */ > - > -static bool > -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, > tree_code *code, > - affine_iv *iv1) > -{ > - /* Only support simple cases for the moment. */ > - if (TREE_CODE (iv0->base) != INTEGER_CST > - || TREE_CODE (iv1->base) != INTEGER_CST) > - return false; > - > - tree niter_type = unsigned_type_for (type), high, low; > - /* Case: i-- < 10. */ > - if (integer_zerop (iv1->step)) > - { > - /* TODO: Should handle case in which abs(step) != 1. */ > - if (!integer_minus_onep (iv0->step)) > - return false; > - /* Give up on infinite loop. */ > - if (*code == LE_EXPR > - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE > (type))) > - return false; > - high = fold_build2 (PLUS_EXPR, niter_type, > - fold_convert (niter_type, iv0->base), > - build_int_cst (niter_type, 1)); > - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); > - } > - else if (integer_zerop (iv0->step)) > - { > - /* TODO: Should handle case in which abs(step) != 1. */ > - if (!integer_onep (iv1->step)) > - return false; > - /* Give up on infinite loop. */ > - if (*code == LE_EXPR > - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE > (type))) > - return false; > - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); > - low = fold_build2 (MINUS_EXPR, niter_type, > - fold_convert (niter_type, iv1->base), > - build_int_cst (niter_type, 1)); > - } > - else > - gcc_unreachable (); > - > - iv0->base = low; > - iv0->step = fold_convert (niter_type, integer_one_node); > - iv1->base = high; > - iv1->step = build_int_cst (niter_type, 0); > - *code = NE_EXPR; > - return true; > -} > - > /* Determine the number of iterations according to condition > (for staying > inside loop) which compares two induction variables using > comparison > operator CODE. The induction variable on left side of the > comparison > @@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop > *loop, > return true; > } > > - /* Handle special case loops: while (i-- < 10) and while (10 > < i++) by > - adjusting iv0, iv1 and code. */ > - if (code != NE_EXPR > - && (tree_int_cst_sign_bit (iv0->step) > - || (!integer_zerop (iv1->step) > - && !tree_int_cst_sign_bit (iv1->step))) > - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, > iv1)) > - return false; > - > /* OK, now we know we have a senseful loop. Handle several > cases, depending > on what comparison operator is used. */ > bound_difference (loop, iv1->base, iv0->base, &bnds);
On 2021-08-16 09:33, Bin.Cheng wrote: > On Wed, Aug 4, 2021 at 10:42 AM guojiufu <guojiufu@linux.ibm.com> > wrote: >> ... >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc >> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc >> >> new file mode 100644 >> >> index 00000000000..6eed3fa8aca >> >> --- /dev/null >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc >> >> @@ -0,0 +1,63 @@ >> >> +TYPE __attribute__ ((noinline)) >> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) >> >> +{ >> >> + for (l = L_BASE; n < l; l += C) >> >> + *a++ = *b++ + 1; >> >> + return l; >> >> +} >> >> + >> >> +TYPE __attribute__ ((noinline)) >> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) >> >> +{ >> >> + for (l = L_BASE_DOWN; l < n; l -= C) > I noticed that both L_BASE and L_BASE_DOWN are defined as l, which > makes this test a bit confusing. Could you clean the use of l, for > example, by using an auto var for the loop index invariable? > Otherwise the patch looks good to me. Thanks very much for the work. Hi, Sorry for bothering you here. I feel this would be an approval (with the comment) already :) With the change code to make it a little clear as: TYPE i; for (i = l; n < i; i += C) it may be ok to commit the patch to the trunk, right? BR, Jiufu > > Thanks, > bin >> >> + *a++ = *b++ + 1; >> >> + return l; >> >> +} >> >> + >> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } >> >> + >> >> +int a[1000], b[1000]; >> >> +int fail; >> >> + >> >> +int ... >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> >> new file mode 100644 >> >> index 00000000000..94f6b99b893 >> >> --- /dev/null >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c >> >> @@ -0,0 +1,15 @@ >> >> +/* { dg-require-effective-target vect_int } */ >> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ >> >> +#define TYPE signed char >> >> +#define MIN -128 >> >> +#define MAX 127 >> >> +#define N_BASE (MAX - 32) >> >> +#define N_BASE_DOWN (MIN + 32) >> >> + >> >> +#define C 3 >> >> +#define L_BASE l >> >> +#define L_BASE_DOWN l >> >> +
On Wed, Aug 25, 2021 at 11:26 AM guojiufu <guojiufu@linux.ibm.com> wrote: > > On 2021-08-16 09:33, Bin.Cheng wrote: > > On Wed, Aug 4, 2021 at 10:42 AM guojiufu <guojiufu@linux.ibm.com> > > wrote: > >> > ... > >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> >> new file mode 100644 > >> >> index 00000000000..6eed3fa8aca > >> >> --- /dev/null > >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc > >> >> @@ -0,0 +1,63 @@ > >> >> +TYPE __attribute__ ((noinline)) > >> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > >> >> +{ > >> >> + for (l = L_BASE; n < l; l += C) > >> >> + *a++ = *b++ + 1; > >> >> + return l; > >> >> +} > >> >> + > >> >> +TYPE __attribute__ ((noinline)) > >> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) > >> >> +{ > >> >> + for (l = L_BASE_DOWN; l < n; l -= C) > > I noticed that both L_BASE and L_BASE_DOWN are defined as l, which > > makes this test a bit confusing. Could you clean the use of l, for > > example, by using an auto var for the loop index invariable? > > Otherwise the patch looks good to me. Thanks very much for the work. > > Hi, > > Sorry for bothering you here. > I feel this would be an approval (with the comment) already :) > > With the change code to make it a little clear as: > TYPE i; > for (i = l; n < i; i += C) > > it may be ok to commit the patch to the trunk, right? Yes please. Thanks again for working on this. Thanks, bin > > BR, > Jiufu > > > > > Thanks, > > bin > >> >> + *a++ = *b++ + 1; > >> >> + return l; > >> >> +} > >> >> + > >> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } > >> >> + > >> >> +int a[1000], b[1000]; > >> >> +int fail; > >> >> + > >> >> +int > ... > >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> >> new file mode 100644 > >> >> index 00000000000..94f6b99b893 > >> >> --- /dev/null > >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c > >> >> @@ -0,0 +1,15 @@ > >> >> +/* { dg-require-effective-target vect_int } */ > >> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */ > >> >> +#define TYPE signed char > >> >> +#define MIN -128 > >> >> +#define MAX 127 > >> >> +#define N_BASE (MAX - 32) > >> >> +#define N_BASE_DOWN (MIN + 32) > >> >> + > >> >> +#define C 3 > >> >> +#define L_BASE l > >> >> +#define L_BASE_DOWN l > >> >> +
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c b/gcc/testsuite/gcc.dg/vect/pr101145.c new file mode 100644 index 00000000000..74031b031cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c @@ -0,0 +1,187 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#include <limits.h> + +unsigned __attribute__ ((noinline)) +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (UINT_MAX - 64 < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = UINT_MAX - 32; + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (n <= ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ // infininate + while (0 <= ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + //no loop + l = UINT_MAX; + while (n < ++l) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + while (--l < n) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned) +{ + while (--l < 64) + *a++ = *b++ + 1; + return l; +} + +unsigned __attribute__ ((noinline)) +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n) +{ + l = 32; + while (--l < n) + *a++ = *b++ + 1; + return l; +} + + +int a[3200], b[3200]; +int fail; + +int +main () +{ + unsigned l, n; + unsigned res; + /* l > n*/ + n = UINT_MAX - 64; + l = n + 32; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n - 1; + res = foo (a, b, l, n); + if (res != l + 1) + fail++; + + l = n - 32; + res = foo (a, b, l, n); + if (res != l + 1) + fail++; + + l = UINT_MAX; + res = foo (a, b, l, n); + if (res != 0) + fail++; + + l = n + 32; + res = foo_1 (a, b, l, n); + if (res != 0) + fail++; + + l = n + 32; + res = foo_2 (a, b, l, n); + if (res != 0) + fail++; + + l = n; + res = foo_3 (a, b, l, n); + if (res != 0) + fail++; + + l = n - 1; + res = foo_3 (a, b, l, n); + if (res != 0) + fail++; + + l = n - 2; + res = foo_3 (a, b, l, n); + if (res != l + 1) + fail++; + + res = foo_5 (a, b, l, n); + if (res != 0) + fail++; + + n = 64; + l = n - 32; + res = bar (a, b, l, n); + res++; + if (res != 0) + fail++; + + l = n; + res = bar (a, b, l, n); + res++; + if (res != 0) + fail++; + + l = n + 1; + res = bar (a, b, l, n); + res++; + if (res != l) + fail++; + + l = 0; + res = bar (a, b, l, n); + res++; + if (res != l) + fail++; + + l = 32; + res = bar_1 (a, b, l, n); + res++; + if (res != 0) + fail++; + + res = bar_1 (a, b, l, n); + res++; + if (res != 0) + fail++; + + if (fail) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc b/gcc/testsuite/gcc.dg/vect/pr101145.inc new file mode 100644 index 00000000000..6eed3fa8aca --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc @@ -0,0 +1,63 @@ +TYPE __attribute__ ((noinline)) +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) +{ + for (l = L_BASE; n < l; l += C) + *a++ = *b++ + 1; + return l; +} + +TYPE __attribute__ ((noinline)) +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n) +{ + for (l = L_BASE_DOWN; l < n; l -= C) + *a++ = *b++ + 1; + return l; +} + +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; } + +int a[1000], b[1000]; +int fail; + +int +main () +{ + TYPE res; + TYPE l; + TYPE n; + n = N_BASE; + l = n - C; + res = foo_sign (a, b, l, n); + if (res != l) + fail++; + + l = n; + res = foo_sign (a, b, l, n); + if (res != l) + fail++; + + l = n + C; + res = foo_sign (a, b, l, n); + if (neq ((res - MIN) / C, 0)) + fail++; + + n = N_BASE_DOWN; + l = n - C; + res = bar_sign (a, b, l, n); + if (neq ((MAX - res) / C, 0)) + fail++; + + l = n; + res = bar_sign (a, b, l, n); + if (res != l) + fail++; + + l = n + C; + res = bar_sign (a, b, l, n); + if (res != l) + fail++; + + if (fail) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c b/gcc/testsuite/gcc.dg/vect/pr101145_1.c new file mode 100644 index 00000000000..94f6b99b893 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c @@ -0,0 +1,15 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE signed char +#define MIN -128 +#define MAX 127 +#define N_BASE (MAX - 32) +#define N_BASE_DOWN (MIN + 32) + +#define C 3 +#define L_BASE l +#define L_BASE_DOWN l + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c b/gcc/testsuite/gcc.dg/vect/pr101145_2.c new file mode 100644 index 00000000000..d3cfc9e01e1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c @@ -0,0 +1,15 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE unsigned char +#define MIN 0 +#define MAX 255 +#define N_BASE (MAX - 32 + 1) +#define N_BASE_DOWN (MIN + 32) + +#define C 2 +#define L_BASE l +#define L_BASE_DOWN l + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c b/gcc/testsuite/gcc.dg/vect/pr101145_3.c new file mode 100644 index 00000000000..9d5863726a5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c @@ -0,0 +1,15 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ +#define TYPE int * +#define MIN ((TYPE)0) +#define MAX ((TYPE)((long long)-1)) +#define N_BASE (MIN - 32) +#define N_BASE_DOWN (MIN + 32) + +#define C 1 +#define L_BASE l +#define L_BASE_DOWN l + +#include "pr101145.inc" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.c b/gcc/testsuite/gcc.dg/vect/pr101145inf.c new file mode 100644 index 00000000000..ed49f5670b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.c @@ -0,0 +1,25 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-options "-O3" } */ +#include <limits.h> +#include "pr101145inf.inc" + +__attribute__ ((noinline)) +unsigned foo(unsigned val, unsigned start) +{ + unsigned cnt = 0; + for (unsigned i = start; val <= i; i+=16) + cnt++; + return cnt; +} + +void test_finite () +{ + unsigned n = foo (16, UINT_MAX - 32); + if (n != 3) + __builtin_abort (); +} + +void test_infinite () +{ + foo (15, UINT_MAX - 32); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf.inc b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc new file mode 100644 index 00000000000..4aa3d049187 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf.inc @@ -0,0 +1,28 @@ +#include <unistd.h> +#include <signal.h> +#include <stdlib.h> + +void test_finite (); +void test_infinite (); + +void do_exit (int i) +{ + exit (0); +} + +int main(void) +{ + test_finite (); + struct sigaction s; + sigemptyset (&s.sa_mask); + s.sa_handler = do_exit; + s.sa_flags = 0; + sigaction (SIGALRM, &s, NULL); + alarm (1); + + test_infinite (); + + __builtin_abort (); + return 1; +} + diff --git a/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c new file mode 100644 index 00000000000..4ee3e31c7fe --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr101145inf_1.c @@ -0,0 +1,23 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ +/* { dg-options "-O3" } */ +#include <limits.h> +#include "pr101145inf.inc" + +__attribute__ ((noinline)) +unsigned foo(unsigned val, unsigned start) +{ + unsigned cnt = 0; + for (unsigned i = start; i < val; i-=16) + cnt++; + return cnt; +} + +void test_finite () +{ + foo (UINT_MAX - 15, 32); +} + +void test_infinite () +{ + foo (UINT_MAX - 14, 32); +} diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index b5add827018..85c179be68d 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -1473,6 +1473,93 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1, } } +/* Determines number of iterations of loop whose ending condition + is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}. + The number of iterations is stored to NITER. */ + +static bool +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0, + affine_iv *iv1, class tree_niter_desc *niter) +{ + tree niter_type = unsigned_type_for (type); + tree step, num, assumptions, may_be_zero; + wide_int high, low, max, min; + + may_be_zero = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base); + if (integer_onep (may_be_zero)) + return false; + + int prec = TYPE_PRECISION (type); + signop sgn = TYPE_SIGN (type); + min = wi::min_value (prec, sgn); + max = wi::max_value (prec, sgn); + + /* n < {base, C}. */ + if (integer_zerop (iv0->step) && !tree_int_cst_sign_bit (iv1->step)) + { + step = iv1->step; + /* MIN + C - 1 <= n. */ + tree last = wide_int_to_tree (type, min + wi::to_wide (step) - 1); + assumptions = fold_build2 (LE_EXPR, boolean_type_node, last, iv0->base); + if (integer_zerop (assumptions)) + return false; + + num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree (type, max), + iv1->base); + high = max; + if (TREE_CODE (iv1->base) == INTEGER_CST) + low = wi::to_wide (iv1->base) - 1; + else if (TREE_CODE (iv0->base) == INTEGER_CST) + low = wi::to_wide (iv0->base); + else + low = min; + } + /* {base, -C} < n. */ + else if (tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step)) + { + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step); + /* MAX - C + 1 >= n. */ + tree last = wide_int_to_tree (type, max - wi::to_wide (step) + 1); + assumptions = fold_build2 (GE_EXPR, boolean_type_node, last, iv1->base); + if (integer_zerop (assumptions)) + return false; + + num = fold_build2 (MINUS_EXPR, niter_type, iv0->base, + wide_int_to_tree (type, min)); + low = min; + if (TREE_CODE (iv0->base) == INTEGER_CST) + high = wi::to_wide (iv0->base) + 1; + else if (TREE_CODE (iv1->base) == INTEGER_CST) + high = wi::to_wide (iv1->base); + else + high = max; + } + else + return false; + + /* (delta + step - 1) / step */ + step = fold_convert (niter_type, step); + num = fold_convert (niter_type, num); + num = fold_build2 (PLUS_EXPR, niter_type, num, step); + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, num, step); + + widest_int delta, s; + delta = widest_int::from (high, sgn) - widest_int::from (low, sgn); + s = wi::to_widest (step); + delta = delta + s - 1; + niter->max = wi::udiv_floor (delta, s); + + niter->may_be_zero = may_be_zero; + + if (!integer_nonzerop (assumptions)) + niter->assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + niter->assumptions, assumptions); + + niter->control.no_overflow = false; + + return true; +} + /* Determines number of iterations of loop whose ending condition is IV0 < IV1. TYPE is the type of the iv. The number of iterations is stored to NITER. BNDS bounds the difference @@ -1501,6 +1588,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0, niter->bound = iv0->base; } + /* {base, -C} < n, or n < {base, C} */ + if (tree_int_cst_sign_bit (iv0->step) + || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))) + return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter); + delta = fold_build2 (MINUS_EXPR, niter_type, fold_convert (niter_type, iv1->base), fold_convert (niter_type, iv0->base)); @@ -1665,62 +1757,6 @@ dump_affine_iv (FILE *file, affine_iv *iv) } } -/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts - the condition for loop-until-wrap cases. For example: - (unsigned){8, -1}_loop < 10 => {0, 1} != 9 - 10 < (unsigned){0, max - 7}_loop => {0, 1} != 8 - Return true if condition is successfully adjusted. */ - -static bool -adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code, - affine_iv *iv1) -{ - /* Only support simple cases for the moment. */ - if (TREE_CODE (iv0->base) != INTEGER_CST - || TREE_CODE (iv1->base) != INTEGER_CST) - return false; - - tree niter_type = unsigned_type_for (type), high, low; - /* Case: i-- < 10. */ - if (integer_zerop (iv1->step)) - { - /* TODO: Should handle case in which abs(step) != 1. */ - if (!integer_minus_onep (iv0->step)) - return false; - /* Give up on infinite loop. */ - if (*code == LE_EXPR - && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type))) - return false; - high = fold_build2 (PLUS_EXPR, niter_type, - fold_convert (niter_type, iv0->base), - build_int_cst (niter_type, 1)); - low = fold_convert (niter_type, TYPE_MIN_VALUE (type)); - } - else if (integer_zerop (iv0->step)) - { - /* TODO: Should handle case in which abs(step) != 1. */ - if (!integer_onep (iv1->step)) - return false; - /* Give up on infinite loop. */ - if (*code == LE_EXPR - && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type))) - return false; - high = fold_convert (niter_type, TYPE_MAX_VALUE (type)); - low = fold_build2 (MINUS_EXPR, niter_type, - fold_convert (niter_type, iv1->base), - build_int_cst (niter_type, 1)); - } - else - gcc_unreachable (); - - iv0->base = low; - iv0->step = fold_convert (niter_type, integer_one_node); - iv1->base = high; - iv1->step = build_int_cst (niter_type, 0); - *code = NE_EXPR; - return true; -} - /* Determine the number of iterations according to condition (for staying inside loop) which compares two induction variables using comparison operator CODE. The induction variable on left side of the comparison @@ -1855,15 +1891,6 @@ number_of_iterations_cond (class loop *loop, return true; } - /* Handle special case loops: while (i-- < 10) and while (10 < i++) by - adjusting iv0, iv1 and code. */ - if (code != NE_EXPR - && (tree_int_cst_sign_bit (iv0->step) - || (!integer_zerop (iv1->step) - && !tree_int_cst_sign_bit (iv1->step))) - && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1)) - return false; - /* OK, now we know we have a senseful loop. Handle several cases, depending on what comparison operator is used. */ bound_difference (loop, iv1->base, iv0->base, &bnds);