Message ID | 20101220222822.GB16156@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Mon, Dec 20, 2010 at 16:28, Jakub Jelinek <jakub@redhat.com> wrote: > On Sun, Dec 19, 2010 at 11:05:07PM -0600, Sebastian Pop wrote: >> The patch looks good to me, but I cannot approve it. >> >> > /* We might have some leftover. */ >> > - if (gimple_cond_code (exit_cond) == LT_EXPR) >> > - extra = -1 * stepint; >> > - else if (gimple_cond_code (exit_cond) == NE_EXPR) >> > - extra = -1 * stepint; >> > - else if (gimple_cond_code (exit_cond) == GT_EXPR) >> > - extra = -1 * stepint; >> > - else if (gimple_cond_code (exit_cond) == EQ_EXPR) >> > - extra = 1 * stepint; >> > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) >> > + { >> > + if (gimple_cond_code (exit_cond) == LT_EXPR) >> > + extra = -1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == NE_EXPR) >> > + extra = -1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == GT_EXPR) >> > + extra = -1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == EQ_EXPR) >> > + extra = 1 * stepint; >> > + } >> > + else >> > + { >> > + if (gimple_cond_code (exit_cond) == LE_EXPR) >> > + extra = 1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == GE_EXPR) >> > + extra = 1 * stepint; >> > + } >> >> It would be easier to read the following equivalent code: > > Or a switch, as done in this patch, bootstrapped/regtested on x86_64-linux > and i686-linux: I like the switch, thanks. Richi, could you have a look at this patch as well. Thanks, Sebastian > > 2010-12-20 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/46970 > * lambda-code.c (gcc_loop_to_lambda_loop): Give up if > step doesn't fit into host int or if def stmt of inductionvar > is neither PHI nor increment by step. If exit condition > compares induction variable before increment, adjust ubound > differently. > > * gcc.dg/pr46970-1.c: New test. > * gcc.dg/pr46970-2.c: New test. > > --- gcc/lambda-code.c.jj 2010-08-20 16:05:41.000000000 +0200 > +++ gcc/lambda-code.c 2010-12-20 21:19:12.000000000 +0100 > @@ -1289,7 +1289,7 @@ gcc_loop_to_lambda_loop (struct loop *lo > if (gimple_code (phi) != GIMPLE_PHI) > { > tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE); > - if (!op) > + if (!op || !is_gimple_assign (phi)) > { > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo > > return NULL; > } > - if (TREE_CODE (step) != INTEGER_CST) > + if (!host_integerp (step, 0) > + || (HOST_WIDE_INT) TREE_INT_CST_LOW (step) > + != (int) TREE_INT_CST_LOW (step)) > { > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo > return NULL; > } > > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) > + { > + gimple stmt = SSA_NAME_DEF_STMT (inductionvar); > + tree rhs2; > + int stepintval = stepint; > + switch (gimple_assign_rhs_code (stmt)) > + { > + case MINUS_EXPR: > + stepintval = -stepint; > + /* FALLTHRU */ > + case PLUS_EXPR: > + case POINTER_PLUS_EXPR: > + rhs2 = gimple_assign_rhs2 (stmt); > + if (TREE_CODE (rhs2) == INTEGER_CST > + && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval > + && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1)) > + break; > + /* FALLTHRU */ > + default: > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + "Unable to convert loop: Cannot find PHI node for induction variable\n"); > + return NULL; > + } > + } > + > if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)) > { > lboundvar = PHI_ARG_DEF (phi, 1); > @@ -1417,14 +1445,30 @@ gcc_loop_to_lambda_loop (struct loop *lo > > > /* We might have some leftover. */ > - if (gimple_cond_code (exit_cond) == LT_EXPR) > - extra = -1 * stepint; > - else if (gimple_cond_code (exit_cond) == NE_EXPR) > - extra = -1 * stepint; > - else if (gimple_cond_code (exit_cond) == GT_EXPR) > - extra = -1 * stepint; > - else if (gimple_cond_code (exit_cond) == EQ_EXPR) > - extra = 1 * stepint; > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) > + switch (gimple_cond_code (exit_cond)) > + { > + case LT_EXPR: > + case NE_EXPR: > + case GT_EXPR: > + extra = -1 * stepint; > + break; > + case EQ_EXPR: > + extra = 1 * stepint; > + break; > + default: > + break; > + } > + else > + switch (gimple_cond_code (exit_cond)) > + { > + case LE_EXPR: > + case GE_EXPR: > + extra = 1 * stepint; > + break; > + default: > + break; > + } > > ubound = gcc_tree_to_linear_expression (depth, uboundvar, > outerinductionvars, > --- gcc/testsuite/gcc.dg/pr46970-1.c.jj 2010-12-17 16:08:01.000000000 +0100 > +++ gcc/testsuite/gcc.dg/pr46970-1.c 2010-12-17 16:07:46.000000000 +0100 > @@ -0,0 +1,27 @@ > +/* PR tree-optimization/46970 */ > +/* { dg-do run } */ > +/* { dg-options "-Os -ftree-loop-linear" } */ > + > +int > +foo (int n, int *a) > +{ > + int i, j; > + > + for (i = 0; i < n; i++) > + for (j = 0; j < n; j++) > + a[j] = i + n; > + > + for (j = 0; j < n; j++) > + if (a[j] != i + n - 1) > + __builtin_abort (); > + > + return 0; > +} > + > +int > +main () > +{ > + int a[16]; > + foo (16, a); > + return 0; > +} > --- gcc/testsuite/gcc.dg/pr46970-2.c.jj 2010-12-17 18:01:05.000000000 +0100 > +++ gcc/testsuite/gcc.dg/pr46970-2.c 2010-12-17 16:43:46.000000000 +0100 > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -ftree-loop-linear" } */ > +/* { dg-require-effective-target size32plus } */ > + > +double u[1782225]; > + > +__attribute__((noinline, noclone)) int > +foo (int N, int *res) > +{ > + unsigned int i, j; > + double sum = 0; > + for (i = 0; i < N; i++) > + for (j = 0; j < N; j++) > + sum = sum + u[i + 1335 * j]; > + *res = sum + N; > +} > + > +int > +main (void) > +{ > + int i; > + for (i = 0; i < 1782225; i++) > + u[i] = 1.0; > + foo (64, &i); > + if (i != 4160) > + __builtin_abort (); > + return 0; > +} > > Jakub >
On Mon, Dec 20, 2010 at 11:28 PM, Jakub Jelinek <jakub@redhat.com> wrote: > On Sun, Dec 19, 2010 at 11:05:07PM -0600, Sebastian Pop wrote: >> The patch looks good to me, but I cannot approve it. >> >> > /* We might have some leftover. */ >> > - if (gimple_cond_code (exit_cond) == LT_EXPR) >> > - extra = -1 * stepint; >> > - else if (gimple_cond_code (exit_cond) == NE_EXPR) >> > - extra = -1 * stepint; >> > - else if (gimple_cond_code (exit_cond) == GT_EXPR) >> > - extra = -1 * stepint; >> > - else if (gimple_cond_code (exit_cond) == EQ_EXPR) >> > - extra = 1 * stepint; >> > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) >> > + { >> > + if (gimple_cond_code (exit_cond) == LT_EXPR) >> > + extra = -1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == NE_EXPR) >> > + extra = -1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == GT_EXPR) >> > + extra = -1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == EQ_EXPR) >> > + extra = 1 * stepint; >> > + } >> > + else >> > + { >> > + if (gimple_cond_code (exit_cond) == LE_EXPR) >> > + extra = 1 * stepint; >> > + else if (gimple_cond_code (exit_cond) == GE_EXPR) >> > + extra = 1 * stepint; >> > + } >> >> It would be easier to read the following equivalent code: > > Or a switch, as done in this patch, bootstrapped/regtested on x86_64-linux > and i686-linux: > > 2010-12-20 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/46970 > * lambda-code.c (gcc_loop_to_lambda_loop): Give up if > step doesn't fit into host int or if def stmt of inductionvar > is neither PHI nor increment by step. If exit condition > compares induction variable before increment, adjust ubound > differently. > > * gcc.dg/pr46970-1.c: New test. > * gcc.dg/pr46970-2.c: New test. > > --- gcc/lambda-code.c.jj 2010-08-20 16:05:41.000000000 +0200 > +++ gcc/lambda-code.c 2010-12-20 21:19:12.000000000 +0100 > @@ -1289,7 +1289,7 @@ gcc_loop_to_lambda_loop (struct loop *lo > if (gimple_code (phi) != GIMPLE_PHI) > { > tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE); > - if (!op) > + if (!op || !is_gimple_assign (phi)) > { > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo > > return NULL; > } > - if (TREE_CODE (step) != INTEGER_CST) > + if (!host_integerp (step, 0) > + || (HOST_WIDE_INT) TREE_INT_CST_LOW (step) > + != (int) TREE_INT_CST_LOW (step)) That casting looks odd. Why isn't the (supposed) use of step as 'int' not changed to a HOST_WIDE_INT? > { > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo > return NULL; > } > > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) > + { > + gimple stmt = SSA_NAME_DEF_STMT (inductionvar); > + tree rhs2; > + int stepintval = stepint; > + switch (gimple_assign_rhs_code (stmt)) > + { > + case MINUS_EXPR: > + stepintval = -stepint; > + /* FALLTHRU */ > + case PLUS_EXPR: > + case POINTER_PLUS_EXPR: > + rhs2 = gimple_assign_rhs2 (stmt); > + if (TREE_CODE (rhs2) == INTEGER_CST > + && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval > + && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1)) That would also simplify this code - why not tree_int_cst_equal (step, rhs2) btw? The rest of the patch looks sensible, though I know nothing about tree-loop-linear. Richard. > + break; > + /* FALLTHRU */ > + default: > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, > + "Unable to convert loop: Cannot find PHI node for induction variable\n"); > + return NULL; > + } > + } > + > if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)) > { > lboundvar = PHI_ARG_DEF (phi, 1); > @@ -1417,14 +1445,30 @@ gcc_loop_to_lambda_loop (struct loop *lo > > > /* We might have some leftover. */ > - if (gimple_cond_code (exit_cond) == LT_EXPR) > - extra = -1 * stepint; > - else if (gimple_cond_code (exit_cond) == NE_EXPR) > - extra = -1 * stepint; > - else if (gimple_cond_code (exit_cond) == GT_EXPR) > - extra = -1 * stepint; > - else if (gimple_cond_code (exit_cond) == EQ_EXPR) > - extra = 1 * stepint; > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) > + switch (gimple_cond_code (exit_cond)) > + { > + case LT_EXPR: > + case NE_EXPR: > + case GT_EXPR: > + extra = -1 * stepint; > + break; > + case EQ_EXPR: > + extra = 1 * stepint; > + break; > + default: > + break; > + } > + else > + switch (gimple_cond_code (exit_cond)) > + { > + case LE_EXPR: > + case GE_EXPR: > + extra = 1 * stepint; > + break; > + default: > + break; > + } > > ubound = gcc_tree_to_linear_expression (depth, uboundvar, > outerinductionvars, > --- gcc/testsuite/gcc.dg/pr46970-1.c.jj 2010-12-17 16:08:01.000000000 +0100 > +++ gcc/testsuite/gcc.dg/pr46970-1.c 2010-12-17 16:07:46.000000000 +0100 > @@ -0,0 +1,27 @@ > +/* PR tree-optimization/46970 */ > +/* { dg-do run } */ > +/* { dg-options "-Os -ftree-loop-linear" } */ > + > +int > +foo (int n, int *a) > +{ > + int i, j; > + > + for (i = 0; i < n; i++) > + for (j = 0; j < n; j++) > + a[j] = i + n; > + > + for (j = 0; j < n; j++) > + if (a[j] != i + n - 1) > + __builtin_abort (); > + > + return 0; > +} > + > +int > +main () > +{ > + int a[16]; > + foo (16, a); > + return 0; > +} > --- gcc/testsuite/gcc.dg/pr46970-2.c.jj 2010-12-17 18:01:05.000000000 +0100 > +++ gcc/testsuite/gcc.dg/pr46970-2.c 2010-12-17 16:43:46.000000000 +0100 > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -ftree-loop-linear" } */ > +/* { dg-require-effective-target size32plus } */ > + > +double u[1782225]; > + > +__attribute__((noinline, noclone)) int > +foo (int N, int *res) > +{ > + unsigned int i, j; > + double sum = 0; > + for (i = 0; i < N; i++) > + for (j = 0; j < N; j++) > + sum = sum + u[i + 1335 * j]; > + *res = sum + N; > +} > + > +int > +main (void) > +{ > + int i; > + for (i = 0; i < 1782225; i++) > + u[i] = 1.0; > + foo (64, &i); > + if (i != 4160) > + __builtin_abort (); > + return 0; > +} > > Jakub >
On Wed, Dec 22, 2010 at 12:45:31PM +0100, Richard Guenther wrote: > > @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo > > > > return NULL; > > } > > - if (TREE_CODE (step) != INTEGER_CST) > > + if (!host_integerp (step, 0) > > + || (HOST_WIDE_INT) TREE_INT_CST_LOW (step) > > + != (int) TREE_INT_CST_LOW (step)) > > That casting looks odd. Why isn't the (supposed) use of step as 'int' not > changed to a HOST_WIDE_INT? I've briefly looked at it, but changing int to HOST_WIDE_INT would be a huge change, int is used basically everywhere in lambda-code, starting from *lambda_vector and lot of other places. Changing just LL_STEP to HOST_WIDE_INT wouldn't help. lambda-code looks very suspicious in many places, e.g. I don't see anywhere where it would bother with checking for overflows etc., wonder how it ever could get through patch review. Niceties like: switch (TREE_CODE (expr)) { case INTEGER_CST: { lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack); LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr); if (extra != 0) LLE_CONSTANT (lle) += extra; LLE_DENOMINATOR (lle) = 1; } break; where it both doesn't bother to check that expr is host_integerp and doesn't care if expr + extra doesn't overflow. > > @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo > > return NULL; > > } > > > > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) > > + { > > + gimple stmt = SSA_NAME_DEF_STMT (inductionvar); > > + tree rhs2; > > + int stepintval = stepint; > > + switch (gimple_assign_rhs_code (stmt)) > > + { > > + case MINUS_EXPR: > > + stepintval = -stepint; > > + /* FALLTHRU */ > > + case PLUS_EXPR: > > + case POINTER_PLUS_EXPR: > > + rhs2 = gimple_assign_rhs2 (stmt); > > + if (TREE_CODE (rhs2) == INTEGER_CST > > + && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval > > + && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1)) > > That would also simplify this code - why not tree_int_cst_equal (step, > rhs2) btw? Because for MINUS_EXPR we don't want to check step, but - step ? And compare_tree_int is an UHWI comparison instead of HWI. Jakub
On Tue, Jan 4, 2011 at 1:45 PM, Jakub Jelinek <jakub@redhat.com> wrote: > On Wed, Dec 22, 2010 at 12:45:31PM +0100, Richard Guenther wrote: >> > @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo >> > >> > return NULL; >> > } >> > - if (TREE_CODE (step) != INTEGER_CST) >> > + if (!host_integerp (step, 0) >> > + || (HOST_WIDE_INT) TREE_INT_CST_LOW (step) >> > + != (int) TREE_INT_CST_LOW (step)) >> >> That casting looks odd. Why isn't the (supposed) use of step as 'int' not >> changed to a HOST_WIDE_INT? > > I've briefly looked at it, but changing int to HOST_WIDE_INT would be a huge > change, int is used basically everywhere in lambda-code, starting from > *lambda_vector and lot of other places. Changing just LL_STEP to > HOST_WIDE_INT wouldn't help. lambda-code looks very suspicious in many > places, e.g. I don't see anywhere where it would bother with checking for > overflows etc., wonder how it ever could get through patch review. > Niceties like: > switch (TREE_CODE (expr)) > { > case INTEGER_CST: > { > lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack); > LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr); > if (extra != 0) > LLE_CONSTANT (lle) += extra; > > LLE_DENOMINATOR (lle) = 1; > } > break; > where it both doesn't bother to check that expr is host_integerp and doesn't > care if expr + extra doesn't overflow. > >> > @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo >> > return NULL; >> > } >> > >> > + if (SSA_NAME_DEF_STMT (inductionvar) != phi) >> > + { >> > + gimple stmt = SSA_NAME_DEF_STMT (inductionvar); >> > + tree rhs2; >> > + int stepintval = stepint; >> > + switch (gimple_assign_rhs_code (stmt)) >> > + { >> > + case MINUS_EXPR: >> > + stepintval = -stepint; >> > + /* FALLTHRU */ >> > + case PLUS_EXPR: >> > + case POINTER_PLUS_EXPR: >> > + rhs2 = gimple_assign_rhs2 (stmt); >> > + if (TREE_CODE (rhs2) == INTEGER_CST >> > + && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval >> > + && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1)) >> >> That would also simplify this code - why not tree_int_cst_equal (step, >> rhs2) btw? > > Because for MINUS_EXPR we don't want to check step, but - step ? > And compare_tree_int is an UHWI comparison instead of HWI. Ugh. Sebastian - can we nuke tree-loop-linear compeltely and make -ftree-loop-linear an alias for -floop-interchange without regressions? I'd like to reduce the number of broken passes from 2 to 1 this way ... Richard. > Jakub >
On Tue, Jan 4, 2011 at 10:22, Richard Guenther <richard.guenther@gmail.com> wrote: > Ugh. Sebastian - can we nuke tree-loop-linear compeltely and > make -ftree-loop-linear an alias for -floop-interchange without > regressions? I'd like to reduce the number of broken passes from > 2 to 1 this way ... I wouldn't mind removing tree-loop-linear, although other people should also give their opinion on this matter: tree-loop-linear has no external dependences whereas -floop-interchange depends on cloog and ppl. Also we should get all the testsuite/gcc.dg/tree-ssa/ltrans-*.c passing with -floop-interchange. I will add all these testcases to the graphite testsuite and see where we stand. Sebastian
On Tue, Jan 4, 2011 at 10:54, Sebastian Pop <sebpop@gmail.com> wrote: > On Tue, Jan 4, 2011 at 10:22, Richard Guenther > <richard.guenther@gmail.com> wrote: >> Ugh. Sebastian - can we nuke tree-loop-linear compeltely and >> make -ftree-loop-linear an alias for -floop-interchange without >> regressions? I'd like to reduce the number of broken passes from >> 2 to 1 this way ... > > I wouldn't mind removing tree-loop-linear, although other people > should also give their opinion on this matter: tree-loop-linear has no > external dependences whereas -floop-interchange depends on cloog and ppl. > > Also we should get all the testsuite/gcc.dg/tree-ssa/ltrans-*.c > passing with -floop-interchange. I will add all these testcases to > the graphite testsuite and see where we stand. I already included the ltrans-{1,2,3,4,5,6,8}.c testcases in the graphite testsuite as interchange-{1,2,3,4,5,6,7}.c respectively, out of which all except interchange-{1,2}.c are interchanged. interchange-1.c is the equivalent in C of the critical loop of CFP2000/171.swim: with N = 1335 for (i = 0; i < N; i++) { for (j = 0; j < N; j++) sum = sum + u[i + 1335 * j]; u[1336 * i] *= 2; } it looks like the data dependence analysis of graphite does not validate the loop distribution before interchange: for (i = 0; i < N; i++) for (j = 0; j < N; j++) sum = sum + u[i + 1335 * j]; for (i = 0; i < N; i++) u[1336 * i] *= 2; Sebastian
--- gcc/lambda-code.c.jj 2010-08-20 16:05:41.000000000 +0200 +++ gcc/lambda-code.c 2010-12-20 21:19:12.000000000 +0100 @@ -1289,7 +1289,7 @@ gcc_loop_to_lambda_loop (struct loop *lo if (gimple_code (phi) != GIMPLE_PHI) { tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE); - if (!op) + if (!op || !is_gimple_assign (phi)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -1332,7 +1332,9 @@ gcc_loop_to_lambda_loop (struct loop *lo return NULL; } - if (TREE_CODE (step) != INTEGER_CST) + if (!host_integerp (step, 0) + || (HOST_WIDE_INT) TREE_INT_CST_LOW (step) + != (int) TREE_INT_CST_LOW (step)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -1366,6 +1368,32 @@ gcc_loop_to_lambda_loop (struct loop *lo return NULL; } + if (SSA_NAME_DEF_STMT (inductionvar) != phi) + { + gimple stmt = SSA_NAME_DEF_STMT (inductionvar); + tree rhs2; + int stepintval = stepint; + switch (gimple_assign_rhs_code (stmt)) + { + case MINUS_EXPR: + stepintval = -stepint; + /* FALLTHRU */ + case PLUS_EXPR: + case POINTER_PLUS_EXPR: + rhs2 = gimple_assign_rhs2 (stmt); + if (TREE_CODE (rhs2) == INTEGER_CST + && (HOST_WIDE_INT) TREE_INT_CST_LOW (rhs2) == stepintval + && TREE_INT_CST_HIGH (rhs2) == (stepintval >= 0 ? 0 : -1)) + break; + /* FALLTHRU */ + default: + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Unable to convert loop: Cannot find PHI node for induction variable\n"); + return NULL; + } + } + if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)) { lboundvar = PHI_ARG_DEF (phi, 1); @@ -1417,14 +1445,30 @@ gcc_loop_to_lambda_loop (struct loop *lo /* We might have some leftover. */ - if (gimple_cond_code (exit_cond) == LT_EXPR) - extra = -1 * stepint; - else if (gimple_cond_code (exit_cond) == NE_EXPR) - extra = -1 * stepint; - else if (gimple_cond_code (exit_cond) == GT_EXPR) - extra = -1 * stepint; - else if (gimple_cond_code (exit_cond) == EQ_EXPR) - extra = 1 * stepint; + if (SSA_NAME_DEF_STMT (inductionvar) != phi) + switch (gimple_cond_code (exit_cond)) + { + case LT_EXPR: + case NE_EXPR: + case GT_EXPR: + extra = -1 * stepint; + break; + case EQ_EXPR: + extra = 1 * stepint; + break; + default: + break; + } + else + switch (gimple_cond_code (exit_cond)) + { + case LE_EXPR: + case GE_EXPR: + extra = 1 * stepint; + break; + default: + break; + } ubound = gcc_tree_to_linear_expression (depth, uboundvar, outerinductionvars, --- gcc/testsuite/gcc.dg/pr46970-1.c.jj 2010-12-17 16:08:01.000000000 +0100 +++ gcc/testsuite/gcc.dg/pr46970-1.c 2010-12-17 16:07:46.000000000 +0100 @@ -0,0 +1,27 @@ +/* PR tree-optimization/46970 */ +/* { dg-do run } */ +/* { dg-options "-Os -ftree-loop-linear" } */ + +int +foo (int n, int *a) +{ + int i, j; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + a[j] = i + n; + + for (j = 0; j < n; j++) + if (a[j] != i + n - 1) + __builtin_abort (); + + return 0; +} + +int +main () +{ + int a[16]; + foo (16, a); + return 0; +} --- gcc/testsuite/gcc.dg/pr46970-2.c.jj 2010-12-17 18:01:05.000000000 +0100 +++ gcc/testsuite/gcc.dg/pr46970-2.c 2010-12-17 16:43:46.000000000 +0100 @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-loop-linear" } */ +/* { dg-require-effective-target size32plus } */ + +double u[1782225]; + +__attribute__((noinline, noclone)) int +foo (int N, int *res) +{ + unsigned int i, j; + double sum = 0; + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + sum = sum + u[i + 1335 * j]; + *res = sum + N; +} + +int +main (void) +{ + int i; + for (i = 0; i < 1782225; i++) + u[i] = 1.0; + foo (64, &i); + if (i != 4160) + __builtin_abort (); + return 0; +}