Message ID | 20210604080419.119328-1-guojiufu@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V3] Split loop for NE condition. | expand |
On Fri, 4 Jun 2021, Jiufu Guo wrote: > Update the patch since v2: > . Check index and bound from gcond before checking if wrap. > . Update test case, and add an executable case. > . Refine code comments. > . Enhance the checking for i++/++i in the loop header. > . Enhance code to handle equal condition on exit > > Bootstrap and regtest pass on powerpc64le, and also pass regtest > on bootstrap-O3. Is this ok for trunk? > > BR. > Jiufu Guo. > > > When there is the possibility that wrap may happen on the loop index, > a few optimizations would not happen. For example code: > > foo (int *a, int *b, unsigned k, unsigned n) > { > while (++k != n) > a[k] = b[k] + 1; > } > > For this code, if "k > n", k would wrap. if "k < n" at begining, > it could be optimized (e.g. vectorization). > > We can split the loop into two loops: > > while (++k > n) > a[k] = b[k] + 1; > while (k++ < n) > a[k] = b[k] + 1; > > This patch splits this kind of loop to achieve better performance. > > gcc/ChangeLog: > > 2021-06-04 Jiufu Guo <guojiufu@linux.ibm.com> > > * tree-ssa-loop-split.c (connect_loop_phis): Add new param. > (get_ne_cond_branch): New function. > (split_ne_loop): New function. > (split_loop_on_ne_cond): New function. > (tree_ssa_split_loops): Use split_loop_on_ne_cond. > > gcc/testsuite/ChangeLog: > > 2021-06-04 Jiufu Guo <guojiufu@linux.ibm.com> > > * gcc.dg/loop-split1.c: New test. > * gcc.dg/loop-split2.c: New test. > * g++.dg/vect/pr98064.cc: Suppress warning. > > --- > gcc/testsuite/g++.dg/vect/pr98064.cc | 4 +- > gcc/testsuite/gcc.dg/loop-split1.c | 101 +++++++++++ > gcc/testsuite/gcc.dg/loop-split2.c | 54 ++++++ > gcc/tree-ssa-loop-split.c | 251 ++++++++++++++++++++++++++- > 4 files changed, 404 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/loop-split1.c > create mode 100644 gcc/testsuite/gcc.dg/loop-split2.c > > diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc b/gcc/testsuite/g++.dg/vect/pr98064.cc > index 74043ce7725..dcb2985d05a 100644 > --- a/gcc/testsuite/g++.dg/vect/pr98064.cc > +++ b/gcc/testsuite/g++.dg/vect/pr98064.cc > @@ -1,5 +1,7 @@ > // { dg-do compile } > -// { dg-additional-options "-O3" } > +// { dg-additional-options "-O3 -Wno-stringop-overflow" } > +/* There is warning message when "short g = var_8; g; g++" > + is optimized/analyzed as string operation,e.g. memset. */ > > const long long &min(const long long &__a, long long &__b) { > if (__b < __a) > diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c > new file mode 100644 > index 00000000000..dd2d03a7b96 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split1.c > @@ -0,0 +1,101 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > + > +void > +foo (int *a, int *b, unsigned l, unsigned n) > +{ > + while (++l != n) > + a[l] = b[l] + 1; > +} > +void > +foo_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (++l != n) > + a[l] = b[l] + 1; > +} > + > +void > +foo1 (int *a, int *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > + a[l] = b[l] + 1; > +} > + > +/* No wrap. */ > +void > +foo1_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (l++ != n) > + a[l] = b[l] + 1; > +} > + > +unsigned > +foo2 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (++l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo2_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (++l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo3 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +/* No wrap. */ > +unsigned > +foo3_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (l++ != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +void > +bar (); > +void > +foo4 (unsigned n, unsigned i) > +{ > + do > + { > + if (i == n) > + return; > + bar (); > + ++i; > + } > + while (1); > +} > + > +unsigned > +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > +{ > + while (p[i] == q[i] && ++i != n) > + p++, q++; > + > + return i; > +} > + > +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ > diff --git a/gcc/testsuite/gcc.dg/loop-split2.c b/gcc/testsuite/gcc.dg/loop-split2.c > new file mode 100644 > index 00000000000..0d3fded3f61 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split2.c > @@ -0,0 +1,54 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3" } */ > + > +extern void abort (void); > +extern void exit (int); > + > +#define NI __attribute__ ((noinline)) > + > +void NI > +foo (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (++l != n) > + a[l] = b[l] + 1; > +} > + > +unsigned NI > +bar (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (l++ != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +int a[258]; > +int b[258]; > + > +int main() > +{ > + __builtin_memcpy (b, a, sizeof (a)); > + > + if (bar (a, b, 3, 8) != 9) > + abort (); > + > + if (bar (a, b, 8, 3) != 4) > + abort (); > + > + b[100] += 1; > + if (bar (a, b, 90, 110) != 100) > + abort (); > + > + if (bar (a, b, 110, 105) != 100) > + abort (); > + > + foo (a, b, 99, 99); > + a[99] = b[99] + 1; > + for (int i = 0; i < 256; i++) > + if (a[i] != b[i] + 1) > + abort(); > + > + exit (0); > +} > + > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > index 3a09bbc39e5..9c0de66e288 100644 > --- a/gcc/tree-ssa-loop-split.c > +++ b/gcc/tree-ssa-loop-split.c > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see > #include "cfghooks.h" > #include "gimple-fold.h" > #include "gimplify-me.h" > +#include "tree-ssa-loop-ivopts.h" > > /* This file implements two kinds of loop splitting. > > @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop) > conditional). I.e. the second loop can now be entered either > via the original entry or via NEW_E, so the entry values of LOOP2 > phi nodes are either the original ones or those at the exit > - of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting > - this. The loops need to fulfill easy_exit_values(). */ > + of LOOP1. Selecting the previous value instead next value as the > + exit value of LOOP1 if USE_PREV is true. Insert new phi nodes in > + LOOP2 pre-header reflecting this. The loops need to fulfill > + easy_exit_values(). */ > > static void > -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) > +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, > + bool use_prev = false) > { > basic_block rest = loop_preheader_edge (loop2)->src; > gcc_assert (new_e->dest == rest); > @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) > > gphi * newphi = create_phi_node (new_init, rest); > add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); > - add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); > + add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e, > + UNKNOWN_LOCATION); > SET_USE (op, new_init); > } > } > @@ -1593,6 +1598,241 @@ split_loop_on_cond (struct loop *loop) > return do_split; > } > > +/* Check if the LOOP exit branch is like "if (idx != bound)", > + Return the branch edge which exit loop, if wrap may happen on "idx". */ > + > +static edge > +get_ne_cond_branch (struct loop *loop) > +{ > + int i; > + edge e; > + > + auto_vec<edge> edges = get_loop_exit_edges (loop); > + FOR_EACH_VEC_ELT (edges, i, e) > + { > + basic_block bb = e->src; > + > + /* Check if exit at gcond. */ > + gimple *last = last_stmt (bb); > + if (!last || gimple_code (last) != GIMPLE_COND) > + continue; > + gcond *cond = as_a<gcond *> (last); > + enum tree_code code = gimple_cond_code (cond); > + if (!(code == NE_EXPR > + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) The NE_EXPR check misses a corresponding && (e->flags & EDGE_FALSE_VALUE) check. > + continue; > + > + /* Check if bound is invarant. */ > + tree idx = gimple_cond_lhs (cond); > + tree bnd = gimple_cond_rhs (cond); > + if (expr_invariant_in_loop_p (loop, idx)) > + std::swap (idx, bnd); > + else if (!expr_invariant_in_loop_p (loop, bnd)) > + continue; > + > + /* Only unsigned type conversion could cause wrap. */ > + tree type = TREE_TYPE (idx); > + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME > + || !TYPE_UNSIGNED (type)) > + continue; > + > + /* Avoid to split if bound is MAX/MIN val. */ > + tree bound_type = TREE_TYPE (bnd); > + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) > + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))) > + continue; Note you do not require 'bnd' to be constant and thus at runtime those cases still need to be handled correctly. > + /* Check if there is possible wrap. */ > + class tree_niter_desc niter; > + if (!number_of_iterations_exit (loop, e, &niter, false, false)) > + continue; > + if (niter.control.no_overflow) > + return NULL; > + if (niter.cmp != NE_EXPR) > + continue; > + > + /* If exit edge is just before the empty latch, it is easy to link > + the split loops: just jump from the exit edge of one loop to the > + header of new loop. */ > + if (single_pred_p (loop->latch) > + && single_pred_edge (loop->latch)->src == bb > + && empty_block_p (loop->latch)) > + return e; > + > + /* If exit edge is at end of header, and header contains i++ or ++i, > + only, it is simple to link the split loops: jump from the end of > + one loop header to the new loop header, and use unchanged PHI > + result of first loop as the entry PHI value of the second loop. */ > + if (bb == loop->header) > + { > + /* Only one phi. */ > + gphi_iterator psi = gsi_start_phis (bb); > + if (gsi_end_p (psi)) > + continue; > + gphi *phi = psi.phi (); > + gsi_next (&psi); > + if (!gsi_end_p (psi)) > + continue; > + > + /* Check it is ++i or ++i */ > + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); > + tree prev = PHI_RESULT (phi); > + if (idx != prev && idx != next) > + continue; > + > + gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb); > + if (gsi_end_p (gsi)) > + continue; > + gimple *s1 = gsi_stmt (gsi); > + if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next > + || gimple_assign_rhs1 (s1) != prev) > + continue; > + > + gsi_next_nondebug (&gsi); > + if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond) > + return e; > + } > + } > + > + return NULL; > +} > + > +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR. */ > + > +static bool > +split_ne_loop (struct loop *loop, edge cond_e) > +{ > + initialize_original_copy_tables (); > + > + struct loop *loop2 = loop_version (loop, boolean_true_node, NULL, > + profile_probability::always (), > + profile_probability::never (), > + profile_probability::always (), > + profile_probability::always (), true); > + > + gcc_assert (loop2); > + update_ssa (TODO_update_ssa); > + > + basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src); > + free_original_copy_tables (); > + > + gcond *gc = as_a<gcond *> (last_stmt (cond_e->src)); > + gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb)); > + > + /* Invert edges for gcond. */ > + if (gimple_cond_code (gc) == EQ_EXPR) > + { > + auto invert_edge = [](basic_block bb) { > + edge out = EDGE_SUCC (bb, 0); > + edge in = EDGE_SUCC (bb, 1); > + if (in->flags & EDGE_TRUE_VALUE) > + std::swap (in, out); > + in->flags |= EDGE_TRUE_VALUE; > + in->flags &= ~EDGE_FALSE_VALUE; > + out->flags |= EDGE_FALSE_VALUE; > + out->flags &= ~EDGE_TRUE_VALUE; > + }; > + > + invert_edge (gimple_bb (gc)); > + invert_edge (gimple_bb (dup_gc)); > + } > + > + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ It now occurs to me that we nowhere check the evolution of IDX (split_at_bb_p uses simple_iv for this for example). The transform assumes that we will actually hit i == n and that i increments, but while you check the control IV from number_of_iterations_exit for NE_EXPR that does not guarantee a positive evolution. Your testcases do not include any negative step examples, but I guess the conditions need to be swapped in this case? I think you also have to consider the order we split, say with for (i = start; i != end; ++i) { push (i); if (a[i] != b[i]) break; } push (i) calls need to be in the same order for all cases of start < end, start == end and start > end (and also cover runtime testcases with end == 0 or end == UINT_MAX, likewise for start). > + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); > + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; > + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; > + > + gimple_cond_set_code (gc, up_code); > + gimple_cond_set_code (dup_gc, down_code); > + > + /* Link the exit cond edge to new loop. */ > + gcond *break_cond = as_a<gcond *> (gimple_copy (gc)); > + edge pred_e = single_pred_edge (loop->latch); > + bool simple_loop > + = pred_e && pred_e->src == cond_e->src && empty_block_p (loop->latch); > + if (simple_loop) > + gimple_cond_set_code (break_cond, down_code); > + else > + gimple_cond_make_true (break_cond); > + > + basic_block break_bb = split_edge (cond_e); > + gimple_stmt_iterator gsi = gsi_last_bb (break_bb); > + gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); > + > + edge to_exit = single_succ_edge (break_bb); > + edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0); > + to_new_loop->flags |= EDGE_TRUE_VALUE; > + to_exit->flags |= EDGE_FALSE_VALUE; > + to_exit->flags &= ~EDGE_FALLTHRU; > + to_exit->probability = cond_e->probability; > + to_new_loop->probability = to_exit->probability.invert (); > + > + update_ssa (TODO_update_ssa); > + > + connect_loop_phis (loop, loop2, to_new_loop, !simple_loop); > + > + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, ";; Loop split on wrap index.\n"); > + > + return true; > +} > + > +/* Checks if LOOP contains a suitable NE_EXPR conditional block to split. > +L_H: > + if (i!=N) > + S; > + else > + goto EXIT; > + i++; > + goto L_H; > + > +The "i!=N" is like "i>N || i<N", then it could be transform to: > + > +L_H: > + if (i>N) > + S; > + else > + goto EXIT; > + i++; > + goto L_H; > +L1_H: > + if (i<N) > + S; > + else > + goto EXIT; > + i++; > + goto L1_H; > + > +The loop with "i<N" is in favor both GIMPLE and RTL passes. */ > + > +static bool > +split_loop_on_ne_cond (class loop *loop) > +{ > + int num = 0; > + basic_block *bbs = get_loop_body (loop); > + > + if (!can_copy_bbs_p (bbs, loop->num_nodes)) > + { > + free (bbs); > + return false; > + } > + > + for (unsigned i = 0; i < loop->num_nodes; i++) > + num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); > + free (bbs); > + > + if (num > param_max_peeled_insns) > + return false; > + > + edge branch_edge = get_ne_cond_branch (loop); > + if (branch_edge && split_ne_loop (loop, branch_edge)) please check get_ne_cond_branch first, the can_copy_bbs_p and size estimates are of linear complexity in the loop size. Please also test the size estimate before can_copy_bbs and terminate the loop when you reach param_max_peeled_insns. > + return true; > + > + return false; > +} > + > /* Main entry point. Perform loop splitting on all suitable loops. */ > > static unsigned int > @@ -1622,7 +1862,8 @@ tree_ssa_split_loops (void) > if (optimize_loop_for_size_p (loop)) > continue; > > - if (split_loop (loop) || split_loop_on_cond (loop)) > + if (split_loop (loop) || split_loop_on_cond (loop) > + || split_loop_on_ne_cond (loop)) > { > /* Mark our containing loop as having had some split inner loops. */ > loop_outer (loop)->aux = loop; >
On 2021-06-08 18:13, Richard Biener wrote: > On Fri, 4 Jun 2021, Jiufu Guo wrote: > cut... >> + gcond *cond = as_a<gcond *> (last); >> + enum tree_code code = gimple_cond_code (cond); >> + if (!(code == NE_EXPR >> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) > > The NE_EXPR check misses a corresponding && (e->flags & > EDGE_FALSE_VALUE) > check. > Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. >> + continue; >> + >> + /* Check if bound is invarant. */ >> + tree idx = gimple_cond_lhs (cond); >> + tree bnd = gimple_cond_rhs (cond); >> + if (expr_invariant_in_loop_p (loop, idx)) >> + std::swap (idx, bnd); >> + else if (!expr_invariant_in_loop_p (loop, bnd)) >> + continue; >> + >> + /* Only unsigned type conversion could cause wrap. */ >> + tree type = TREE_TYPE (idx); >> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME >> + || !TYPE_UNSIGNED (type)) >> + continue; >> + >> + /* Avoid to split if bound is MAX/MIN val. */ >> + tree bound_type = TREE_TYPE (bnd); >> + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P >> (bound_type) >> + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) >> + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))) >> + continue; > > Note you do not require 'bnd' to be constant and thus at runtime those > cases still need to be handled correctly. Yes, bnd is not required to be constant. The above code is filtering the case where bnd is const max/min value of the type. So, the code could be updated as: if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) > >> + /* Check if there is possible wrap. */ >> + class tree_niter_desc niter; >> + if (!number_of_iterations_exit (loop, e, &niter, false, false)) cut... >> + >> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ > > It now occurs to me that we nowhere check the evolution of IDX > (split_at_bb_p uses simple_iv for this for example). The transform > assumes that we will actually hit i == n and that i increments, but > while you check the control IV from number_of_iterations_exit > for NE_EXPR that does not guarantee a positive evolution. > If I do not correctly reply your question, please point out: number_of_iterations_exit is similar with simple_iv to invoke simple_iv_with_niters which check the evolution, and number_of_iterations_exit check number_of_iterations_cond which check no_overflow more accurate, this is one reason I use this function. This transform assumes that the last run hits i==n. Otherwise, the loop may run infinitely wrap after wrap. For safe, if the step is 1 or -1, this assumption would be true. I would add this check. Thanks so much for pointing out I missed the negative step! > Your testcases do not include any negative step examples, but I guess > the conditions need to be swapped in this case? I would add cases and code to support step 1/-1. > > I think you also have to consider the order we split, say with > > for (i = start; i != end; ++i) > { > push (i); > if (a[i] != b[i]) > break; > } > > push (i) calls need to be in the same order for all cases of > start < end, start == end and start > end (and also cover > runtime testcases with end == 0 or end == UINT_MAX, likewise > for start). I add tests for the above cases. If missing sth, please point out, thanks! > >> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); >> + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; >> + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; cut.... Thanks again for the very helpful review! BR, Jiufu Guo.
On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: > On 2021-06-08 18:13, Richard Biener wrote: >> On Fri, 4 Jun 2021, Jiufu Guo wrote: >> > cut... >>> + gcond *cond = as_a<gcond *> (last); >>> + enum tree_code code = gimple_cond_code (cond); >>> + if (!(code == NE_EXPR >>> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) >> >> The NE_EXPR check misses a corresponding && (e->flags & >> EDGE_FALSE_VALUE) >> check. >> > Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. > >>> + continue; >>> + >>> + /* Check if bound is invarant. */ >>> + tree idx = gimple_cond_lhs (cond); >>> + tree bnd = gimple_cond_rhs (cond); >>> + if (expr_invariant_in_loop_p (loop, idx)) >>> + std::swap (idx, bnd); >>> + else if (!expr_invariant_in_loop_p (loop, bnd)) >>> + continue; >>> + >>> + /* Only unsigned type conversion could cause wrap. */ >>> + tree type = TREE_TYPE (idx); >>> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME >>> + || !TYPE_UNSIGNED (type)) >>> + continue; >>> + >>> + /* Avoid to split if bound is MAX/MIN val. */ >>> + tree bound_type = TREE_TYPE (bnd); >>> + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P >>> (bound_type) >>> + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) >>> + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))) >>> + continue; >> >> Note you do not require 'bnd' to be constant and thus at runtime those >> cases still need to be handled correctly. > Yes, bnd is not required to be constant. The above code is filtering > the case > where bnd is const max/min value of the type. So, the code could be > updated as: > if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) > >> >>> + /* Check if there is possible wrap. */ >>> + class tree_niter_desc niter; >>> + if (!number_of_iterations_exit (loop, e, &niter, false, >>> false)) > cut... >>> + >>> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ >> >> It now occurs to me that we nowhere check the evolution of IDX >> (split_at_bb_p uses simple_iv for this for example). The transform >> assumes that we will actually hit i == n and that i increments, but >> while you check the control IV from number_of_iterations_exit >> for NE_EXPR that does not guarantee a positive evolution. >> > If I do not correctly reply your question, please point out: > number_of_iterations_exit is similar with simple_iv to invoke > simple_iv_with_niters > which check the evolution, and number_of_iterations_exit check > number_of_iterations_cond > which check no_overflow more accurate, this is one reason I use this > function. > > This transform assumes that the last run hits i==n. > Otherwise, the loop may run infinitely wrap after wrap. > For safe, if the step is 1 or -1, this assumption would be true. I > would add this check. > > Thanks so much for pointing out I missed the negative step! > >> Your testcases do not include any negative step examples, but I guess >> the conditions need to be swapped in this case? > > I would add cases and code to support step 1/-1. > >> >> I think you also have to consider the order we split, say with >> >> for (i = start; i != end; ++i) >> { >> push (i); >> if (a[i] != b[i]) >> break; >> } >> >> push (i) calls need to be in the same order for all cases of >> start < end, start == end and start > end (and also cover >> runtime testcases with end == 0 or end == UINT_MAX, likewise >> for start). > I add tests for the above cases. If missing sth, please point out, > thanks! > >> >>> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); >>> + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; >>> + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; > cut.... > > Thanks again for the very helpful review! > > BR, > Jiufu Guo. Here is the updated patch, thanks for your time! diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c new file mode 100644 index 00000000000..dd2d03a7b96 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split1.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ + +void +foo (int *a, int *b, unsigned l, unsigned n) +{ + while (++l != n) + a[l] = b[l] + 1; +} +void +foo_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (++l != n) + a[l] = b[l] + 1; +} + +void +foo1 (int *a, int *b, unsigned l, unsigned n) +{ + while (l++ != n) + a[l] = b[l] + 1; +} + +/* No wrap. */ +void +foo1_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (l++ != n) + a[l] = b[l] + 1; +} + +unsigned +foo2 (char *a, char *b, unsigned l, unsigned n) +{ + while (++l != n) + if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo2_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (++l != n) + if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo3 (char *a, char *b, unsigned l, unsigned n) +{ + while (l++ != n) + if (a[l] != b[l]) + break; + + return l; +} + +/* No wrap. */ +unsigned +foo3_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (l++ != n) + if (a[l] != b[l]) + break; + + return l; +} + +void +bar (); +void +foo4 (unsigned n, unsigned i) +{ + do + { + if (i == n) + return; + bar (); + ++i; + } + while (1); +} + +unsigned +find_skip_diff (char *p, char *q, unsigned n, unsigned i) +{ + while (p[i] == q[i] && ++i != n) + p++, q++; + + return i; +} + +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-split2.c b/gcc/testsuite/gcc.dg/loop-split2.c new file mode 100644 index 00000000000..56377e2f2f5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split2.c @@ -0,0 +1,155 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +extern void +abort (void); +extern void +exit (int); +void +push (int); + +#define NI __attribute__ ((noinline)) + +void NI +foo (int *a, int *b, unsigned char l, unsigned char n) +{ + while (++l != n) + a[l] = b[l] + 1; +} + +unsigned NI +bar (int *a, int *b, unsigned char l, unsigned char n) +{ + while (l++ != n) + { + push (l); + if (a[l] != b[l]) + break; + push (l + 1); + } + return l; +} + +void NI +foo_1 (int *a, int *b, unsigned char l, unsigned char n) +{ + while (--l != n) + a[l] = b[l] + 1; +} + +unsigned NI +bar_1 (int *a, int *b, unsigned char l, unsigned char n) +{ + while (l-- != n) + { + push (l); + if (a[l] != b[l]) + break; + push (l + 1); + } + + return l; +} + +int a[258]; +int b[258]; +int c[1024]; +static int top = 0; +void +push (int e) +{ + c[top++] = e; +} + +void +reset () +{ + top = 0; + __builtin_memset (c, 0, sizeof (c)); +} + +#define check(a, b) (a == b) + +int +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5) +{ + return check (c[0], a0) && check (c[1], a1) && check (c[2], a2) + && check (c[3], a3) && check (c[4], a4) && check (c[5], a5); +} + +int +main () +{ + __builtin_memcpy (b, a, sizeof (a)); + reset (); + if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0)) + abort (); + + reset (); + if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9) + || !check_c (c + 496, 254, 255, 255, 256, 0, 1)) + abort (); + + reset (); + if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0)) + abort (); + + reset (); + if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 0, 0)) + abort (); + + reset (); + if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, 1)) + abort (); + + reset (); + if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4)) + abort (); + + reset (); + if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0)) + abort (); + + reset (); + if (bar_1 (a, b, 6, 6) != 5) + abort (); + + reset (); + if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, 256)) + abort (); + + reset (); + if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0)) + abort (); + + b[100] += 1; + reset (); + if (bar (a, b, 90, 110) != 100) + abort (); + + reset (); + if (bar (a, b, 110, 105) != 100) + abort (); + + reset (); + if (bar_1 (a, b, 90, 110) != 109) + abort (); + + reset (); + if (bar_1 (a, b, 2, 90) != 100) + abort (); + + foo (a, b, 99, 99); + a[99] = b[99] + 1; + for (int i = 0; i < 256; i++) + if (a[i] != b[i] + 1) + abort (); + + foo_1 (a, b, 99, 99); + a[99] = b[99] + 1; + for (int i = 0; i < 256; i++) + if (a[i] != b[i] + 1) + abort (); + + exit (0); +} diff --git a/gcc/testsuite/gcc.dg/loop-split3.c b/gcc/testsuite/gcc.dg/loop-split3.c new file mode 100644 index 00000000000..ec93ee8bd12 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split3.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ + +void +foo (int *a, int *b, unsigned l, unsigned n) +{ + while (--l != n) + a[l] = b[l] + 1; +} + +void +foo1 (int *a, int *b, unsigned l, unsigned n) +{ + while (l-- != n) + a[l] = b[l] + 1; +} + +unsigned +foo2 (char *a, char *b, unsigned l, unsigned n) +{ + while (--l != n) + if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo3 (char *a, char *b, unsigned l, unsigned n) +{ + while (l-- != n) + if (a[l] != b[l]) + break; + + return l; +} + +void +bar (); +void +foo4 (unsigned n, unsigned i) +{ + do + { + if (i == n) + return; + bar (); + --i; + } + while (1); +} + +unsigned +find_skip_diff (char *p, char *q, unsigned n, unsigned i) +{ + while (p[i] == q[i] && --i != n) + p--, q--; + + return i; +} + +/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */ diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 3a09bbc39e5..e9f23b32186 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "cfghooks.h" #include "gimple-fold.h" #include "gimplify-me.h" +#include "tree-ssa-loop-ivopts.h" /* This file implements two kinds of loop splitting. @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop) conditional). I.e. the second loop can now be entered either via the original entry or via NEW_E, so the entry values of LOOP2 phi nodes are either the original ones or those at the exit - of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting - this. The loops need to fulfill easy_exit_values(). */ + of LOOP1. Selecting the previous value instead next value as the + exit value of LOOP1 if USE_PREV is true. Insert new phi nodes in + LOOP2 pre-header reflecting this. The loops need to fulfill + easy_exit_values(). */ static void -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, + bool use_prev = false) { basic_block rest = loop_preheader_edge (loop2)->src; gcc_assert (new_e->dest == rest); @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) gphi * newphi = create_phi_node (new_init, rest); add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); - add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); + add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e, + UNKNOWN_LOCATION); SET_USE (op, new_init); } } @@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop) return do_split; } +/* Check if the LOOP exit branch is like "if (idx != bound)", + Return the branch edge which exit loop, if wrap may happen on "idx". */ + +static edge +get_ne_cond_branch (struct loop *loop, tree *step) +{ + int i; + edge e; + + auto_vec<edge> edges = get_loop_exit_edges (loop); + FOR_EACH_VEC_ELT (edges, i, e) + { + basic_block bb = e->src; + + /* Check if exit at gcond. */ + gimple *last = last_stmt (bb); + if (!last || gimple_code (last) != GIMPLE_COND) + continue; + gcond *cond = as_a<gcond *> (last); + enum tree_code code = gimple_cond_code (cond); + if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE)) + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) + continue; + + /* Check if bound is invarant. */ + tree idx = gimple_cond_lhs (cond); + tree bnd = gimple_cond_rhs (cond); + if (expr_invariant_in_loop_p (loop, idx)) + std::swap (idx, bnd); + else if (!expr_invariant_in_loop_p (loop, bnd)) + continue; + + /* Only unsigned type conversion could cause wrap. */ + tree type = TREE_TYPE (idx); + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME + || !TYPE_UNSIGNED (type)) + continue; + + /* Avoid to split if bound is MAX/MIN val. */ + tree bound_type = TREE_TYPE (bnd); + if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) + continue; + + /* Check if there is possible wrap. */ + class tree_niter_desc niter; + if (!number_of_iterations_exit (loop, e, &niter, false, false)) + continue; + if (niter.control.no_overflow) + return NULL; + if (niter.cmp != NE_EXPR) + continue; + if (!integer_onep (niter.control.step) + && !integer_minus_onep (niter.control.step)) + continue; + *step = niter.control.step; + + /* If exit edge is just before the empty latch, it is easy to link + the split loops: just jump from the exit edge of one loop to the + header of new loop. */ + if (single_pred_p (loop->latch) + && single_pred_edge (loop->latch)->src == bb + && empty_block_p (loop->latch)) + return e; + + /* If exit edge is at end of header, and header contains i++ or ++i, + only, it is simple to link the split loops: jump from the end of + one loop header to the new loop header, and use unchanged PHI result + of the first loop as the entry PHI value of the second loop. */ + if (bb == loop->header) + { + /* Only one phi. */ + gphi_iterator psi = gsi_start_phis (bb); + if (gsi_end_p (psi)) + continue; + gphi *phi = psi.phi (); + gsi_next (&psi); + if (!gsi_end_p (psi)) + continue; + + /* Check it is ++i or ++i */ + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + tree prev = PHI_RESULT (phi); + if (idx != prev && idx != next) + continue; + + gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb); + if (gsi_end_p (gsi)) + continue; + gimple *s1 = gsi_stmt (gsi); + if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next + || gimple_assign_rhs1 (s1) != prev) + continue; + + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond) + return e; + } + } + + return NULL; +} + +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR. */ + +static bool +split_ne_loop (struct loop *loop, edge cond_e, tree step) +{ + initialize_original_copy_tables (); + + struct loop *loop2 = loop_version (loop, boolean_true_node, NULL, + profile_probability::always (), + profile_probability::never (), + profile_probability::always (), + profile_probability::always (), true); + + gcc_assert (loop2); + update_ssa (TODO_update_ssa); + + basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src); + free_original_copy_tables (); + + gcond *gc = as_a<gcond *> (last_stmt (cond_e->src)); + gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb)); + + /* Invert edges for gcond. */ + if (gimple_cond_code (gc) == EQ_EXPR) + { + auto invert_edge = [](basic_block bb) { + edge out = EDGE_SUCC (bb, 0); + edge in = EDGE_SUCC (bb, 1); + if (in->flags & EDGE_TRUE_VALUE) + std::swap (in, out); + in->flags |= EDGE_TRUE_VALUE; + in->flags &= ~EDGE_FALSE_VALUE; + out->flags |= EDGE_FALSE_VALUE; + out->flags &= ~EDGE_TRUE_VALUE; + }; + + invert_edge (gimple_bb (gc)); + invert_edge (gimple_bb (dup_gc)); + } + + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); + if (tree_int_cst_sign_bit (step)) + inv = !inv; + enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR; + enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR; + + gimple_cond_set_code (gc, first_loop_code); + gimple_cond_set_code (dup_gc, second_loop_code); + + /* Link the exit cond edge to new loop. */ + gcond *break_cond = as_a<gcond *> (gimple_copy (gc)); + edge pred_e = single_pred_edge (loop->latch); + bool simple_loop + = pred_e && pred_e->src == cond_e->src && empty_block_p (loop->latch); + if (simple_loop) + gimple_cond_set_code (break_cond, second_loop_code); + else + gimple_cond_make_true (break_cond); + + basic_block break_bb = split_edge (cond_e); + gimple_stmt_iterator gsi = gsi_last_bb (break_bb); + gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); + + edge to_exit = single_succ_edge (break_bb); + edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0); + to_new_loop->flags |= EDGE_TRUE_VALUE; + to_exit->flags |= EDGE_FALSE_VALUE; + to_exit->flags &= ~EDGE_FALLTHRU; + to_exit->probability = cond_e->probability; + to_new_loop->probability = to_exit->probability.invert (); + + update_ssa (TODO_update_ssa); + + connect_loop_phis (loop, loop2, to_new_loop, !simple_loop); + + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Loop split on wrap index.\n"); + + return true; +} + +/* Checks if LOOP contains a suitable NE_EXPR conditional block to split. +L_H: + if (i!=N) + S; + else + goto EXIT; + i++; + goto L_H; + +The "i!=N" is like "i>N || i<N", then it could be transformed to: + +L_H: + if (i>N) + S; + else + goto EXIT; + i++; + goto L_H; +L1_H: + if (i<N) + S; + else + goto EXIT; + i++; + goto L1_H; + +The loop with "i<N" is in favor of both GIMPLE and RTL passes. */ + +static bool +split_loop_on_ne_cond (class loop *loop) +{ + tree step; + edge branch_edge = get_ne_cond_branch (loop, &step); + if (!branch_edge) + return false; + + int num = 0; + basic_block *bbs = get_loop_body (loop); + for (unsigned i = 0; i < loop->num_nodes; i++) + num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); + + if (num > param_max_peeled_insns) + { + free (bbs); + return false; + } + + if (!can_copy_bbs_p (bbs, loop->num_nodes)) + { + free (bbs); + return false; + } + free (bbs); + + if (split_ne_loop (loop, branch_edge, step)) + return true; + + return false; +} + /* Main entry point. Perform loop splitting on all suitable loops. */ static unsigned int @@ -1622,7 +1873,8 @@ tree_ssa_split_loops (void) if (optimize_loop_for_size_p (loop)) continue; - if (split_loop (loop) || split_loop_on_cond (loop)) + if (split_loop (loop) || split_loop_on_cond (loop) + || split_loop_on_ne_cond (loop)) { /* Mark our containing loop as having had some split inner loops. */ loop_outer (loop)->aux = loop;
On 2021-06-09 19:18, guojiufu wrote: > On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: >> On 2021-06-08 18:13, Richard Biener wrote: >>> On Fri, 4 Jun 2021, Jiufu Guo wrote: >>> >> cut... cut... > > Here is the updated patch, thanks for your time! Updates: . Enhance code to support negative step. . Check step +-1 to make sure it hits loop condition != . Enhance runtime cases to check more boundary cases and run order cases. . Refine for compiling time: check loop num of insns and can_copy_bbs_p later > > diff --git a/gcc/testsuite/gcc.dg/loop-split1.c > b/gcc/testsuite/gcc.dg/loop-split1.c > new file mode 100644 > index 00000000000..dd2d03a7b96 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split1.c > @@ -0,0 +1,101 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > + > +void > +foo (int *a, int *b, unsigned l, unsigned n) > +{ > + while (++l != n) > + a[l] = b[l] + 1; > +} > +void > +foo_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (++l != n) > + a[l] = b[l] + 1; > +} > + > +void > +foo1 (int *a, int *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > + a[l] = b[l] + 1; > +} > + > +/* No wrap. */ > +void > +foo1_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (l++ != n) > + a[l] = b[l] + 1; > +} > + > +unsigned > +foo2 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (++l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo2_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (++l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo3 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +/* No wrap. */ > +unsigned > +foo3_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (l++ != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +void > +bar (); > +void > +foo4 (unsigned n, unsigned i) > +{ > + do > + { > + if (i == n) > + return; > + bar (); > + ++i; > + } > + while (1); > +} > + > +unsigned > +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > +{ > + while (p[i] == q[i] && ++i != n) > + p++, q++; > + > + return i; > +} > + > +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ > diff --git a/gcc/testsuite/gcc.dg/loop-split2.c > b/gcc/testsuite/gcc.dg/loop-split2.c > new file mode 100644 > index 00000000000..56377e2f2f5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split2.c > @@ -0,0 +1,155 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3" } */ > + > +extern void > +abort (void); > +extern void > +exit (int); > +void > +push (int); > + > +#define NI __attribute__ ((noinline)) > + > +void NI > +foo (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (++l != n) > + a[l] = b[l] + 1; > +} > + > +unsigned NI > +bar (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (l++ != n) > + { > + push (l); > + if (a[l] != b[l]) > + break; > + push (l + 1); > + } > + return l; > +} > + > +void NI > +foo_1 (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (--l != n) > + a[l] = b[l] + 1; > +} > + > +unsigned NI > +bar_1 (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (l-- != n) > + { > + push (l); > + if (a[l] != b[l]) > + break; > + push (l + 1); > + } > + > + return l; > +} > + > +int a[258]; > +int b[258]; > +int c[1024]; > +static int top = 0; > +void > +push (int e) > +{ > + c[top++] = e; > +} > + > +void > +reset () > +{ > + top = 0; > + __builtin_memset (c, 0, sizeof (c)); > +} > + > +#define check(a, b) (a == b) > + > +int > +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5) > +{ > + return check (c[0], a0) && check (c[1], a1) && check (c[2], a2) > + && check (c[3], a3) && check (c[4], a4) && check (c[5], a5); > +} > + > +int > +main () > +{ > + __builtin_memcpy (b, a, sizeof (a)); > + reset (); > + if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0)) > + abort (); > + > + reset (); > + if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9) > + || !check_c (c + 496, 254, 255, 255, 256, 0, 1)) > + abort (); > + > + reset (); > + if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0)) > + abort (); > + > + reset (); > + if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 0, > 0)) > + abort (); > + > + reset (); > + if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, > 1)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 6, 6) != 5) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, > 256)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0)) > + abort (); > + > + b[100] += 1; > + reset (); > + if (bar (a, b, 90, 110) != 100) > + abort (); > + > + reset (); > + if (bar (a, b, 110, 105) != 100) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 90, 110) != 109) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 2, 90) != 100) > + abort (); > + > + foo (a, b, 99, 99); > + a[99] = b[99] + 1; > + for (int i = 0; i < 256; i++) > + if (a[i] != b[i] + 1) > + abort (); > + > + foo_1 (a, b, 99, 99); > + a[99] = b[99] + 1; > + for (int i = 0; i < 256; i++) > + if (a[i] != b[i] + 1) > + abort (); > + > + exit (0); > +} > diff --git a/gcc/testsuite/gcc.dg/loop-split3.c > b/gcc/testsuite/gcc.dg/loop-split3.c > new file mode 100644 > index 00000000000..ec93ee8bd12 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split3.c > @@ -0,0 +1,62 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > + > +void > +foo (int *a, int *b, unsigned l, unsigned n) > +{ > + while (--l != n) > + a[l] = b[l] + 1; > +} > + > +void > +foo1 (int *a, int *b, unsigned l, unsigned n) > +{ > + while (l-- != n) > + a[l] = b[l] + 1; > +} > + > +unsigned > +foo2 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (--l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo3 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (l-- != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +void > +bar (); > +void > +foo4 (unsigned n, unsigned i) > +{ > + do > + { > + if (i == n) > + return; > + bar (); > + --i; > + } > + while (1); > +} > + > +unsigned > +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > +{ > + while (p[i] == q[i] && --i != n) > + p--, q--; > + > + return i; > +} > + > +/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */ > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > index 3a09bbc39e5..e9f23b32186 100644 > --- a/gcc/tree-ssa-loop-split.c > +++ b/gcc/tree-ssa-loop-split.c > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see > #include "cfghooks.h" > #include "gimple-fold.h" > #include "gimplify-me.h" > +#include "tree-ssa-loop-ivopts.h" > > /* This file implements two kinds of loop splitting. > > @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop) > conditional). I.e. the second loop can now be entered either > via the original entry or via NEW_E, so the entry values of LOOP2 > phi nodes are either the original ones or those at the exit > - of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting > - this. The loops need to fulfill easy_exit_values(). */ > + of LOOP1. Selecting the previous value instead next value as the > + exit value of LOOP1 if USE_PREV is true. Insert new phi nodes in > + LOOP2 pre-header reflecting this. The loops need to fulfill > + easy_exit_values(). */ > > static void > -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) > +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, > + bool use_prev = false) > { > basic_block rest = loop_preheader_edge (loop2)->src; > gcc_assert (new_e->dest == rest); > @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop > *loop2, edge new_e) > > gphi * newphi = create_phi_node (new_init, rest); > add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); > - add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); > + add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, > new_e, > + UNKNOWN_LOCATION); > SET_USE (op, new_init); > } > } > @@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop) > return do_split; > } > > +/* Check if the LOOP exit branch is like "if (idx != bound)", > + Return the branch edge which exit loop, if wrap may happen on > "idx". */ > + > +static edge > +get_ne_cond_branch (struct loop *loop, tree *step) > +{ > + int i; > + edge e; > + > + auto_vec<edge> edges = get_loop_exit_edges (loop); > + FOR_EACH_VEC_ELT (edges, i, e) > + { > + basic_block bb = e->src; > + > + /* Check if exit at gcond. */ > + gimple *last = last_stmt (bb); > + if (!last || gimple_code (last) != GIMPLE_COND) > + continue; > + gcond *cond = as_a<gcond *> (last); > + enum tree_code code = gimple_cond_code (cond); > + if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE)) > + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) > + continue; > + > + /* Check if bound is invarant. */ > + tree idx = gimple_cond_lhs (cond); > + tree bnd = gimple_cond_rhs (cond); > + if (expr_invariant_in_loop_p (loop, idx)) > + std::swap (idx, bnd); > + else if (!expr_invariant_in_loop_p (loop, bnd)) > + continue; > + > + /* Only unsigned type conversion could cause wrap. */ > + tree type = TREE_TYPE (idx); > + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME > + || !TYPE_UNSIGNED (type)) > + continue; > + > + /* Avoid to split if bound is MAX/MIN val. */ > + tree bound_type = TREE_TYPE (bnd); > + if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) > + continue; > + > + /* Check if there is possible wrap. */ > + class tree_niter_desc niter; > + if (!number_of_iterations_exit (loop, e, &niter, false, false)) > + continue; > + if (niter.control.no_overflow) > + return NULL; > + if (niter.cmp != NE_EXPR) > + continue; > + if (!integer_onep (niter.control.step) > + && !integer_minus_onep (niter.control.step)) > + continue; > + *step = niter.control.step; > + > + /* If exit edge is just before the empty latch, it is easy to > link > + the split loops: just jump from the exit edge of one loop to the > + header of new loop. */ > + if (single_pred_p (loop->latch) > + && single_pred_edge (loop->latch)->src == bb > + && empty_block_p (loop->latch)) > + return e; > + > + /* If exit edge is at end of header, and header contains i++ or > ++i, > + only, it is simple to link the split loops: jump from the end of > + one loop header to the new loop header, and use unchanged PHI result > + of the first loop as the entry PHI value of the second loop. */ > + if (bb == loop->header) > + { > + /* Only one phi. */ > + gphi_iterator psi = gsi_start_phis (bb); > + if (gsi_end_p (psi)) > + continue; > + gphi *phi = psi.phi (); > + gsi_next (&psi); > + if (!gsi_end_p (psi)) > + continue; > + > + /* Check it is ++i or ++i */ > + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); > + tree prev = PHI_RESULT (phi); > + if (idx != prev && idx != next) > + continue; > + > + gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb); > + if (gsi_end_p (gsi)) > + continue; > + gimple *s1 = gsi_stmt (gsi); > + if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next > + || gimple_assign_rhs1 (s1) != prev) > + continue; > + > + gsi_next_nondebug (&gsi); > + if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond) > + return e; > + } > + } > + > + return NULL; > +} > + > +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and > LT_EXPR. */ > + > +static bool > +split_ne_loop (struct loop *loop, edge cond_e, tree step) > +{ > + initialize_original_copy_tables (); > + > + struct loop *loop2 = loop_version (loop, boolean_true_node, NULL, > + profile_probability::always (), > + profile_probability::never (), > + profile_probability::always (), > + profile_probability::always (), true); > + > + gcc_assert (loop2); > + update_ssa (TODO_update_ssa); > + > + basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src); > + free_original_copy_tables (); > + > + gcond *gc = as_a<gcond *> (last_stmt (cond_e->src)); > + gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb)); > + > + /* Invert edges for gcond. */ > + if (gimple_cond_code (gc) == EQ_EXPR) > + { > + auto invert_edge = [](basic_block bb) { > + edge out = EDGE_SUCC (bb, 0); > + edge in = EDGE_SUCC (bb, 1); > + if (in->flags & EDGE_TRUE_VALUE) > + std::swap (in, out); > + in->flags |= EDGE_TRUE_VALUE; > + in->flags &= ~EDGE_FALSE_VALUE; > + out->flags |= EDGE_FALSE_VALUE; > + out->flags &= ~EDGE_TRUE_VALUE; > + }; > + > + invert_edge (gimple_bb (gc)); > + invert_edge (gimple_bb (dup_gc)); > + } > + > + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ > + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); > + if (tree_int_cst_sign_bit (step)) > + inv = !inv; > + enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR; > + enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR; > + > + gimple_cond_set_code (gc, first_loop_code); > + gimple_cond_set_code (dup_gc, second_loop_code); > + > + /* Link the exit cond edge to new loop. */ > + gcond *break_cond = as_a<gcond *> (gimple_copy (gc)); > + edge pred_e = single_pred_edge (loop->latch); > + bool simple_loop > + = pred_e && pred_e->src == cond_e->src && empty_block_p > (loop->latch); > + if (simple_loop) > + gimple_cond_set_code (break_cond, second_loop_code); > + else > + gimple_cond_make_true (break_cond); > + > + basic_block break_bb = split_edge (cond_e); > + gimple_stmt_iterator gsi = gsi_last_bb (break_bb); > + gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); > + > + edge to_exit = single_succ_edge (break_bb); > + edge to_new_loop = make_edge (break_bb, loop_preheader_edge > (loop2)->src, 0); > + to_new_loop->flags |= EDGE_TRUE_VALUE; > + to_exit->flags |= EDGE_FALSE_VALUE; > + to_exit->flags &= ~EDGE_FALLTHRU; > + to_exit->probability = cond_e->probability; > + to_new_loop->probability = to_exit->probability.invert (); > + > + update_ssa (TODO_update_ssa); > + > + connect_loop_phis (loop, loop2, to_new_loop, !simple_loop); > + > + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, ";; Loop split on wrap index.\n"); > + > + return true; > +} > + > +/* Checks if LOOP contains a suitable NE_EXPR conditional block to > split. > +L_H: > + if (i!=N) > + S; > + else > + goto EXIT; > + i++; > + goto L_H; > + > +The "i!=N" is like "i>N || i<N", then it could be transformed to: > + > +L_H: > + if (i>N) > + S; > + else > + goto EXIT; > + i++; > + goto L_H; > +L1_H: > + if (i<N) > + S; > + else > + goto EXIT; > + i++; > + goto L1_H; > + > +The loop with "i<N" is in favor of both GIMPLE and RTL passes. */ > + > +static bool > +split_loop_on_ne_cond (class loop *loop) > +{ > + tree step; > + edge branch_edge = get_ne_cond_branch (loop, &step); > + if (!branch_edge) > + return false; > + > + int num = 0; > + basic_block *bbs = get_loop_body (loop); > + for (unsigned i = 0; i < loop->num_nodes; i++) > + num += estimate_num_insns_seq (bb_seq (bbs[i]), > &eni_size_weights); > + > + if (num > param_max_peeled_insns) > + { > + free (bbs); > + return false; > + } > + > + if (!can_copy_bbs_p (bbs, loop->num_nodes)) > + { > + free (bbs); > + return false; > + } > + free (bbs); > + > + if (split_ne_loop (loop, branch_edge, step)) > + return true; > + > + return false; > +} > + > /* Main entry point. Perform loop splitting on all suitable loops. > */ > > static unsigned int > @@ -1622,7 +1873,8 @@ tree_ssa_split_loops (void) > if (optimize_loop_for_size_p (loop)) > continue; > > - if (split_loop (loop) || split_loop_on_cond (loop)) > + if (split_loop (loop) || split_loop_on_cond (loop) > + || split_loop_on_ne_cond (loop)) > { > /* Mark our containing loop as having had some split inner loops. > */ > loop_outer (loop)->aux = loop;
On Wed, 9 Jun 2021, guojiufu wrote: > On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: > > On 2021-06-08 18:13, Richard Biener wrote: > >> On Fri, 4 Jun 2021, Jiufu Guo wrote: > >> > > cut... > >>> + gcond *cond = as_a<gcond *> (last); > >>> + enum tree_code code = gimple_cond_code (cond); > >>> + if (!(code == NE_EXPR > >>> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) > >> > >> The NE_EXPR check misses a corresponding && (e->flags & EDGE_FALSE_VALUE) > >> check. > >> > > Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. > > > >>> + continue; > >>> + > >>> + /* Check if bound is invarant. */ > >>> + tree idx = gimple_cond_lhs (cond); > >>> + tree bnd = gimple_cond_rhs (cond); > >>> + if (expr_invariant_in_loop_p (loop, idx)) > >>> + std::swap (idx, bnd); > >>> + else if (!expr_invariant_in_loop_p (loop, bnd)) > >>> + continue; > >>> + > >>> + /* Only unsigned type conversion could cause wrap. */ > >>> + tree type = TREE_TYPE (idx); > >>> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME > >>> + || !TYPE_UNSIGNED (type)) > >>> + continue; > >>> + > >>> + /* Avoid to split if bound is MAX/MIN val. */ > >>> + tree bound_type = TREE_TYPE (bnd); > >>> + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) > >>> + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > >>> + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))) > >>> + continue; > >> > >> Note you do not require 'bnd' to be constant and thus at runtime those > >> cases still need to be handled correctly. > > Yes, bnd is not required to be constant. The above code is filtering the > > case > > where bnd is const max/min value of the type. So, the code could be updated > > as: > > if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > > || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) Yes, and the comment adjusted to "if bound is known to be MAX/MIN val." > >> > >>> + /* Check if there is possible wrap. */ > >>> + class tree_niter_desc niter; > >>> + if (!number_of_iterations_exit (loop, e, &niter, false, false)) > > cut... > >>> + > >>> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ > >> > >> It now occurs to me that we nowhere check the evolution of IDX > >> (split_at_bb_p uses simple_iv for this for example). The transform > >> assumes that we will actually hit i == n and that i increments, but > >> while you check the control IV from number_of_iterations_exit > >> for NE_EXPR that does not guarantee a positive evolution. > >> > > If I do not correctly reply your question, please point out: > > number_of_iterations_exit is similar with simple_iv to invoke > > simple_iv_with_niters > > which check the evolution, and number_of_iterations_exit check > > number_of_iterations_cond > > which check no_overflow more accurate, this is one reason I use this > > function. > > > > This transform assumes that the last run hits i==n. > > Otherwise, the loop may run infinitely wrap after wrap. > > For safe, if the step is 1 or -1, this assumption would be true. I > > would add this check. OK. > > Thanks so much for pointing out I missed the negative step! > > > >> Your testcases do not include any negative step examples, but I guess > >> the conditions need to be swapped in this case? > > > > I would add cases and code to support step 1/-1. > > > >> > >> I think you also have to consider the order we split, say with > >> > >> for (i = start; i != end; ++i) > >> { > >> push (i); > >> if (a[i] != b[i]) > >> break; > >> } > >> > >> push (i) calls need to be in the same order for all cases of > >> start < end, start == end and start > end (and also cover > >> runtime testcases with end == 0 or end == UINT_MAX, likewise > >> for start). > > I add tests for the above cases. If missing sth, please point out, thanks! > > > >> > >>> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); > >>> + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; > >>> + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; > > cut.... > > > > Thanks again for the very helpful review! > > > > BR, > > Jiufu Guo. > > Here is the updated patch, thanks for your time! > > diff --git a/gcc/testsuite/gcc.dg/loop-split1.c > b/gcc/testsuite/gcc.dg/loop-split1.c > new file mode 100644 > index 00000000000..dd2d03a7b96 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split1.c > @@ -0,0 +1,101 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > + > +void > +foo (int *a, int *b, unsigned l, unsigned n) > +{ > + while (++l != n) > + a[l] = b[l] + 1; > +} > +void > +foo_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (++l != n) > + a[l] = b[l] + 1; > +} > + > +void > +foo1 (int *a, int *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > + a[l] = b[l] + 1; > +} > + > +/* No wrap. */ > +void > +foo1_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (l++ != n) > + a[l] = b[l] + 1; > +} > + > +unsigned > +foo2 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (++l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo2_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (++l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo3 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +/* No wrap. */ > +unsigned > +foo3_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (l++ != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +void > +bar (); > +void > +foo4 (unsigned n, unsigned i) > +{ > + do > + { > + if (i == n) > + return; > + bar (); > + ++i; > + } > + while (1); > +} > + > +unsigned > +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > +{ > + while (p[i] == q[i] && ++i != n) > + p++, q++; > + > + return i; > +} > + > +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ > diff --git a/gcc/testsuite/gcc.dg/loop-split2.c > b/gcc/testsuite/gcc.dg/loop-split2.c > new file mode 100644 > index 00000000000..56377e2f2f5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split2.c > @@ -0,0 +1,155 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3" } */ > + > +extern void > +abort (void); > +extern void > +exit (int); > +void > +push (int); > + > +#define NI __attribute__ ((noinline)) > + > +void NI > +foo (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (++l != n) > + a[l] = b[l] + 1; > +} > + > +unsigned NI > +bar (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (l++ != n) > + { > + push (l); > + if (a[l] != b[l]) > + break; > + push (l + 1); > + } > + return l; > +} > + > +void NI > +foo_1 (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (--l != n) > + a[l] = b[l] + 1; > +} > + > +unsigned NI > +bar_1 (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (l-- != n) > + { > + push (l); > + if (a[l] != b[l]) > + break; > + push (l + 1); > + } > + > + return l; > +} > + > +int a[258]; > +int b[258]; > +int c[1024]; > +static int top = 0; > +void > +push (int e) > +{ > + c[top++] = e; > +} > + > +void > +reset () > +{ > + top = 0; > + __builtin_memset (c, 0, sizeof (c)); > +} > + > +#define check(a, b) (a == b) > + > +int > +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5) > +{ > + return check (c[0], a0) && check (c[1], a1) && check (c[2], a2) > + && check (c[3], a3) && check (c[4], a4) && check (c[5], a5); > +} > + > +int > +main () > +{ > + __builtin_memcpy (b, a, sizeof (a)); > + reset (); > + if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0)) > + abort (); > + > + reset (); > + if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9) > + || !check_c (c + 496, 254, 255, 255, 256, 0, 1)) > + abort (); > + > + reset (); > + if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0)) > + abort (); > + > + reset (); > + if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 0, 0)) > + abort (); > + > + reset (); > + if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, 1)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 6, 6) != 5) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, 256)) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0)) > + abort (); > + > + b[100] += 1; > + reset (); > + if (bar (a, b, 90, 110) != 100) > + abort (); > + > + reset (); > + if (bar (a, b, 110, 105) != 100) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 90, 110) != 109) > + abort (); > + > + reset (); > + if (bar_1 (a, b, 2, 90) != 100) > + abort (); > + > + foo (a, b, 99, 99); > + a[99] = b[99] + 1; > + for (int i = 0; i < 256; i++) > + if (a[i] != b[i] + 1) > + abort (); > + > + foo_1 (a, b, 99, 99); > + a[99] = b[99] + 1; > + for (int i = 0; i < 256; i++) > + if (a[i] != b[i] + 1) > + abort (); > + > + exit (0); > +} > diff --git a/gcc/testsuite/gcc.dg/loop-split3.c > b/gcc/testsuite/gcc.dg/loop-split3.c > new file mode 100644 > index 00000000000..ec93ee8bd12 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split3.c > @@ -0,0 +1,62 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > + > +void > +foo (int *a, int *b, unsigned l, unsigned n) > +{ > + while (--l != n) > + a[l] = b[l] + 1; > +} > + > +void > +foo1 (int *a, int *b, unsigned l, unsigned n) > +{ > + while (l-- != n) > + a[l] = b[l] + 1; > +} > + > +unsigned > +foo2 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (--l != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo3 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (l-- != n) > + if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +void > +bar (); > +void > +foo4 (unsigned n, unsigned i) > +{ > + do > + { > + if (i == n) > + return; > + bar (); > + --i; > + } > + while (1); > +} > + > +unsigned > +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > +{ > + while (p[i] == q[i] && --i != n) > + p--, q--; > + > + return i; > +} > + > +/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */ > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > index 3a09bbc39e5..e9f23b32186 100644 > --- a/gcc/tree-ssa-loop-split.c > +++ b/gcc/tree-ssa-loop-split.c > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see > #include "cfghooks.h" > #include "gimple-fold.h" > #include "gimplify-me.h" > +#include "tree-ssa-loop-ivopts.h" > > /* This file implements two kinds of loop splitting. > > @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop) > conditional). I.e. the second loop can now be entered either > via the original entry or via NEW_E, so the entry values of LOOP2 > phi nodes are either the original ones or those at the exit > - of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting > - this. The loops need to fulfill easy_exit_values(). */ > + of LOOP1. Selecting the previous value instead next value as the > + exit value of LOOP1 if USE_PREV is true. Insert new phi nodes in > + LOOP2 pre-header reflecting this. The loops need to fulfill > + easy_exit_values(). */ > > static void > -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) > +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, > + bool use_prev = false) > { > basic_block rest = loop_preheader_edge (loop2)->src; > gcc_assert (new_e->dest == rest); > @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop *loop2, > edge new_e) > > gphi * newphi = create_phi_node (new_init, rest); > add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); > - add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); > + add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e, > + UNKNOWN_LOCATION); > SET_USE (op, new_init); > } > } > @@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop) > return do_split; > } > > +/* Check if the LOOP exit branch is like "if (idx != bound)", > + Return the branch edge which exit loop, if wrap may happen on "idx". */ > + > +static edge > +get_ne_cond_branch (struct loop *loop, tree *step) > +{ > + int i; > + edge e; > + > + auto_vec<edge> edges = get_loop_exit_edges (loop); > + FOR_EACH_VEC_ELT (edges, i, e) > + { > + basic_block bb = e->src; > + > + /* Check if exit at gcond. */ > + gimple *last = last_stmt (bb); > + if (!last || gimple_code (last) != GIMPLE_COND) > + continue; > + gcond *cond = as_a<gcond *> (last); gcond *cont = safe_dyn_cast <gcond *> (last_stmt (bb)); if (!last) continue; is shorter. > + enum tree_code code = gimple_cond_code (cond); > + if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE)) > + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) > + continue; > + > + /* Check if bound is invarant. */ > + tree idx = gimple_cond_lhs (cond); > + tree bnd = gimple_cond_rhs (cond); > + if (expr_invariant_in_loop_p (loop, idx)) > + std::swap (idx, bnd); > + else if (!expr_invariant_in_loop_p (loop, bnd)) > + continue; > + > + /* Only unsigned type conversion could cause wrap. */ > + tree type = TREE_TYPE (idx); > + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME > + || !TYPE_UNSIGNED (type)) > + continue; > + > + /* Avoid to split if bound is MAX/MIN val. */ > + tree bound_type = TREE_TYPE (bnd); > + if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) > + continue; > + > + /* Check if there is possible wrap. */ > + class tree_niter_desc niter; > + if (!number_of_iterations_exit (loop, e, &niter, false, false)) > + continue; > + if (niter.control.no_overflow) > + return NULL; > + if (niter.cmp != NE_EXPR) > + continue; > + if (!integer_onep (niter.control.step) > + && !integer_minus_onep (niter.control.step)) > + continue; > + *step = niter.control.step; > + > + /* If exit edge is just before the empty latch, it is easy to link > + the split loops: just jump from the exit edge of one loop to the > + header of new loop. */ > + if (single_pred_p (loop->latch) > + && single_pred_edge (loop->latch)->src == bb > + && empty_block_p (loop->latch)) > + return e; > + > + /* If exit edge is at end of header, and header contains i++ or ++i, > + only, it is simple to link the split loops: jump from the end of > + one loop header to the new loop header, and use unchanged PHI result > + of the first loop as the entry PHI value of the second loop. */ > + if (bb == loop->header) > + { > + /* Only one phi. */ > + gphi_iterator psi = gsi_start_phis (bb); > + if (gsi_end_p (psi)) > + continue; > + gphi *phi = psi.phi (); > + gsi_next (&psi); > + if (!gsi_end_p (psi)) > + continue; > + > + /* Check it is ++i or ++i */ > + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); > + tree prev = PHI_RESULT (phi); > + if (idx != prev && idx != next) > + continue; > + > + gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb); > + if (gsi_end_p (gsi)) > + continue; > + gimple *s1 = gsi_stmt (gsi); > + if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next > + || gimple_assign_rhs1 (s1) != prev) > + continue; > + > + gsi_next_nondebug (&gsi); > + if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond) > + return e; > + } > + } > + > + return NULL; > +} > + > +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR. */ > + > +static bool > +split_ne_loop (struct loop *loop, edge cond_e, tree step) > +{ > + initialize_original_copy_tables (); > + > + struct loop *loop2 = loop_version (loop, boolean_true_node, NULL, > + profile_probability::always (), > + profile_probability::never (), > + profile_probability::always (), > + profile_probability::always (), true); > + > + gcc_assert (loop2); > + update_ssa (TODO_update_ssa); > + > + basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src); > + free_original_copy_tables (); > + > + gcond *gc = as_a<gcond *> (last_stmt (cond_e->src)); > + gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb)); > + > + /* Invert edges for gcond. */ > + if (gimple_cond_code (gc) == EQ_EXPR) > + { > + auto invert_edge = [](basic_block bb) { > + edge out = EDGE_SUCC (bb, 0); > + edge in = EDGE_SUCC (bb, 1); > + if (in->flags & EDGE_TRUE_VALUE) > + std::swap (in, out); > + in->flags |= EDGE_TRUE_VALUE; > + in->flags &= ~EDGE_FALSE_VALUE; > + out->flags |= EDGE_FALSE_VALUE; > + out->flags &= ~EDGE_TRUE_VALUE; > + }; > + > + invert_edge (gimple_bb (gc)); > + invert_edge (gimple_bb (dup_gc)); > + } > + > + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ > + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); > + if (tree_int_cst_sign_bit (step)) > + inv = !inv; > + enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR; > + enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR; You could handle gimple_cond_code (gc) == EQ_EXPR via if (gimple_cond_code (gc) == EQ_EXPR) { first_loop_code = invert_tree_comparison (first_loop_code, false); second_loop_code = invert_tree_comparison (second_loop_code, false); } that looks simpler than the lambda dance with inverting the edge flags. > + gimple_cond_set_code (gc, first_loop_code); > + gimple_cond_set_code (dup_gc, second_loop_code); > + > + /* Link the exit cond edge to new loop. */ > + gcond *break_cond = as_a<gcond *> (gimple_copy (gc)); > + edge pred_e = single_pred_edge (loop->latch); > + bool simple_loop > + = pred_e && pred_e->src == cond_e->src && empty_block_p (loop->latch); > + if (simple_loop) > + gimple_cond_set_code (break_cond, second_loop_code); > + else > + gimple_cond_make_true (break_cond); > + > + basic_block break_bb = split_edge (cond_e); > + gimple_stmt_iterator gsi = gsi_last_bb (break_bb); > + gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); > + > + edge to_exit = single_succ_edge (break_bb); > + edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, > 0); > + to_new_loop->flags |= EDGE_TRUE_VALUE; > + to_exit->flags |= EDGE_FALSE_VALUE; > + to_exit->flags &= ~EDGE_FALLTHRU; > + to_exit->probability = cond_e->probability; > + to_new_loop->probability = to_exit->probability.invert (); > + > + update_ssa (TODO_update_ssa); I've re-organized the pass to perform a single TODO_update_ssa at the very end, please do not update SSA form here, nor > + connect_loop_phis (loop, loop2, to_new_loop, !simple_loop); > + > + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); re-write into loop-closed SSA. > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, ";; Loop split on wrap index.\n"); > + > + return true; > +} > + > +/* Checks if LOOP contains a suitable NE_EXPR conditional block to split. > +L_H: > + if (i!=N) > + S; > + else > + goto EXIT; > + i++; > + goto L_H; > + > +The "i!=N" is like "i>N || i<N", then it could be transformed to: > + > +L_H: > + if (i>N) > + S; > + else > + goto EXIT; > + i++; > + goto L_H; > +L1_H: > + if (i<N) > + S; > + else > + goto EXIT; > + i++; > + goto L1_H; > + > +The loop with "i<N" is in favor of both GIMPLE and RTL passes. */ > + > +static bool > +split_loop_on_ne_cond (class loop *loop) > +{ > + tree step; > + edge branch_edge = get_ne_cond_branch (loop, &step); > + if (!branch_edge) > + return false; > + > + int num = 0; > + basic_block *bbs = get_loop_body (loop); > + for (unsigned i = 0; i < loop->num_nodes; i++) > + num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); Since the motivation is to make data-refs analyzable after the transform if there are no datarefs the transform only increases code-size. In particular I would look for calls which will be not analyzable. Since we're looking at each stmt above that could be embedded here. As said earlier once num exceeds param_max_peeled_insns you can stop the above loop. So heuristically I'd do sth like for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); if (is_gimple_debug (stmt)) continue; if (gimple_has_side_effects (stmt)) { free (bbs); return false; } num += estimate_num_insns (stmt, &eni_size_weights); if (num > param_max_peeled_insns) { free (bbs); return false; } if (gimple_vuse (stmt)) any_dr = true; } if (!any_dr) { free (bbs); return false; } There's also still the issue that the transformed loop will fail number of iteration analysis for the loop that iterates until the IV wraps. That's a blocker for the acceptance of this transform. Richard.
On 2021-06-21 16:51, Richard Biener wrote: > On Wed, 9 Jun 2021, guojiufu wrote: > >> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: >> > On 2021-06-08 18:13, Richard Biener wrote: >> >> On Fri, 4 Jun 2021, Jiufu Guo wrote: >> >> >> > cut... >> >>> + gcond *cond = as_a<gcond *> (last); >> >>> + enum tree_code code = gimple_cond_code (cond); >> >>> + if (!(code == NE_EXPR >> >>> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) >> >> >> >> The NE_EXPR check misses a corresponding && (e->flags & EDGE_FALSE_VALUE) >> >> check. >> >> >> > Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. >> > >> >>> + continue; >> >>> + >> >>> + /* Check if bound is invarant. */ >> >>> + tree idx = gimple_cond_lhs (cond); >> >>> + tree bnd = gimple_cond_rhs (cond); >> >>> + if (expr_invariant_in_loop_p (loop, idx)) >> >>> + std::swap (idx, bnd); >> >>> + else if (!expr_invariant_in_loop_p (loop, bnd)) >> >>> + continue; >> >>> + >> >>> + /* Only unsigned type conversion could cause wrap. */ >> >>> + tree type = TREE_TYPE (idx); >> >>> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME >> >>> + || !TYPE_UNSIGNED (type)) >> >>> + continue; >> >>> + >> >>> + /* Avoid to split if bound is MAX/MIN val. */ >> >>> + tree bound_type = TREE_TYPE (bnd); >> >>> + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) >> >>> + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) >> >>> + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))) >> >>> + continue; >> >> >> >> Note you do not require 'bnd' to be constant and thus at runtime those >> >> cases still need to be handled correctly. >> > Yes, bnd is not required to be constant. The above code is filtering the >> > case >> > where bnd is const max/min value of the type. So, the code could be updated >> > as: >> > if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) >> > || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) > > Yes, and the comment adjusted to "if bound is known to be MAX/MIN val." > >> >> >> >>> + /* Check if there is possible wrap. */ >> >>> + class tree_niter_desc niter; >> >>> + if (!number_of_iterations_exit (loop, e, &niter, false, false)) >> > cut... >> >>> + >> >>> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ >> >> >> >> It now occurs to me that we nowhere check the evolution of IDX >> >> (split_at_bb_p uses simple_iv for this for example). The transform >> >> assumes that we will actually hit i == n and that i increments, but >> >> while you check the control IV from number_of_iterations_exit >> >> for NE_EXPR that does not guarantee a positive evolution. >> >> >> > If I do not correctly reply your question, please point out: >> > number_of_iterations_exit is similar with simple_iv to invoke >> > simple_iv_with_niters >> > which check the evolution, and number_of_iterations_exit check >> > number_of_iterations_cond >> > which check no_overflow more accurate, this is one reason I use this >> > function. >> > >> > This transform assumes that the last run hits i==n. >> > Otherwise, the loop may run infinitely wrap after wrap. >> > For safe, if the step is 1 or -1, this assumption would be true. I >> > would add this check. > > OK. > >> > Thanks so much for pointing out I missed the negative step! >> > >> >> Your testcases do not include any negative step examples, but I guess >> >> the conditions need to be swapped in this case? >> > >> > I would add cases and code to support step 1/-1. >> > >> >> >> >> I think you also have to consider the order we split, say with >> >> >> >> for (i = start; i != end; ++i) >> >> { >> >> push (i); >> >> if (a[i] != b[i]) >> >> break; >> >> } >> >> >> >> push (i) calls need to be in the same order for all cases of >> >> start < end, start == end and start > end (and also cover >> >> runtime testcases with end == 0 or end == UINT_MAX, likewise >> >> for start). >> > I add tests for the above cases. If missing sth, please point out, thanks! >> > >> >> >> >>> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); >> >>> + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; >> >>> + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; >> > cut.... >> > >> > Thanks again for the very helpful review! >> > >> > BR, >> > Jiufu Guo. >> >> Here is the updated patch, thanks for your time! >> >> diff --git a/gcc/testsuite/gcc.dg/loop-split1.c >> b/gcc/testsuite/gcc.dg/loop-split1.c >> new file mode 100644 >> index 00000000000..dd2d03a7b96 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/loop-split1.c >> @@ -0,0 +1,101 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ >> + >> +void >> +foo (int *a, int *b, unsigned l, unsigned n) >> +{ >> + while (++l != n) >> + a[l] = b[l] + 1; >> +} >> +void >> +foo_1 (int *a, int *b, unsigned n) >> +{ >> + unsigned l = 0; >> + while (++l != n) >> + a[l] = b[l] + 1; >> +} >> + >> +void >> +foo1 (int *a, int *b, unsigned l, unsigned n) >> +{ >> + while (l++ != n) >> + a[l] = b[l] + 1; >> +} >> + >> +/* No wrap. */ >> +void >> +foo1_1 (int *a, int *b, unsigned n) >> +{ >> + unsigned l = 0; >> + while (l++ != n) >> + a[l] = b[l] + 1; >> +} >> + >> +unsigned >> +foo2 (char *a, char *b, unsigned l, unsigned n) >> +{ >> + while (++l != n) >> + if (a[l] != b[l]) >> + break; >> + >> + return l; >> +} >> + >> +unsigned >> +foo2_1 (char *a, char *b, unsigned l, unsigned n) >> +{ >> + l = 0; >> + while (++l != n) >> + if (a[l] != b[l]) >> + break; >> + >> + return l; >> +} >> + >> +unsigned >> +foo3 (char *a, char *b, unsigned l, unsigned n) >> +{ >> + while (l++ != n) >> + if (a[l] != b[l]) >> + break; >> + >> + return l; >> +} >> + >> +/* No wrap. */ >> +unsigned >> +foo3_1 (char *a, char *b, unsigned l, unsigned n) >> +{ >> + l = 0; >> + while (l++ != n) >> + if (a[l] != b[l]) >> + break; >> + >> + return l; >> +} >> + >> +void >> +bar (); >> +void >> +foo4 (unsigned n, unsigned i) >> +{ >> + do >> + { >> + if (i == n) >> + return; >> + bar (); >> + ++i; >> + } >> + while (1); >> +} >> + >> +unsigned >> +find_skip_diff (char *p, char *q, unsigned n, unsigned i) >> +{ >> + while (p[i] == q[i] && ++i != n) >> + p++, q++; >> + >> + return i; >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ >> diff --git a/gcc/testsuite/gcc.dg/loop-split2.c >> b/gcc/testsuite/gcc.dg/loop-split2.c >> new file mode 100644 >> index 00000000000..56377e2f2f5 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/loop-split2.c >> @@ -0,0 +1,155 @@ >> +/* { dg-do run } */ >> +/* { dg-options "-O3" } */ >> + >> +extern void >> +abort (void); >> +extern void >> +exit (int); >> +void >> +push (int); >> + >> +#define NI __attribute__ ((noinline)) >> + >> +void NI >> +foo (int *a, int *b, unsigned char l, unsigned char n) >> +{ >> + while (++l != n) >> + a[l] = b[l] + 1; >> +} >> + >> +unsigned NI >> +bar (int *a, int *b, unsigned char l, unsigned char n) >> +{ >> + while (l++ != n) >> + { >> + push (l); >> + if (a[l] != b[l]) >> + break; >> + push (l + 1); >> + } >> + return l; >> +} >> + >> +void NI >> +foo_1 (int *a, int *b, unsigned char l, unsigned char n) >> +{ >> + while (--l != n) >> + a[l] = b[l] + 1; >> +} >> + >> +unsigned NI >> +bar_1 (int *a, int *b, unsigned char l, unsigned char n) >> +{ >> + while (l-- != n) >> + { >> + push (l); >> + if (a[l] != b[l]) >> + break; >> + push (l + 1); >> + } >> + >> + return l; >> +} >> + >> +int a[258]; >> +int b[258]; >> +int c[1024]; >> +static int top = 0; >> +void >> +push (int e) >> +{ >> + c[top++] = e; >> +} >> + >> +void >> +reset () >> +{ >> + top = 0; >> + __builtin_memset (c, 0, sizeof (c)); >> +} >> + >> +#define check(a, b) (a == b) >> + >> +int >> +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5) >> +{ >> + return check (c[0], a0) && check (c[1], a1) && check (c[2], a2) >> + && check (c[3], a3) && check (c[4], a4) && check (c[5], a5); >> +} >> + >> +int >> +main () >> +{ >> + __builtin_memcpy (b, a, sizeof (a)); >> + reset (); >> + if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0)) >> + abort (); >> + >> + reset (); >> + if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9) >> + || !check_c (c + 496, 254, 255, 255, 256, 0, 1)) >> + abort (); >> + >> + reset (); >> + if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0)) >> + abort (); >> + >> + reset (); >> + if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, >> 0, 0)) >> + abort (); >> + >> + reset (); >> + if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, >> 1)) >> + abort (); >> + >> + reset (); >> + if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4)) >> + abort (); >> + >> + reset (); >> + if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0)) >> + abort (); >> + >> + reset (); >> + if (bar_1 (a, b, 6, 6) != 5) >> + abort (); >> + >> + reset (); >> + if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, >> 256)) >> + abort (); >> + >> + reset (); >> + if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0)) >> + abort (); >> + >> + b[100] += 1; >> + reset (); >> + if (bar (a, b, 90, 110) != 100) >> + abort (); >> + >> + reset (); >> + if (bar (a, b, 110, 105) != 100) >> + abort (); >> + >> + reset (); >> + if (bar_1 (a, b, 90, 110) != 109) >> + abort (); >> + >> + reset (); >> + if (bar_1 (a, b, 2, 90) != 100) >> + abort (); >> + >> + foo (a, b, 99, 99); >> + a[99] = b[99] + 1; >> + for (int i = 0; i < 256; i++) >> + if (a[i] != b[i] + 1) >> + abort (); >> + >> + foo_1 (a, b, 99, 99); >> + a[99] = b[99] + 1; >> + for (int i = 0; i < 256; i++) >> + if (a[i] != b[i] + 1) >> + abort (); >> + >> + exit (0); >> +} >> diff --git a/gcc/testsuite/gcc.dg/loop-split3.c >> b/gcc/testsuite/gcc.dg/loop-split3.c >> new file mode 100644 >> index 00000000000..ec93ee8bd12 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/loop-split3.c >> @@ -0,0 +1,62 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ >> + >> +void >> +foo (int *a, int *b, unsigned l, unsigned n) >> +{ >> + while (--l != n) >> + a[l] = b[l] + 1; >> +} >> + >> +void >> +foo1 (int *a, int *b, unsigned l, unsigned n) >> +{ >> + while (l-- != n) >> + a[l] = b[l] + 1; >> +} >> + >> +unsigned >> +foo2 (char *a, char *b, unsigned l, unsigned n) >> +{ >> + while (--l != n) >> + if (a[l] != b[l]) >> + break; >> + >> + return l; >> +} >> + >> +unsigned >> +foo3 (char *a, char *b, unsigned l, unsigned n) >> +{ >> + while (l-- != n) >> + if (a[l] != b[l]) >> + break; >> + >> + return l; >> +} >> + >> +void >> +bar (); >> +void >> +foo4 (unsigned n, unsigned i) >> +{ >> + do >> + { >> + if (i == n) >> + return; >> + bar (); >> + --i; >> + } >> + while (1); >> +} >> + >> +unsigned >> +find_skip_diff (char *p, char *q, unsigned n, unsigned i) >> +{ >> + while (p[i] == q[i] && --i != n) >> + p--, q--; >> + >> + return i; >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */ >> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c >> index 3a09bbc39e5..e9f23b32186 100644 >> --- a/gcc/tree-ssa-loop-split.c >> +++ b/gcc/tree-ssa-loop-split.c >> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see >> #include "cfghooks.h" >> #include "gimple-fold.h" >> #include "gimplify-me.h" >> +#include "tree-ssa-loop-ivopts.h" >> >> /* This file implements two kinds of loop splitting. >> >> @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop) >> conditional). I.e. the second loop can now be entered either >> via the original entry or via NEW_E, so the entry values of LOOP2 >> phi nodes are either the original ones or those at the exit >> - of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting >> - this. The loops need to fulfill easy_exit_values(). */ >> + of LOOP1. Selecting the previous value instead next value as the >> + exit value of LOOP1 if USE_PREV is true. Insert new phi nodes in >> + LOOP2 pre-header reflecting this. The loops need to fulfill >> + easy_exit_values(). */ >> >> static void >> -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) >> +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, >> + bool use_prev = false) >> { >> basic_block rest = loop_preheader_edge (loop2)->src; >> gcc_assert (new_e->dest == rest); >> @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop >> *loop2, >> edge new_e) >> >> gphi * newphi = create_phi_node (new_init, rest); >> add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); >> - add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); >> + add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, >> new_e, >> + UNKNOWN_LOCATION); >> SET_USE (op, new_init); >> } >> } >> @@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop) >> return do_split; >> } >> >> +/* Check if the LOOP exit branch is like "if (idx != bound)", >> + Return the branch edge which exit loop, if wrap may happen on >> "idx". */ >> + >> +static edge >> +get_ne_cond_branch (struct loop *loop, tree *step) >> +{ >> + int i; >> + edge e; >> + >> + auto_vec<edge> edges = get_loop_exit_edges (loop); >> + FOR_EACH_VEC_ELT (edges, i, e) >> + { >> + basic_block bb = e->src; >> + >> + /* Check if exit at gcond. */ >> + gimple *last = last_stmt (bb); >> + if (!last || gimple_code (last) != GIMPLE_COND) >> + continue; >> + gcond *cond = as_a<gcond *> (last); > > gcond *cont = safe_dyn_cast <gcond *> (last_stmt (bb)); > if (!last) > continue; > > is shorter. Thanks. > >> + enum tree_code code = gimple_cond_code (cond); >> + if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE)) >> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) >> + continue; >> + >> + /* Check if bound is invarant. */ >> + tree idx = gimple_cond_lhs (cond); >> + tree bnd = gimple_cond_rhs (cond); >> + if (expr_invariant_in_loop_p (loop, idx)) >> + std::swap (idx, bnd); >> + else if (!expr_invariant_in_loop_p (loop, bnd)) >> + continue; >> + >> + /* Only unsigned type conversion could cause wrap. */ >> + tree type = TREE_TYPE (idx); >> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME >> + || !TYPE_UNSIGNED (type)) >> + continue; >> + >> + /* Avoid to split if bound is MAX/MIN val. */ >> + tree bound_type = TREE_TYPE (bnd); >> + if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) >> + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) >> + continue; >> + >> + /* Check if there is possible wrap. */ >> + class tree_niter_desc niter; >> + if (!number_of_iterations_exit (loop, e, &niter, false, false)) >> + continue; >> + if (niter.control.no_overflow) >> + return NULL; >> + if (niter.cmp != NE_EXPR) >> + continue; >> + if (!integer_onep (niter.control.step) >> + && !integer_minus_onep (niter.control.step)) >> + continue; >> + *step = niter.control.step; >> + >> + /* If exit edge is just before the empty latch, it is easy to >> link >> + the split loops: just jump from the exit edge of one loop to the >> + header of new loop. */ >> + if (single_pred_p (loop->latch) >> + && single_pred_edge (loop->latch)->src == bb >> + && empty_block_p (loop->latch)) >> + return e; >> + >> + /* If exit edge is at end of header, and header contains i++ or >> ++i, >> + only, it is simple to link the split loops: jump from the end of >> + one loop header to the new loop header, and use unchanged PHI >> result >> + of the first loop as the entry PHI value of the second loop. */ >> + if (bb == loop->header) >> + { >> + /* Only one phi. */ >> + gphi_iterator psi = gsi_start_phis (bb); >> + if (gsi_end_p (psi)) >> + continue; >> + gphi *phi = psi.phi (); >> + gsi_next (&psi); >> + if (!gsi_end_p (psi)) >> + continue; >> + >> + /* Check it is ++i or ++i */ >> + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); >> + tree prev = PHI_RESULT (phi); >> + if (idx != prev && idx != next) >> + continue; >> + >> + gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb >> (bb); >> + if (gsi_end_p (gsi)) >> + continue; >> + gimple *s1 = gsi_stmt (gsi); >> + if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next >> + || gimple_assign_rhs1 (s1) != prev) >> + continue; >> + >> + gsi_next_nondebug (&gsi); >> + if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond) >> + return e; >> + } >> + } >> + >> + return NULL; >> +} >> + >> +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and >> LT_EXPR. */ >> + >> +static bool >> +split_ne_loop (struct loop *loop, edge cond_e, tree step) >> +{ >> + initialize_original_copy_tables (); >> + >> + struct loop *loop2 = loop_version (loop, boolean_true_node, NULL, >> + profile_probability::always (), >> + profile_probability::never (), >> + profile_probability::always (), >> + profile_probability::always (), true); >> + >> + gcc_assert (loop2); >> + update_ssa (TODO_update_ssa); >> + >> + basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src); >> + free_original_copy_tables (); >> + >> + gcond *gc = as_a<gcond *> (last_stmt (cond_e->src)); >> + gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb)); >> + >> + /* Invert edges for gcond. */ >> + if (gimple_cond_code (gc) == EQ_EXPR) >> + { >> + auto invert_edge = [](basic_block bb) { >> + edge out = EDGE_SUCC (bb, 0); >> + edge in = EDGE_SUCC (bb, 1); >> + if (in->flags & EDGE_TRUE_VALUE) >> + std::swap (in, out); >> + in->flags |= EDGE_TRUE_VALUE; >> + in->flags &= ~EDGE_FALSE_VALUE; >> + out->flags |= EDGE_FALSE_VALUE; >> + out->flags &= ~EDGE_TRUE_VALUE; >> + }; >> + >> + invert_edge (gimple_bb (gc)); >> + invert_edge (gimple_bb (dup_gc)); >> + } >> + >> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ >> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); >> + if (tree_int_cst_sign_bit (step)) >> + inv = !inv; >> + enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR; >> + enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR; > > You could handle gimple_cond_code (gc) == EQ_EXPR via > > if (gimple_cond_code (gc) == EQ_EXPR) > { > first_loop_code = invert_tree_comparison (first_loop_code, false); > second_loop_code = invert_tree_comparison (second_loop_code, > false); > } > > that looks simpler than the lambda dance with inverting the edge flags. I did not use invert_tree_comparison in the code, because it would generate GE_EXPR/LE_EXPR which look like containing EQ_EXPR :) I would update patch to use invert_tree_comparison which is simpler and shorter. > >> + gimple_cond_set_code (gc, first_loop_code); >> + gimple_cond_set_code (dup_gc, second_loop_code); >> + >> + /* Link the exit cond edge to new loop. */ >> + gcond *break_cond = as_a<gcond *> (gimple_copy (gc)); >> + edge pred_e = single_pred_edge (loop->latch); >> + bool simple_loop >> + = pred_e && pred_e->src == cond_e->src && empty_block_p >> (loop->latch); >> + if (simple_loop) >> + gimple_cond_set_code (break_cond, second_loop_code); >> + else >> + gimple_cond_make_true (break_cond); >> + >> + basic_block break_bb = split_edge (cond_e); >> + gimple_stmt_iterator gsi = gsi_last_bb (break_bb); >> + gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); >> + >> + edge to_exit = single_succ_edge (break_bb); >> + edge to_new_loop = make_edge (break_bb, loop_preheader_edge >> (loop2)->src, >> 0); >> + to_new_loop->flags |= EDGE_TRUE_VALUE; >> + to_exit->flags |= EDGE_FALSE_VALUE; >> + to_exit->flags &= ~EDGE_FALLTHRU; >> + to_exit->probability = cond_e->probability; >> + to_new_loop->probability = to_exit->probability.invert (); >> + >> + update_ssa (TODO_update_ssa); > > I've re-organized the pass to perform a single TODO_update_ssa at the > very end, please do not update SSA form here, nor > >> + connect_loop_phis (loop, loop2, to_new_loop, !simple_loop); >> + >> + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); > > re-write into loop-closed SSA. Thanks! > >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + fprintf (dump_file, ";; Loop split on wrap index.\n"); >> + >> + return true; >> +} >> + >> +/* Checks if LOOP contains a suitable NE_EXPR conditional block to >> split. >> +L_H: >> + if (i!=N) >> + S; >> + else >> + goto EXIT; >> + i++; >> + goto L_H; >> + >> +The "i!=N" is like "i>N || i<N", then it could be transformed to: >> + >> +L_H: >> + if (i>N) >> + S; >> + else >> + goto EXIT; >> + i++; >> + goto L_H; >> +L1_H: >> + if (i<N) >> + S; >> + else >> + goto EXIT; >> + i++; >> + goto L1_H; >> + >> +The loop with "i<N" is in favor of both GIMPLE and RTL passes. */ >> + >> +static bool >> +split_loop_on_ne_cond (class loop *loop) >> +{ >> + tree step; >> + edge branch_edge = get_ne_cond_branch (loop, &step); >> + if (!branch_edge) >> + return false; >> + >> + int num = 0; >> + basic_block *bbs = get_loop_body (loop); >> + for (unsigned i = 0; i < loop->num_nodes; i++) >> + num += estimate_num_insns_seq (bb_seq (bbs[i]), >> &eni_size_weights); > > Since the motivation is to make data-refs analyzable after the > transform > if there are no datarefs the transform only increases code-size. In > particular I would look for calls which will be not analyzable. Since > we're looking at each stmt above that could be embedded here. As said > earlier once num exceeds param_max_peeled_insns you can stop the above > loop. So heuristically I'd do sth like Right, if no following optimization enabled, it only increases code-size and increase branches overhead, and may not gain performance. > > for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p > (gsi); gsi_next (&gsi)) > { > gimple *stmt = gsi_stmt (gsi); > if (is_gimple_debug (stmt)) > continue; > if (gimple_has_side_effects (stmt)) > { > free (bbs); > return false; > } > num += estimate_num_insns (stmt, &eni_size_weights); > if (num > param_max_peeled_insns) > { > free (bbs); > return false; > } > if (gimple_vuse (stmt)) > any_dr = true; > } > if (!any_dr) > { > free (bbs); > return false; > } > > There's also still the issue that the transformed loop will fail > number of iteration analysis for the loop that iterates until > the IV wraps. That's a blocker for the acceptance of this transform. Understand, I saw you opened PR101145. Thanks for your comments! BR, Jiufu Guo. > > Richard.
diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc b/gcc/testsuite/g++.dg/vect/pr98064.cc index 74043ce7725..dcb2985d05a 100644 --- a/gcc/testsuite/g++.dg/vect/pr98064.cc +++ b/gcc/testsuite/g++.dg/vect/pr98064.cc @@ -1,5 +1,7 @@ // { dg-do compile } -// { dg-additional-options "-O3" } +// { dg-additional-options "-O3 -Wno-stringop-overflow" } +/* There is warning message when "short g = var_8; g; g++" + is optimized/analyzed as string operation,e.g. memset. */ const long long &min(const long long &__a, long long &__b) { if (__b < __a) diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c new file mode 100644 index 00000000000..dd2d03a7b96 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split1.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ + +void +foo (int *a, int *b, unsigned l, unsigned n) +{ + while (++l != n) + a[l] = b[l] + 1; +} +void +foo_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (++l != n) + a[l] = b[l] + 1; +} + +void +foo1 (int *a, int *b, unsigned l, unsigned n) +{ + while (l++ != n) + a[l] = b[l] + 1; +} + +/* No wrap. */ +void +foo1_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (l++ != n) + a[l] = b[l] + 1; +} + +unsigned +foo2 (char *a, char *b, unsigned l, unsigned n) +{ + while (++l != n) + if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo2_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (++l != n) + if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo3 (char *a, char *b, unsigned l, unsigned n) +{ + while (l++ != n) + if (a[l] != b[l]) + break; + + return l; +} + +/* No wrap. */ +unsigned +foo3_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (l++ != n) + if (a[l] != b[l]) + break; + + return l; +} + +void +bar (); +void +foo4 (unsigned n, unsigned i) +{ + do + { + if (i == n) + return; + bar (); + ++i; + } + while (1); +} + +unsigned +find_skip_diff (char *p, char *q, unsigned n, unsigned i) +{ + while (p[i] == q[i] && ++i != n) + p++, q++; + + return i; +} + +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-split2.c b/gcc/testsuite/gcc.dg/loop-split2.c new file mode 100644 index 00000000000..0d3fded3f61 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split2.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +extern void abort (void); +extern void exit (int); + +#define NI __attribute__ ((noinline)) + +void NI +foo (int *a, int *b, unsigned char l, unsigned char n) +{ + while (++l != n) + a[l] = b[l] + 1; +} + +unsigned NI +bar (int *a, int *b, unsigned char l, unsigned char n) +{ + while (l++ != n) + if (a[l] != b[l]) + break; + + return l; +} + +int a[258]; +int b[258]; + +int main() +{ + __builtin_memcpy (b, a, sizeof (a)); + + if (bar (a, b, 3, 8) != 9) + abort (); + + if (bar (a, b, 8, 3) != 4) + abort (); + + b[100] += 1; + if (bar (a, b, 90, 110) != 100) + abort (); + + if (bar (a, b, 110, 105) != 100) + abort (); + + foo (a, b, 99, 99); + a[99] = b[99] + 1; + for (int i = 0; i < 256; i++) + if (a[i] != b[i] + 1) + abort(); + + exit (0); +} + diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 3a09bbc39e5..9c0de66e288 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "cfghooks.h" #include "gimple-fold.h" #include "gimplify-me.h" +#include "tree-ssa-loop-ivopts.h" /* This file implements two kinds of loop splitting. @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop) conditional). I.e. the second loop can now be entered either via the original entry or via NEW_E, so the entry values of LOOP2 phi nodes are either the original ones or those at the exit - of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting - this. The loops need to fulfill easy_exit_values(). */ + of LOOP1. Selecting the previous value instead next value as the + exit value of LOOP1 if USE_PREV is true. Insert new phi nodes in + LOOP2 pre-header reflecting this. The loops need to fulfill + easy_exit_values(). */ static void -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, + bool use_prev = false) { basic_block rest = loop_preheader_edge (loop2)->src; gcc_assert (new_e->dest == rest); @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) gphi * newphi = create_phi_node (new_init, rest); add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); - add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); + add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e, + UNKNOWN_LOCATION); SET_USE (op, new_init); } } @@ -1593,6 +1598,241 @@ split_loop_on_cond (struct loop *loop) return do_split; } +/* Check if the LOOP exit branch is like "if (idx != bound)", + Return the branch edge which exit loop, if wrap may happen on "idx". */ + +static edge +get_ne_cond_branch (struct loop *loop) +{ + int i; + edge e; + + auto_vec<edge> edges = get_loop_exit_edges (loop); + FOR_EACH_VEC_ELT (edges, i, e) + { + basic_block bb = e->src; + + /* Check if exit at gcond. */ + gimple *last = last_stmt (bb); + if (!last || gimple_code (last) != GIMPLE_COND) + continue; + gcond *cond = as_a<gcond *> (last); + enum tree_code code = gimple_cond_code (cond); + if (!(code == NE_EXPR + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) + continue; + + /* Check if bound is invarant. */ + tree idx = gimple_cond_lhs (cond); + tree bnd = gimple_cond_rhs (cond); + if (expr_invariant_in_loop_p (loop, idx)) + std::swap (idx, bnd); + else if (!expr_invariant_in_loop_p (loop, bnd)) + continue; + + /* Only unsigned type conversion could cause wrap. */ + tree type = TREE_TYPE (idx); + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME + || !TYPE_UNSIGNED (type)) + continue; + + /* Avoid to split if bound is MAX/MIN val. */ + tree bound_type = TREE_TYPE (bnd); + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))) + continue; + + /* Check if there is possible wrap. */ + class tree_niter_desc niter; + if (!number_of_iterations_exit (loop, e, &niter, false, false)) + continue; + if (niter.control.no_overflow) + return NULL; + if (niter.cmp != NE_EXPR) + continue; + + /* If exit edge is just before the empty latch, it is easy to link + the split loops: just jump from the exit edge of one loop to the + header of new loop. */ + if (single_pred_p (loop->latch) + && single_pred_edge (loop->latch)->src == bb + && empty_block_p (loop->latch)) + return e; + + /* If exit edge is at end of header, and header contains i++ or ++i, + only, it is simple to link the split loops: jump from the end of + one loop header to the new loop header, and use unchanged PHI + result of first loop as the entry PHI value of the second loop. */ + if (bb == loop->header) + { + /* Only one phi. */ + gphi_iterator psi = gsi_start_phis (bb); + if (gsi_end_p (psi)) + continue; + gphi *phi = psi.phi (); + gsi_next (&psi); + if (!gsi_end_p (psi)) + continue; + + /* Check it is ++i or ++i */ + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + tree prev = PHI_RESULT (phi); + if (idx != prev && idx != next) + continue; + + gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb); + if (gsi_end_p (gsi)) + continue; + gimple *s1 = gsi_stmt (gsi); + if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next + || gimple_assign_rhs1 (s1) != prev) + continue; + + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond) + return e; + } + } + + return NULL; +} + +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR. */ + +static bool +split_ne_loop (struct loop *loop, edge cond_e) +{ + initialize_original_copy_tables (); + + struct loop *loop2 = loop_version (loop, boolean_true_node, NULL, + profile_probability::always (), + profile_probability::never (), + profile_probability::always (), + profile_probability::always (), true); + + gcc_assert (loop2); + update_ssa (TODO_update_ssa); + + basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src); + free_original_copy_tables (); + + gcond *gc = as_a<gcond *> (last_stmt (cond_e->src)); + gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb)); + + /* Invert edges for gcond. */ + if (gimple_cond_code (gc) == EQ_EXPR) + { + auto invert_edge = [](basic_block bb) { + edge out = EDGE_SUCC (bb, 0); + edge in = EDGE_SUCC (bb, 1); + if (in->flags & EDGE_TRUE_VALUE) + std::swap (in, out); + in->flags |= EDGE_TRUE_VALUE; + in->flags &= ~EDGE_FALSE_VALUE; + out->flags |= EDGE_FALSE_VALUE; + out->flags &= ~EDGE_TRUE_VALUE; + }; + + invert_edge (gimple_bb (gc)); + invert_edge (gimple_bb (dup_gc)); + } + + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; + + gimple_cond_set_code (gc, up_code); + gimple_cond_set_code (dup_gc, down_code); + + /* Link the exit cond edge to new loop. */ + gcond *break_cond = as_a<gcond *> (gimple_copy (gc)); + edge pred_e = single_pred_edge (loop->latch); + bool simple_loop + = pred_e && pred_e->src == cond_e->src && empty_block_p (loop->latch); + if (simple_loop) + gimple_cond_set_code (break_cond, down_code); + else + gimple_cond_make_true (break_cond); + + basic_block break_bb = split_edge (cond_e); + gimple_stmt_iterator gsi = gsi_last_bb (break_bb); + gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); + + edge to_exit = single_succ_edge (break_bb); + edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0); + to_new_loop->flags |= EDGE_TRUE_VALUE; + to_exit->flags |= EDGE_FALSE_VALUE; + to_exit->flags &= ~EDGE_FALLTHRU; + to_exit->probability = cond_e->probability; + to_new_loop->probability = to_exit->probability.invert (); + + update_ssa (TODO_update_ssa); + + connect_loop_phis (loop, loop2, to_new_loop, !simple_loop); + + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Loop split on wrap index.\n"); + + return true; +} + +/* Checks if LOOP contains a suitable NE_EXPR conditional block to split. +L_H: + if (i!=N) + S; + else + goto EXIT; + i++; + goto L_H; + +The "i!=N" is like "i>N || i<N", then it could be transform to: + +L_H: + if (i>N) + S; + else + goto EXIT; + i++; + goto L_H; +L1_H: + if (i<N) + S; + else + goto EXIT; + i++; + goto L1_H; + +The loop with "i<N" is in favor both GIMPLE and RTL passes. */ + +static bool +split_loop_on_ne_cond (class loop *loop) +{ + int num = 0; + basic_block *bbs = get_loop_body (loop); + + if (!can_copy_bbs_p (bbs, loop->num_nodes)) + { + free (bbs); + return false; + } + + for (unsigned i = 0; i < loop->num_nodes; i++) + num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); + free (bbs); + + if (num > param_max_peeled_insns) + return false; + + edge branch_edge = get_ne_cond_branch (loop); + if (branch_edge && split_ne_loop (loop, branch_edge)) + return true; + + return false; +} + /* Main entry point. Perform loop splitting on all suitable loops. */ static unsigned int @@ -1622,7 +1862,8 @@ tree_ssa_split_loops (void) if (optimize_loop_for_size_p (loop)) continue; - if (split_loop (loop) || split_loop_on_cond (loop)) + if (split_loop (loop) || split_loop_on_cond (loop) + || split_loop_on_ne_cond (loop)) { /* Mark our containing loop as having had some split inner loops. */ loop_outer (loop)->aux = loop;