Message ID | 9f0c505cd216d45f06b63cd94152fc8a@imap.linux.ibm.com |
---|---|
State | New |
Series | [RFC] New idea to split loop based on no-wrap conditions |
On Mon, 21 Jun 2021, guojiufu wrote: > On 2021-06-21 14:19, guojiufu via Gcc-patches wrote: > > On 2021-06-09 19:18, guojiufu wrote: > >> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: > >>> On 2021-06-08 18:13, Richard Biener wrote: > >>>> On Fri, 4 Jun 2021, Jiufu Guo wrote: > >>>> > >>> cut... > > cut... > >> > > Besides the method in the previous mails, > I’m thinking of another way to split loops: > > foo (int *a, int *b, unsigned k, unsigned n) > { > while (++k != n) > a[k] = b[k] + 1; > } > > We may split it into: > if (k<n) > { > while (++k < n) //loop1 > a[k] = b[k] + 1; > } > else > { > while (++k != n) //loop2 > a[k] = b[k] + 1; > } > > In most cases, loop1 would be hit, the overhead of this method is only > checking “if (k<n)” > which would be smaller than the previous method. That would be your original approach of versioning the loop. I think I suggested that for this scalar evolution and dataref analysis should be enhanced to build up conditions under which IV evolutions are affine (non-wrapping) and the versioning code in actual transforms should then do the appropriate versioning (like the vectorizer already does for niter analysis ->assumptions for example). Richard. > And this method would be more easy to extend to nest loops like: > unsigned int l_n = 0; > unsigned int l_m = 0; > unsigned int l_k = 0; > for (l_n = 0; l_n != n; l_n++) > for (l_k = 0; l_k != k; l_k++) > for (l_m = 0; l_m != m; l_m++) > xxx; > > Do you think this method is more valuable to implement? > Below is a quick patch. This patch does not support nest loops yet. > > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > index 3a09bbc39e5..c9d161565e4 100644 > --- a/gcc/tree-ssa-loop-split.c > +++ b/gcc/tree-ssa-loop-split.c > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see > #include "cfghooks.h" > #include "gimple-fold.h" > #include "gimplify-me.h" > +#include "tree-ssa-loop-ivopts.h" > > /* This file implements two kinds of loop splitting. > > @@ -1593,6 +1594,468 @@ split_loop_on_cond (struct loop *loop) > return do_split; > } > > +/* Filter out type conversions on IDX. > + Store the shortest type during conversion to SMALL_TYPE. > + Store the longest type during conversion to LARGE_TYPE. */ > + > +static gimple * > +filter_conversions (class loop *loop, tree idx, tree *small_type = NULL, > + tree *large_type = NULL) > +{ > + gcc_assert (TREE_CODE (idx) == SSA_NAME); > + gimple *stmt = SSA_NAME_DEF_STMT (idx); > + while (is_gimple_assign (stmt) > + && flow_bb_inside_loop_p (loop, gimple_bb (stmt))) > + { > + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))) > + { > + idx = gimple_assign_rhs1 (stmt); > + if (small_type) > + { > + tree type = TREE_TYPE (idx); > + if (TYPE_PRECISION (*small_type) > TYPE_PRECISION (type) > + || (TYPE_PRECISION (*small_type) == TYPE_PRECISION (type) > + && TYPE_UNSIGNED (*small_type) && !TYPE_UNSIGNED > (type))) > + *small_type = type; > + } > + if (large_type) > + { > + tree type = TREE_TYPE (idx); > + if (TYPE_PRECISION (*large_type) < TYPE_PRECISION (type) > + || (TYPE_PRECISION (*large_type) == TYPE_PRECISION (type) > + && !TYPE_UNSIGNED (*large_type) && TYPE_UNSIGNED > (type))) > + *large_type = type; > + } > + } > + else > + break; > + > + if (TREE_CODE (idx) != SSA_NAME) > + break; > + stmt = SSA_NAME_DEF_STMT (idx); > + } > + return stmt; > +} > + > +/* Collection of loop index related elements. 
*/ > +struct idx_elements > +{ > + gcond *gc; > + gphi *phi; > + gimple *inc_stmt; > + tree idx; > + tree bnd; > + tree step; > + tree large_type; > + tree small_type; > + bool cmp_on_next; > +}; > + > +/* Analyze and get the idx related elements: bnd, > + phi, increase stmt from exit edge E, etc. > + > + i = phi (b, n) > + ... > + n0 = ik + 1 > + n1 = (type)n0 > + ... > + if (i != bnd) or if (n != bnd) > + ... > + n = ()nl > + > + IDX is the i' or n'. */ > + > +bool > +analyze_idx_elements (class loop *loop, edge e, idx_elements &data) > +{ > + /* Avoid complicated edge. */ > + if (e->flags & EDGE_FAKE) > + return false; > + if (e->src != loop->header && e->src != single_pred (loop->latch)) > + return false; > + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->src)) > + return false; > + > + /* Check gcond. */ > + gimple *last = last_stmt (e->src); > + if (!last || gimple_code (last) != GIMPLE_COND) > + return false; > + > + /* Get idx and bnd from gcond. */ > + gcond *gc = as_a<gcond *> (last); > + tree bnd = gimple_cond_rhs (gc); > + tree idx = gimple_cond_lhs (gc); > + if (expr_invariant_in_loop_p (loop, idx)) > + std::swap (idx, bnd); > + else if (!expr_invariant_in_loop_p (loop, bnd)) > + return false; > + if (TREE_CODE (idx) != SSA_NAME) > + return false; > + > + gimple *inc_stmt = NULL; > + bool cmp_next = false; > + tree small_type = TREE_TYPE (idx); > + tree large_type = small_type; > + gimple *stmt = filter_conversions (loop, idx, &small_type, &large_type); > + /* The idx on gcond is not PHI, it would be next. */ > + if (is_gimple_assign (stmt)) > + { > + tree rhs = gimple_assign_rhs1 (stmt); > + if (TREE_CODE (rhs) != SSA_NAME) > + return false; > + > + cmp_next = true; > + inc_stmt = stmt; > + stmt = filter_conversions (loop, rhs, &small_type, &large_type); > + } > + > + /* Get phi and next. */ > + if (gimple_code (stmt) != GIMPLE_PHI || gimple_bb (stmt) != loop->header) > + return false; > + gphi *phi = as_a<gphi *> (stmt); > + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); > + if (TREE_CODE (next) != SSA_NAME) > + return false; > + > + /* The define of next should be the increasement stmt. */ > + stmt = filter_conversions (loop, next, &small_type, &large_type); > + if (inc_stmt != NULL && inc_stmt != stmt) > + return false; > + if (!is_gimple_assign (stmt) > + || !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) > + return false; > + tree_code rhs_code = gimple_assign_rhs_code (stmt); > + if (rhs_code != PLUS_EXPR) > + return false; > + tree step = gimple_assign_rhs2 (stmt); > + if (!integer_minus_onep (step) && !integer_onep (step)) > + return false; > + inc_stmt = stmt; > + tree prev = gimple_assign_rhs1 (stmt); > + if (TREE_CODE (prev) != SSA_NAME) > + return false; > + stmt = filter_conversions (loop, prev, &small_type, &large_type); > + if (stmt != phi) > + return false; > + > + data.gc = gc; > + data.phi = phi; > + data.idx = idx; > + data.bnd = bnd; > + data.step = step; > + data.large_type = large_type; > + data.small_type = small_type; > + data.inc_stmt = inc_stmt; > + data.cmp_on_next = cmp_next; > + > + return true; > +} > + > +/* Check if the loop is possible to wrap at index. > + Return the assumption under which the wrap will not happen. > + Return NULL_TREE, if wrap will not happen. 
*/ > + > +static tree > +get_wrap_assumption (class loop *loop, edge *exit, idx_elements &data) > +{ > + int i; > + edge e; > + if (!single_pred_p (loop->latch) || !empty_block_p (loop->latch)) > + return NULL_TREE; > + > + auto_vec<edge> edges = get_loop_exit_edges (loop); > + FOR_EACH_VEC_ELT (edges, i, e) > + { > + if (!analyze_idx_elements (loop, e, data)) > + continue; > + > + /* Check if bound is MAX/MIN val of a integral type. */ > + tree bnd = data.bnd; > + tree bnd_type = TREE_TYPE (bnd); > + if (!INTEGRAL_TYPE_P (bnd_type) || !TYPE_UNSIGNED (bnd_type)) > + continue; > + if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bnd_type)) > + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bnd_type))) > + continue; > + > + /* Check if it is "idx != bnd" or "idx < bnd". */ > + gcond *gc = data.gc; > + enum tree_code code = gimple_cond_code (gc); > + if (bnd == gimple_cond_lhs (gc)) > + code = swap_tree_comparison (code); > + if ((e->flags & EDGE_TRUE_VALUE) && code == EQ_EXPR) > + code = NE_EXPR; > + if (code != NE_EXPR && code != LT_EXPR && code != GT_EXPR) > + continue; > + > + /* Check if idx is iv with base and step. */ > + affine_iv iv; > + tree iv_niters = NULL_TREE; > + tree idx = PHI_RESULT (data.phi); > + if (!simple_iv_with_niters (loop, loop_containing_stmt (gc), idx, &iv, > + &iv_niters, false)) > + continue; > + if (!iv.base || !iv.step || !tree_int_cst_equal (iv.step, data.step)) > + continue; > + > + bool is_negative = tree_int_cst_sign_bit (iv.step); > + enum tree_code expect_code = is_negative ? GT_EXPR : LT_EXPR; > + if (code != NE_EXPR && code != expect_code) > + continue; > + > + /* Update the type for base, bound and the max value of boundary. */ > + tree base = iv.base; > + tree small_type = data.small_type; > + tree large_type = data.large_type; > + tree max_min_bnd = is_negative ? TYPE_MIN_VALUE (small_type) > + : TYPE_MAX_VALUE (small_type); > + if (large_type != TREE_TYPE (base)) > + base = fold_convert (large_type, base); > + if (large_type != TREE_TYPE (bnd)) > + bnd = fold_convert (large_type, bnd); > + if (large_type != small_type) > + max_min_bnd = fold_convert (large_type, max_min_bnd); > + > + /* There is no wrap if bnd <= max value && base <= bnd. */ > + enum tree_code expect_code1 = is_negative ? GE_EXPR : LE_EXPR; > + tree no_wrap > + = fold_build2 (expect_code1, boolean_type_node, bnd, max_min_bnd); > + no_wrap > + = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, no_wrap, > + fold_build2 (expect_code1, boolean_type_node, base, > bnd)); > + > + if (integer_zerop (no_wrap)) > + continue; > + > + *exit = e; > + return no_wrap; > + } > + > + return NULL_TREE; > +} > + > +/* Update the idx and bnd with a suitable type for no wrap/oveflow loop. > + Suitable type would be the largest type of the type conversion which > + occur on index, if the largest type is unsigned, using sizetype. */ > + > +static bool > +update_idx_bnd_type (class loop *loop, idx_elements &data) > +{ > + gphi *phi = data.phi; > + tree type = TREE_TYPE (PHI_RESULT (phi)); > + tree suitable_type = data.large_type; > + if (TYPE_UNSIGNED (suitable_type)) > + { > + if (TYPE_PRECISION (suitable_type) == TYPE_PRECISION (sizetype)) > + return false; > + suitable_type = sizetype; > + } > + > + /* New base and new bound. 
*/ > + tree bnd = data.bnd; > + edge pre_e = loop_preheader_edge (loop); > + tree base = PHI_ARG_DEF_FROM_EDGE (phi, pre_e); > + tree new_base = fold_convert (suitable_type, base); > + tree new_bnd = fold_convert (suitable_type, bnd); > + gimple_seq seq = NULL; > + new_base = force_gimple_operand (new_base, &seq, true, NULL_TREE); > + if (seq) > + gsi_insert_seq_on_edge_immediate (pre_e, seq); > + seq = NULL; > + new_bnd = force_gimple_operand (new_bnd, &seq, true, NULL_TREE); > + if (seq) > + gsi_insert_seq_on_edge_immediate (pre_e, seq); > + > + /* new_i = phi (new_b, new_n) > + new_n = new_i + 1 */ > + edge latch_e = loop_latch_edge (loop); > + const char *name = "idx"; > + tree new_idx = make_temp_ssa_name (suitable_type, NULL, name); > + tree new_next = make_temp_ssa_name (suitable_type, NULL, name); > + gphi *newphi = create_phi_node (new_idx, loop->header); > + add_phi_arg (newphi, new_base, pre_e, UNKNOWN_LOCATION); > + add_phi_arg (newphi, new_next, latch_e, UNKNOWN_LOCATION); > + > + /* New increase stmt. */ > + gimple *inc_stmt = data.inc_stmt; > + tree step = gimple_assign_rhs2 (inc_stmt); > + if (tree_int_cst_sign_bit(step)) > + { > + wide_int v = wi::to_wide (step); > + v = wide_int::from (v, TYPE_PRECISION (suitable_type), > + SIGNED); > + > + step = wide_int_to_tree (suitable_type, v); > + } > + else > + step = fold_convert (suitable_type, step); > + new_idx = PHI_RESULT (newphi); > + tree_code inc_code = gimple_assign_rhs_code (inc_stmt); > + gimple *new_inc = gimple_build_assign (new_next, inc_code, new_idx, step); > + gimple_stmt_iterator gsi = gsi_for_stmt (inc_stmt); > + gsi_insert_before (&gsi, new_inc, GSI_SAME_STMT); > + > + /* Update gcond. */ > + gcond *gc = data.gc; > + bool inv = bnd == gimple_cond_lhs (gc); > + tree cmp_idx = data.cmp_on_next ? new_next : new_idx; > + gimple_cond_set_lhs (gc, inv ? new_bnd : cmp_idx); > + gimple_cond_set_rhs (gc, inv ? cmp_idx : new_bnd); > + update_stmt (gc); > + > + /* next = (next type)new_next > + And remove next = prev + 1. */ > + tree next = gimple_assign_lhs (inc_stmt); > + type = TREE_TYPE (next); > + gimple *stmt = gimple_build_assign (next, fold_convert (type, new_next)); > + gsi = gsi_for_stmt (inc_stmt); > + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); > + > + gsi = gsi_for_stmt (inc_stmt); > + gsi_remove (&gsi, true); > + > + /* prev = (prev type)new_prev > + And remove prev = phi. */ > + tree idx = PHI_RESULT (phi); > + type = TREE_TYPE (idx); > + stmt = gimple_build_assign (idx, fold_convert (type, new_idx)); > + gsi = gsi_after_labels (loop->header); > + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); > + > + gsi = gsi_for_stmt (phi); > + gsi_remove (&gsi, true); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, ";; Loop index type is updated to use faster > type.\n"); > + > + return true; > +} > + > +/* Split out a new loop which would not wrap, > + under the guard that NO_WRAP_COND will not be true. */ > + > +static bool > +split_wrap_boundary (class loop *loop, edge e, tree no_wrap_cond, > + bool is_negative_step) > +{ > + /* Convert the condition into a suitable gcond. */ > + gimple_seq stmts = NULL; > + no_wrap_cond = force_gimple_operand_1 (no_wrap_cond, &stmts, > + is_gimple_condexpr, NULL_TREE); > + > + /* Version the loop. 
*/ > + initialize_original_copy_tables (); > + basic_block cond_bb; > + loop_version (loop, no_wrap_cond, &cond_bb, > + profile_probability::very_likely (), > + profile_probability::very_unlikely (), > + profile_probability::very_likely (), > + profile_probability::very_unlikely (), true); > + free_original_copy_tables (); > + > + /* Insert the statements that feed COND. */ > + if (stmts) > + { > + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); > + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); > + } > + > + /* Update gcond code. */ > + gcond *gc = as_a<gcond *> (last_stmt (e->src)); > + enum tree_code code = gimple_cond_code (gc); > + enum tree_code new_code; > + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); > + if (is_negative_step) > + new_code = inv ? LT_EXPR : GT_EXPR; > + else > + new_code = inv ? GT_EXPR : LT_EXPR; > + if (code == NE_EXPR || code == EQ_EXPR) > + gimple_cond_set_code (gc, new_code); > + > + /* Swap edge. */ > + if (code == EQ_EXPR) > + { > + edge out = EDGE_SUCC (e->src, 0); > + edge in = EDGE_SUCC (e->src, 1); > + if (in->flags & EDGE_TRUE_VALUE) > + std::swap (in, out); > + in->flags |= EDGE_TRUE_VALUE; > + in->flags &= ~EDGE_FALSE_VALUE; > + out->flags |= EDGE_FALSE_VALUE; > + out->flags &= ~EDGE_TRUE_VALUE; > + } > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + fprintf (dump_file, ";; Loop split on wrap index.\n"); > + > + return true; > +} > + > +/* Split loop if there is possible wrap. > + For example: > + transform > + > + void > + foo (int *a, int *b, unsigned l, unsigned u_n) > + { > + while (++l != u_n) > + a[l] = b[l] + 1; > + } > + > + to: > + if (l < u_n) > + { > + int li = l; > + int n = u_n; > + while (++li < n) > + a[li] = b[li] + 1; > + l = li; > + } > + else > + while (++l != n) > + a[l] = b[l] + 1; > + */ > +static bool > +split_loop_on_wrap (class loop *loop) > +{ > + edge e; > + idx_elements data; > + tree no_wrap = get_wrap_assumption (loop, &e, data); > + > + if (!no_wrap) > + return false; > + > + int num = 0; > + basic_block *bbs = get_loop_body (loop); > + for (unsigned i = 0; i < loop->num_nodes; i++) > + num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); > + > + if (num > param_max_peeled_insns) > + { > + free (bbs); > + return false; > + } > + > + if (!can_copy_bbs_p (bbs, loop->num_nodes)) > + { > + free (bbs); > + return false; > + } > + > + free (bbs); > + > + if (integer_onep (no_wrap)) > + return update_idx_bnd_type (loop, data); > + > + if (split_wrap_boundary (loop, e, no_wrap, tree_int_cst_sign_bit > (data.step))) > + { > + update_idx_bnd_type (loop, data); > + return true; > + } > + > + return false; > +} > + > /* Main entry point. Perform loop splitting on all suitable loops. */ > > static unsigned int > @@ -1622,7 +2085,8 @@ tree_ssa_split_loops (void) > if (optimize_loop_for_size_p (loop)) > continue; > > - if (split_loop (loop) || split_loop_on_cond (loop)) > + if (split_loop (loop) || split_loop_on_cond (loop) > + || split_loop_on_wrap (loop)) > { > /* Mark our containing loop as having had some split inner loops. > */ > loop_outer (loop)->aux = loop; > > > >> Here is the updated patch, thanks for your time! > > > > Updates: > > . Enhance code to support negative step. > > . Check step +-1 to make sure it hits loop condition != > > . Enhance runtime cases to check more boundary cases and run order cases. > > . 
Refine for compiling time: check loop num of insns and can_copy_bbs_p > > later > > > >> > >> diff --git a/gcc/testsuite/gcc.dg/loop-split1.c > >> b/gcc/testsuite/gcc.dg/loop-split1.c > >> new file mode 100644 > >> index 00000000000..dd2d03a7b96 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/loop-split1.c > >> @@ -0,0 +1,101 @@ > >> +/* { dg-do compile } */ > >> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > >> + > >> +void > >> +foo (int *a, int *b, unsigned l, unsigned n) > >> +{ > >> + while (++l != n) > >> + a[l] = b[l] + 1; > >> +} > >> +void > >> +foo_1 (int *a, int *b, unsigned n) > >> +{ > >> + unsigned l = 0; > >> + while (++l != n) > >> + a[l] = b[l] + 1; > >> +} > >> + > >> +void > >> +foo1 (int *a, int *b, unsigned l, unsigned n) > >> +{ > >> + while (l++ != n) > >> + a[l] = b[l] + 1; > >> +} > >> + > >> +/* No wrap. */ > >> +void > >> +foo1_1 (int *a, int *b, unsigned n) > >> +{ > >> + unsigned l = 0; > >> + while (l++ != n) > >> + a[l] = b[l] + 1; > >> +} > >> + > >> +unsigned > >> +foo2 (char *a, char *b, unsigned l, unsigned n) > >> +{ > >> + while (++l != n) > >> + if (a[l] != b[l]) > >> + break; > >> + > >> + return l; > >> +} > >> + > >> +unsigned > >> +foo2_1 (char *a, char *b, unsigned l, unsigned n) > >> +{ > >> + l = 0; > >> + while (++l != n) > >> + if (a[l] != b[l]) > >> + break; > >> + > >> + return l; > >> +} > >> + > >> +unsigned > >> +foo3 (char *a, char *b, unsigned l, unsigned n) > >> +{ > >> + while (l++ != n) > >> + if (a[l] != b[l]) > >> + break; > >> + > >> + return l; > >> +} > >> + > >> +/* No wrap. */ > >> +unsigned > >> +foo3_1 (char *a, char *b, unsigned l, unsigned n) > >> +{ > >> + l = 0; > >> + while (l++ != n) > >> + if (a[l] != b[l]) > >> + break; > >> + > >> + return l; > >> +} > >> + > >> +void > >> +bar (); > >> +void > >> +foo4 (unsigned n, unsigned i) > >> +{ > >> + do > >> + { > >> + if (i == n) > >> + return; > >> + bar (); > >> + ++i; > >> + } > >> + while (1); > >> +} > >> + > >> +unsigned > >> +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > >> +{ > >> + while (p[i] == q[i] && ++i != n) > >> + p++, q++; > >> + > >> + return i; > >> +} > >> + > >> +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ > >> diff --git a/gcc/testsuite/gcc.dg/loop-split2.c > >> b/gcc/testsuite/gcc.dg/loop-split2.c > >> new file mode 100644 > >> index 00000000000..56377e2f2f5 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/loop-split2.c > >> @@ -0,0 +1,155 @@ > >> +/* { dg-do run } */ > >> +/* { dg-options "-O3" } */ > >> + > >> +extern void > >> +abort (void); > >> +extern void > >> +exit (int); > >> +void > >> +push (int); > >> + > >> +#define NI __attribute__ ((noinline)) > >> + > >> +void NI > >> +foo (int *a, int *b, unsigned char l, unsigned char n) > >> +{ > >> + while (++l != n) > >> + a[l] = b[l] + 1; > >> +} > >> + > >> +unsigned NI > >> +bar (int *a, int *b, unsigned char l, unsigned char n) > >> +{ > >> + while (l++ != n) > >> + { > >> + push (l); > >> + if (a[l] != b[l]) > >> + break; > >> + push (l + 1); > >> + } > >> + return l; > >> +} > >> + > >> +void NI > >> +foo_1 (int *a, int *b, unsigned char l, unsigned char n) > >> +{ > >> + while (--l != n) > >> + a[l] = b[l] + 1; > >> +} > >> + > >> +unsigned NI > >> +bar_1 (int *a, int *b, unsigned char l, unsigned char n) > >> +{ > >> + while (l-- != n) > >> + { > >> + push (l); > >> + if (a[l] != b[l]) > >> + break; > >> + push (l + 1); > >> + } > >> + > >> + return l; > >> +} > >> + > >> +int a[258]; > >> +int b[258]; > >> +int c[1024]; > 
>> +static int top = 0; > >> +void > >> +push (int e) > >> +{ > >> + c[top++] = e; > >> +} > >> + > >> +void > >> +reset () > >> +{ > >> + top = 0; > >> + __builtin_memset (c, 0, sizeof (c)); > >> +} > >> + > >> +#define check(a, b) (a == b) > >> + > >> +int > >> +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5) > >> +{ > >> + return check (c[0], a0) && check (c[1], a1) && check (c[2], a2) > >> + && check (c[3], a3) && check (c[4], a4) && check (c[5], a5); > >> +} > >> + > >> +int > >> +main () > >> +{ > >> + __builtin_memcpy (b, a, sizeof (a)); > >> + reset (); > >> + if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9) > >> + || !check_c (c + 496, 254, 255, 255, 256, 0, 1)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 0, 0)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, 1)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar_1 (a, b, 6, 6) != 5) > >> + abort (); > >> + > >> + reset (); > >> + if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, 256)) > >> + abort (); > >> + > >> + reset (); > >> + if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0)) > >> + abort (); > >> + > >> + b[100] += 1; > >> + reset (); > >> + if (bar (a, b, 90, 110) != 100) > >> + abort (); > >> + > >> + reset (); > >> + if (bar (a, b, 110, 105) != 100) > >> + abort (); > >> + > >> + reset (); > >> + if (bar_1 (a, b, 90, 110) != 109) > >> + abort (); > >> + > >> + reset (); > >> + if (bar_1 (a, b, 2, 90) != 100) > >> + abort (); > >> + > >> + foo (a, b, 99, 99); > >> + a[99] = b[99] + 1; > >> + for (int i = 0; i < 256; i++) > >> + if (a[i] != b[i] + 1) > >> + abort (); > >> + > >> + foo_1 (a, b, 99, 99); > >> + a[99] = b[99] + 1; > >> + for (int i = 0; i < 256; i++) > >> + if (a[i] != b[i] + 1) > >> + abort (); > >> + > >> + exit (0); > >> +} > >> diff --git a/gcc/testsuite/gcc.dg/loop-split3.c > >> b/gcc/testsuite/gcc.dg/loop-split3.c > >> new file mode 100644 > >> index 00000000000..ec93ee8bd12 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/loop-split3.c > >> @@ -0,0 +1,62 @@ > >> +/* { dg-do compile } */ > >> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > >> + > >> +void > >> +foo (int *a, int *b, unsigned l, unsigned n) > >> +{ > >> + while (--l != n) > >> + a[l] = b[l] + 1; > >> +} > >> + > >> +void > >> +foo1 (int *a, int *b, unsigned l, unsigned n) > >> +{ > >> + while (l-- != n) > >> + a[l] = b[l] + 1; > >> +} > >> + > >> +unsigned > >> +foo2 (char *a, char *b, unsigned l, unsigned n) > >> +{ > >> + while (--l != n) > >> + if (a[l] != b[l]) > >> + break; > >> + > >> + return l; > >> +} > >> + > >> +unsigned > >> +foo3 (char *a, char *b, unsigned l, unsigned n) > >> +{ > >> + while (l-- != n) > >> + if (a[l] != b[l]) > >> + break; > >> + > >> + return l; > >> +} > >> + > >> +void > >> +bar (); > >> +void > >> +foo4 (unsigned n, unsigned i) > >> +{ > >> + do > >> + { > >> + if (i == n) > >> + return; > >> + bar (); > >> + --i; > >> + } > >> + while (1); > >> +} > 
>> + > >> +unsigned > >> +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > >> +{ > >> + while (p[i] == q[i] && --i != n) > >> + p--, q--; > >> + > >> + return i; > >> +} > >> + > >> +/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */ > >> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c > >> index 3a09bbc39e5..e9f23b32186 100644 > >> --- a/gcc/tree-ssa-loop-split.c > >> +++ b/gcc/tree-ssa-loop-split.c > >> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see > >> #include "cfghooks.h" > >> #include "gimple-fold.h" > >> #include "gimplify-me.h" > >> +#include "tree-ssa-loop-ivopts.h" > >> > >> /* This file implements two kinds of loop splitting. > >> > >> @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop) > >> conditional). I.e. the second loop can now be entered either > >> via the original entry or via NEW_E, so the entry values of LOOP2 > >> phi nodes are either the original ones or those at the exit > >> - of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting > >> - this. The loops need to fulfill easy_exit_values(). */ > >> + of LOOP1. Selecting the previous value instead next value as the > >> + exit value of LOOP1 if USE_PREV is true. Insert new phi nodes in > >> + LOOP2 pre-header reflecting this. The loops need to fulfill > >> + easy_exit_values(). */ > >> > >> static void > >> -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e) > >> +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e, > >> + bool use_prev = false) > >> { > >> basic_block rest = loop_preheader_edge (loop2)->src; > >> gcc_assert (new_e->dest == rest); > >> @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop > >> *loop2, edge new_e) > >> > >> gphi * newphi = create_phi_node (new_init, rest); > >> add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION); > >> - add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION); > >> + add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, > >> new_e, > >> + UNKNOWN_LOCATION); > >> SET_USE (op, new_init); > >> } > >> } > >> @@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop) > >> return do_split; > >> } > >> > >> +/* Check if the LOOP exit branch is like "if (idx != bound)", > >> + Return the branch edge which exit loop, if wrap may happen on "idx". > >> */ > >> + > >> +static edge > >> +get_ne_cond_branch (struct loop *loop, tree *step) > >> +{ > >> + int i; > >> + edge e; > >> + > >> + auto_vec<edge> edges = get_loop_exit_edges (loop); > >> + FOR_EACH_VEC_ELT (edges, i, e) > >> + { > >> + basic_block bb = e->src; > >> + > >> + /* Check if exit at gcond. */ > >> + gimple *last = last_stmt (bb); > >> + if (!last || gimple_code (last) != GIMPLE_COND) > >> + continue; > >> + gcond *cond = as_a<gcond *> (last); > >> + enum tree_code code = gimple_cond_code (cond); > >> + if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE)) > >> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE)))) > >> + continue; > >> + > >> + /* Check if bound is invarant. */ > >> + tree idx = gimple_cond_lhs (cond); > >> + tree bnd = gimple_cond_rhs (cond); > >> + if (expr_invariant_in_loop_p (loop, idx)) > >> + std::swap (idx, bnd); > >> + else if (!expr_invariant_in_loop_p (loop, bnd)) > >> + continue; > >> + > >> + /* Only unsigned type conversion could cause wrap. 
*/ > >> + tree type = TREE_TYPE (idx); > >> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME > >> + || !TYPE_UNSIGNED (type)) > >> + continue; > >> + > >> + /* Avoid to split if bound is MAX/MIN val. */ > >> + tree bound_type = TREE_TYPE (bnd); > >> + if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > >> + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) > >> + continue; > >> + > >> + /* Check if there is possible wrap. */ > >> + class tree_niter_desc niter; > >> + if (!number_of_iterations_exit (loop, e, &niter, false, false)) > >> + continue; > >> + if (niter.control.no_overflow) > >> + return NULL; > >> + if (niter.cmp != NE_EXPR) > >> + continue; > >> + if (!integer_onep (niter.control.step) > >> + && !integer_minus_onep (niter.control.step)) > >> + continue; > >> + *step = niter.control.step; > >> + > >> + /* If exit edge is just before the empty latch, it is easy to link > >> + the split loops: just jump from the exit edge of one loop to the > >> + header of new loop. */ > >> + if (single_pred_p (loop->latch) > >> + && single_pred_edge (loop->latch)->src == bb > >> + && empty_block_p (loop->latch)) > >> + return e; > >> + > >> + /* If exit edge is at end of header, and header contains i++ or ++i, > >> + only, it is simple to link the split loops: jump from the end of > >> + one loop header to the new loop header, and use unchanged PHI result > >> + of the first loop as the entry PHI value of the second loop. */ > >> + if (bb == loop->header) > >> + { > >> + /* Only one phi. */ > >> + gphi_iterator psi = gsi_start_phis (bb); > >> + if (gsi_end_p (psi)) > >> + continue; > >> + gphi *phi = psi.phi (); > >> + gsi_next (&psi); > >> + if (!gsi_end_p (psi)) > >> + continue; > >> + > >> + /* Check it is ++i or ++i */ > >> + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); > >> + tree prev = PHI_RESULT (phi); > >> + if (idx != prev && idx != next) > >> + continue; > >> + > >> + gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb); > >> + if (gsi_end_p (gsi)) > >> + continue; > >> + gimple *s1 = gsi_stmt (gsi); > >> + if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next > >> + || gimple_assign_rhs1 (s1) != prev) > >> + continue; > >> + > >> + gsi_next_nondebug (&gsi); > >> + if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond) > >> + return e; > >> + } > >> + } > >> + > >> + return NULL; > >> +} > >> + > >> +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR. > >> */ > >> + > >> +static bool > >> +split_ne_loop (struct loop *loop, edge cond_e, tree step) > >> +{ > >> + initialize_original_copy_tables (); > >> + > >> + struct loop *loop2 = loop_version (loop, boolean_true_node, NULL, > >> + profile_probability::always (), > >> + profile_probability::never (), > >> + profile_probability::always (), > >> + profile_probability::always (), true); > >> + > >> + gcc_assert (loop2); > >> + update_ssa (TODO_update_ssa); > >> + > >> + basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src); > >> + free_original_copy_tables (); > >> + > >> + gcond *gc = as_a<gcond *> (last_stmt (cond_e->src)); > >> + gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb)); > >> + > >> + /* Invert edges for gcond. 
*/ > >> + if (gimple_cond_code (gc) == EQ_EXPR) > >> + { > >> + auto invert_edge = [](basic_block bb) { > >> + edge out = EDGE_SUCC (bb, 0); > >> + edge in = EDGE_SUCC (bb, 1); > >> + if (in->flags & EDGE_TRUE_VALUE) > >> + std::swap (in, out); > >> + in->flags |= EDGE_TRUE_VALUE; > >> + in->flags &= ~EDGE_FALSE_VALUE; > >> + out->flags |= EDGE_FALSE_VALUE; > >> + out->flags &= ~EDGE_TRUE_VALUE; > >> + }; > >> + > >> + invert_edge (gimple_bb (gc)); > >> + invert_edge (gimple_bb (dup_gc)); > >> + } > >> + > >> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ > >> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); > >> + if (tree_int_cst_sign_bit (step)) > >> + inv = !inv; > >> + enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR; > >> + enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR; > >> + > >> + gimple_cond_set_code (gc, first_loop_code); > >> + gimple_cond_set_code (dup_gc, second_loop_code); > >> + > >> + /* Link the exit cond edge to new loop. */ > >> + gcond *break_cond = as_a<gcond *> (gimple_copy (gc)); > >> + edge pred_e = single_pred_edge (loop->latch); > >> + bool simple_loop > >> + = pred_e && pred_e->src == cond_e->src && empty_block_p (loop->latch); > >> + if (simple_loop) > >> + gimple_cond_set_code (break_cond, second_loop_code); > >> + else > >> + gimple_cond_make_true (break_cond); > >> + > >> + basic_block break_bb = split_edge (cond_e); > >> + gimple_stmt_iterator gsi = gsi_last_bb (break_bb); > >> + gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); > >> + > >> + edge to_exit = single_succ_edge (break_bb); > >> + edge to_new_loop = make_edge (break_bb, loop_preheader_edge > >> (loop2)->src, 0); > >> + to_new_loop->flags |= EDGE_TRUE_VALUE; > >> + to_exit->flags |= EDGE_FALSE_VALUE; > >> + to_exit->flags &= ~EDGE_FALLTHRU; > >> + to_exit->probability = cond_e->probability; > >> + to_new_loop->probability = to_exit->probability.invert (); > >> + > >> + update_ssa (TODO_update_ssa); > >> + > >> + connect_loop_phis (loop, loop2, to_new_loop, !simple_loop); > >> + > >> + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); > >> + if (dump_file && (dump_flags & TDF_DETAILS)) > >> + fprintf (dump_file, ";; Loop split on wrap index.\n"); > >> + > >> + return true; > >> +} > >> + > >> +/* Checks if LOOP contains a suitable NE_EXPR conditional block to split. > >> +L_H: > >> + if (i!=N) > >> + S; > >> + else > >> + goto EXIT; > >> + i++; > >> + goto L_H; > >> + > >> +The "i!=N" is like "i>N || i<N", then it could be transformed to: > >> + > >> +L_H: > >> + if (i>N) > >> + S; > >> + else > >> + goto EXIT; > >> + i++; > >> + goto L_H; > >> +L1_H: > >> + if (i<N) > >> + S; > >> + else > >> + goto EXIT; > >> + i++; > >> + goto L1_H; > >> + > >> +The loop with "i<N" is in favor of both GIMPLE and RTL passes. 
*/ > >> + > >> +static bool > >> +split_loop_on_ne_cond (class loop *loop) > >> +{ > >> + tree step; > >> + edge branch_edge = get_ne_cond_branch (loop, &step); > >> + if (!branch_edge) > >> + return false; > >> + > >> + int num = 0; > >> + basic_block *bbs = get_loop_body (loop); > >> + for (unsigned i = 0; i < loop->num_nodes; i++) > >> + num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); > >> + > >> + if (num > param_max_peeled_insns) > >> + { > >> + free (bbs); > >> + return false; > >> + } > >> + > >> + if (!can_copy_bbs_p (bbs, loop->num_nodes)) > >> + { > >> + free (bbs); > >> + return false; > >> + } > >> + free (bbs); > >> + > >> + if (split_ne_loop (loop, branch_edge, step)) > >> + return true; > >> + > >> + return false; > >> +} > >> + > >> /* Main entry point. Perform loop splitting on all suitable loops. */ > >> > >> static unsigned int > >> @@ -1622,7 +1873,8 @@ tree_ssa_split_loops (void) > >> if (optimize_loop_for_size_p (loop)) > >> continue; > >> > >> - if (split_loop (loop) || split_loop_on_cond (loop)) > >> + if (split_loop (loop) || split_loop_on_cond (loop) > >> + || split_loop_on_ne_cond (loop)) > >> { > >> /* Mark our containing loop as having had some split inner loops. > >> */ > >> loop_outer (loop)->aux = loop; > >
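To make the wrap case behind this thread concrete, here is a small standalone C program (illustration only, not part of the patch): it shows that when the initial index is already past the bound, the unsigned IV must wrap through UINT_MAX before "++k != n" becomes false, which is exactly why the guarded split keeps the original "!=" loop on the else path instead of rewriting the exit to "<" unconditionally.

#include <stdio.h>

/* Same shape as the foo example in this thread: an unsigned IV with an
   "!=" exit condition.  Counts how many times the body runs.  */
static unsigned long
count_iters (unsigned k, unsigned n)
{
  unsigned long iters = 0;
  while (++k != n)   /* k may wrap past UINT_MAX before it reaches n.  */
    iters++;
  return iters;
}

int
main (void)
{
  /* k < n: behaves exactly like "++k < n"; 4 iterations.  */
  printf ("%lu\n", count_iters (3, 8));

  /* k > n: the IV wraps through UINT_MAX and runs about 4 billion
     iterations, whereas "++k < n" would run zero times here.  That is
     why the guarded version keeps the original "!=" loop as loop2.  */
  printf ("%lu\n", count_iters (8, 3));
  return 0;
}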
On 2021-06-21 20:36, Richard Biener wrote:
> On Mon, 21 Jun 2021, guojiufu wrote:
>
>> On 2021-06-21 14:19, guojiufu via Gcc-patches wrote:
>> > On 2021-06-09 19:18, guojiufu wrote:
>> >> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote:
>> >>> On 2021-06-08 18:13, Richard Biener wrote:
>> >>>> On Fri, 4 Jun 2021, Jiufu Guo wrote:
>> >>>>
>> >>> cut...
>> > cut...
>> >>
>>
>> Besides the method in the previous mails,
>> I’m thinking of another way to split loops:
>>
>> foo (int *a, int *b, unsigned k, unsigned n)
>> {
>>   while (++k != n)
>>     a[k] = b[k] + 1;
>> }
>>
>> We may split it into:
>> if (k<n)
>>   {
>>     while (++k < n) //loop1
>>       a[k] = b[k] + 1;
>>   }
>> else
>>   {
>>     while (++k != n) //loop2
>>       a[k] = b[k] + 1;
>>   }
>>
>> In most cases, loop1 would be hit, the overhead of this method is only
>> checking “if (k<n)”
>> which would be smaller than the previous method.
>
> That would be your original approach of versioning the loop. I think
> I suggested that for this scalar evolution and dataref analysis should
> be enhanced to build up conditions under which IV evolutions are
> affine (non-wrapping) and the versioning code in actual transforms
> should then do the appropriate versioning (like the vectorizer already
> does for niter analysis ->assumptions for example).

Hi Richi,

Thanks for your suggestion!

The original idea was trying to cover cases like multi-exit loops, while
it seems not benefited too much. The method you said would help for
common cases.

I'm thinking of the methods to implement this:
During scev analyzing, add more possible wrap checking (especially for
unsigned) for convert_affine_scev/scev_probably_wraps_p/chrec_convert_1;
introducing no_wrap_assumption for the conditions of no-wrapping on given
chrec/iv. And using this assumption in
simple_iv_with_niters/dr_analyze_innermost.

One question is, where is the best place to add this assumption?
Is it a flexible idea to add no_wrap_assumption to affine_iv/loop, and
set the assumption when scev checks wrap?

Thanks for your suggestions!

BR.
Jiufu

>
> Richard.
> cut....
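For illustration of the question above, a rough sketch of what attaching the assumption to affine_iv could look like. The no_wrap_assumption field and its semantics are hypothetical, added only for discussion; in the current gcc/tree-ssa-loop.h struct only base, step and no_overflow exist.

/* Hypothetical extension of affine_iv (gcc/tree-ssa-loop.h) sketching the
   idea above; the no_wrap_assumption field is NOT existing GCC code.  */
struct affine_iv
{
  /* Its base and step (constant or invariant), as in the current struct.  */
  tree base, step;

  /* True if this iv does not overflow, as in the current struct.  */
  bool no_overflow;

  /* Hypothetical: condition under which the evolution does not wrap.
     boolean_true_node when no_overflow already holds; otherwise a runtime
     predicate (e.g. "base <= bound") that a transform could version the
     loop on, similar to the niter analysis assumptions.  */
  tree no_wrap_assumption;
};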
On Fri, 23 Jul 2021, guojiufu wrote:
> On 2021-06-21 20:36, Richard Biener wrote:
> > On Mon, 21 Jun 2021, guojiufu wrote:
> >
> >> On 2021-06-21 14:19, guojiufu via Gcc-patches wrote:
> >> > On 2021-06-09 19:18, guojiufu wrote:
> >> >> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote:
> >> >>> On 2021-06-08 18:13, Richard Biener wrote:
> >> >>>> On Fri, 4 Jun 2021, Jiufu Guo wrote:
> >> >>>>
> >> >>> cut...
> >> > cut...
> >> >>
> >>
> >> Besides the method in the previous mails,
> >> I’m thinking of another way to split loops:
> >>
> >> foo (int *a, int *b, unsigned k, unsigned n)
> >> {
> >>   while (++k != n)
> >>     a[k] = b[k] + 1;
> >> }
> >>
> >> We may split it into:
> >> if (k<n)
> >>   {
> >>     while (++k < n) //loop1
> >>       a[k] = b[k] + 1;
> >>   }
> >> else
> >>   {
> >>     while (++k != n) //loop2
> >>       a[k] = b[k] + 1;
> >>   }
> >>
> >> In most cases, loop1 would be hit, the overhead of this method is only
> >> checking “if (k<n)”
> >> which would be smaller than the previous method.
> >
> > That would be your original approach of versioning the loop. I think
> > I suggested that for this scalar evolution and dataref analysis should
> > be enhanced to build up conditions under which IV evolutions are
> > affine (non-wrapping) and the versioning code in actual transforms
> > should then do the appropriate versioning (like the vectorizer already
> > does for niter analysis ->assumptions for example).
>
> Hi Richi,
>
> Thanks for your suggestion!
>
> The original idea was trying to cover cases like multi-exit loops, while it
> seems not benefited too much. The method you said would help for common
> cases.
>
> I'm thinking of the methods to implement this:
> During scev analyzing, add more possible wrap checking (especially for
> unsigned)
> for convert_affine_scev/scev_probably_wraps_p/chrec_convert_1; introducing
> no_wrap_assumption for the conditions of no-wrapping on given chrec/iv.
> And using this assumption in simple_iv_with_niters/dr_analyze_innermost.
>
> One question is, where is the best place to add this assumption?
> Is it a flexible idea to add no_wrap_assumption to affine_iv/loop, and
> set the assumption when scev checks wrap?

I'm not sure what you exactly mean here. I was thinking of making
analyze_scalar_evolution return more than a plain tree, for example
by providing an alternate (optional) output, for example a pointer to
some new scev_info that could initially be just

struct scev_info
{
  tree assumptions;
};

when analyze_scalar_evolution recurses there are certain points it
can end up returning chrec_dont_know. If we passed in the alternate
output there might be the possibility to add to the assumption to make
the result well-defined (and yes, this extends to chrec_fold_* routines,
mostly chrec_convert I think).

Handled cases could be added piecewise, just passing down the scev_info
pointer will be intrusive initially.

For simple_iv there's already a struct we could add 'assumptions' to
(and maybe a flag to the API whether assumptions are allowed).

For DR analysis the same could be done.

Richard.

> Thanks for your suggestions!
>
> BR.
> Jiufu
>
> >
> > Richard.
> > cut....
>
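To make the direction above a bit more concrete, a rough sketch of how a transform might consume such accumulated assumptions. The extra scev_info out-parameter on analyze_scalar_evolution is a hypothetical extension (the current interface takes only the loop and the variable), while the loop_version call mirrors the one already used in the patch in this thread.

/* Sketch only: the scev_info out-parameter of analyze_scalar_evolution is
   a hypothetical extension, not an existing interface.  */
struct scev_info
{
  tree assumptions;  /* Conditions under which the returned chrec is valid
                        (affine, non-wrapping); boolean_true_node if none
                        are needed, boolean_false_node if unanalyzable.  */
};

static void
version_loop_on_scev_assumptions (class loop *loop, tree iv_var)
{
  scev_info info;
  info.assumptions = boolean_true_node;

  /* Hypothetical overload that also fills INFO with the assumptions that
     make the evolution affine.  */
  tree chrec = analyze_scalar_evolution (loop, iv_var, &info);
  if (chrec == chrec_dont_know || integer_zerop (info.assumptions))
    return;

  if (integer_onep (info.assumptions))
    return;  /* Already known to be non-wrapping, nothing to version.  */

  /* Same versioning scheme the patch above uses: the fast copy runs when
     the assumptions hold at runtime, the original loop otherwise.  */
  basic_block cond_bb;
  initialize_original_copy_tables ();
  loop_version (loop, info.assumptions, &cond_bb,
                profile_probability::very_likely (),
                profile_probability::very_unlikely (),
                profile_probability::very_likely (),
                profile_probability::very_unlikely (), true);
  free_original_copy_tables ();
}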
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 3a09bbc39e5..c9d161565e4 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "cfghooks.h" #include "gimple-fold.h" #include "gimplify-me.h" +#include "tree-ssa-loop-ivopts.h" /* This file implements two kinds of loop splitting. @@ -1593,6 +1594,468 @@ split_loop_on_cond (struct loop *loop) return do_split; } +/* Filter out type conversions on IDX. + Store the shortest type during conversion to SMALL_TYPE. + Store the longest type during conversion to LARGE_TYPE. */ + +static gimple * +filter_conversions (class loop *loop, tree idx, tree *small_type = NULL, + tree *large_type = NULL) +{ + gcc_assert (TREE_CODE (idx) == SSA_NAME); + gimple *stmt = SSA_NAME_DEF_STMT (idx); + while (is_gimple_assign (stmt) + && flow_bb_inside_loop_p (loop, gimple_bb (stmt))) + { + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))) + { + idx = gimple_assign_rhs1 (stmt); + if (small_type) + { + tree type = TREE_TYPE (idx); + if (TYPE_PRECISION (*small_type) > TYPE_PRECISION (type) + || (TYPE_PRECISION (*small_type) == TYPE_PRECISION (type) + && TYPE_UNSIGNED (*small_type) && !TYPE_UNSIGNED (type))) + *small_type = type; + } + if (large_type) + { + tree type = TREE_TYPE (idx); + if (TYPE_PRECISION (*large_type) < TYPE_PRECISION (type) + || (TYPE_PRECISION (*large_type) == TYPE_PRECISION (type) + && !TYPE_UNSIGNED (*large_type) && TYPE_UNSIGNED (type))) + *large_type = type; + } + } + else + break; + + if (TREE_CODE (idx) != SSA_NAME) + break; + stmt = SSA_NAME_DEF_STMT (idx); + } + return stmt; +} + +/* Collection of loop index related elements. */ +struct idx_elements +{ + gcond *gc; + gphi *phi; + gimple *inc_stmt; + tree idx; + tree bnd; + tree step; + tree large_type; + tree small_type; + bool cmp_on_next; +}; + +/* Analyze and get the idx related elements: bnd, + phi, increase stmt from exit edge E, etc. + + i = phi (b, n) + ... + n0 = ik + 1 + n1 = (type)n0 + ... + if (i != bnd) or if (n != bnd) + ... + n = ()nl + + IDX is the i' or n'. */ + +bool +analyze_idx_elements (class loop *loop, edge e, idx_elements &data) +{ + /* Avoid complicated edge. */ + if (e->flags & EDGE_FAKE) + return false; + if (e->src != loop->header && e->src != single_pred (loop->latch)) + return false; + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->src)) + return false; + + /* Check gcond. */ + gimple *last = last_stmt (e->src); + if (!last || gimple_code (last) != GIMPLE_COND) + return false; + + /* Get idx and bnd from gcond. */ + gcond *gc = as_a<gcond *> (last); + tree bnd = gimple_cond_rhs (gc); + tree idx = gimple_cond_lhs (gc); + if (expr_invariant_in_loop_p (loop, idx)) + std::swap (idx, bnd); + else if (!expr_invariant_in_loop_p (loop, bnd)) + return false; + if (TREE_CODE (idx) != SSA_NAME) + return false; + + gimple *inc_stmt = NULL; + bool cmp_next = false; + tree small_type = TREE_TYPE (idx); + tree large_type = small_type; + gimple *stmt = filter_conversions (loop, idx, &small_type, &large_type); + /* The idx on gcond is not PHI, it would be next. */ + if (is_gimple_assign (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); + if (TREE_CODE (rhs) != SSA_NAME) + return false; + + cmp_next = true; + inc_stmt = stmt; + stmt = filter_conversions (loop, rhs, &small_type, &large_type); + } + + /* Get phi and next. 
*/ + if (gimple_code (stmt) != GIMPLE_PHI || gimple_bb (stmt) != loop->header) + return false; + gphi *phi = as_a<gphi *> (stmt); + tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + if (TREE_CODE (next) != SSA_NAME) + return false; + + /* The define of next should be the increasement stmt. */ + stmt = filter_conversions (loop, next, &small_type, &large_type); + if (inc_stmt != NULL && inc_stmt != stmt) + return false; + if (!is_gimple_assign (stmt) + || !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) + return false; + tree_code rhs_code = gimple_assign_rhs_code (stmt); + if (rhs_code != PLUS_EXPR) + return false; + tree step = gimple_assign_rhs2 (stmt); + if (!integer_minus_onep (step) && !integer_onep (step)) + return false; + inc_stmt = stmt; + tree prev = gimple_assign_rhs1 (stmt); + if (TREE_CODE (prev) != SSA_NAME) + return false; + stmt = filter_conversions (loop, prev, &small_type, &large_type); + if (stmt != phi) + return false; + + data.gc = gc; + data.phi = phi; + data.idx = idx; + data.bnd = bnd; + data.step = step; + data.large_type = large_type; + data.small_type = small_type; + data.inc_stmt = inc_stmt; + data.cmp_on_next = cmp_next; + + return true; +} + +/* Check if the loop is possible to wrap at index. + Return the assumption under which the wrap will not happen. + Return NULL_TREE, if wrap will not happen. */ + +static tree +get_wrap_assumption (class loop *loop, edge *exit, idx_elements &data) +{ + int i; + edge e; + if (!single_pred_p (loop->latch) || !empty_block_p (loop->latch)) + return NULL_TREE; + + auto_vec<edge> edges = get_loop_exit_edges (loop); + FOR_EACH_VEC_ELT (edges, i, e) + { + if (!analyze_idx_elements (loop, e, data)) + continue; + + /* Check if bound is MAX/MIN val of a integral type. */ + tree bnd = data.bnd; + tree bnd_type = TREE_TYPE (bnd); + if (!INTEGRAL_TYPE_P (bnd_type) || !TYPE_UNSIGNED (bnd_type)) + continue; + if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bnd_type)) + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bnd_type))) + continue; + + /* Check if it is "idx != bnd" or "idx < bnd". */ + gcond *gc = data.gc; + enum tree_code code = gimple_cond_code (gc); + if (bnd == gimple_cond_lhs (gc)) + code = swap_tree_comparison (code); + if ((e->flags & EDGE_TRUE_VALUE) && code == EQ_EXPR) + code = NE_EXPR; + if (code != NE_EXPR && code != LT_EXPR && code != GT_EXPR) + continue; + + /* Check if idx is iv with base and step. */ + affine_iv iv; + tree iv_niters = NULL_TREE; + tree idx = PHI_RESULT (data.phi); + if (!simple_iv_with_niters (loop, loop_containing_stmt (gc), idx, &iv, + &iv_niters, false)) + continue; + if (!iv.base || !iv.step || !tree_int_cst_equal (iv.step, data.step)) + continue; + + bool is_negative = tree_int_cst_sign_bit (iv.step); + enum tree_code expect_code = is_negative ? GT_EXPR : LT_EXPR; + if (code != NE_EXPR && code != expect_code) + continue; + + /* Update the type for base, bound and the max value of boundary. */ + tree base = iv.base; + tree small_type = data.small_type; + tree large_type = data.large_type; + tree max_min_bnd = is_negative ? TYPE_MIN_VALUE (small_type) + : TYPE_MAX_VALUE (small_type); + if (large_type != TREE_TYPE (base)) + base = fold_convert (large_type, base); + if (large_type != TREE_TYPE (bnd)) + bnd = fold_convert (large_type, bnd); + if (large_type != small_type) + max_min_bnd = fold_convert (large_type, max_min_bnd); + + /* There is no wrap if bnd <= max value && base <= bnd. */ + enum tree_code expect_code1 = is_negative ? 
GE_EXPR : LE_EXPR; + tree no_wrap + = fold_build2 (expect_code1, boolean_type_node, bnd, max_min_bnd); + no_wrap + = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, no_wrap, + fold_build2 (expect_code1, boolean_type_node, base, bnd)); + + if (integer_zerop (no_wrap)) + continue; + + *exit = e; + return no_wrap; + } + + return NULL_TREE; +} + +/* Update the idx and bnd with a suitable type for no wrap/oveflow loop. + Suitable type would be the largest type of the type conversion which + occur on index, if the largest type is unsigned, using sizetype. */ + +static bool +update_idx_bnd_type (class loop *loop, idx_elements &data) +{ + gphi *phi = data.phi; + tree type = TREE_TYPE (PHI_RESULT (phi)); + tree suitable_type = data.large_type; + if (TYPE_UNSIGNED (suitable_type)) + { + if (TYPE_PRECISION (suitable_type) == TYPE_PRECISION (sizetype)) + return false; + suitable_type = sizetype; + } + + /* New base and new bound. */ + tree bnd = data.bnd; + edge pre_e = loop_preheader_edge (loop); + tree base = PHI_ARG_DEF_FROM_EDGE (phi, pre_e); + tree new_base = fold_convert (suitable_type, base); + tree new_bnd = fold_convert (suitable_type, bnd); + gimple_seq seq = NULL; + new_base = force_gimple_operand (new_base, &seq, true, NULL_TREE); + if (seq) + gsi_insert_seq_on_edge_immediate (pre_e, seq); + seq = NULL; + new_bnd = force_gimple_operand (new_bnd, &seq, true, NULL_TREE); + if (seq) + gsi_insert_seq_on_edge_immediate (pre_e, seq); + + /* new_i = phi (new_b, new_n) + new_n = new_i + 1 */ + edge latch_e = loop_latch_edge (loop); + const char *name = "idx"; + tree new_idx = make_temp_ssa_name (suitable_type, NULL, name); + tree new_next = make_temp_ssa_name (suitable_type, NULL, name); + gphi *newphi = create_phi_node (new_idx, loop->header); + add_phi_arg (newphi, new_base, pre_e, UNKNOWN_LOCATION); + add_phi_arg (newphi, new_next, latch_e, UNKNOWN_LOCATION); + + /* New increase stmt. */ + gimple *inc_stmt = data.inc_stmt; + tree step = gimple_assign_rhs2 (inc_stmt); + if (tree_int_cst_sign_bit(step)) + { + wide_int v = wi::to_wide (step); + v = wide_int::from (v, TYPE_PRECISION (suitable_type), + SIGNED); + + step = wide_int_to_tree (suitable_type, v); + } + else + step = fold_convert (suitable_type, step); + new_idx = PHI_RESULT (newphi); + tree_code inc_code = gimple_assign_rhs_code (inc_stmt); + gimple *new_inc = gimple_build_assign (new_next, inc_code, new_idx, step); + gimple_stmt_iterator gsi = gsi_for_stmt (inc_stmt); + gsi_insert_before (&gsi, new_inc, GSI_SAME_STMT); + + /* Update gcond. */ + gcond *gc = data.gc; + bool inv = bnd == gimple_cond_lhs (gc); + tree cmp_idx = data.cmp_on_next ? new_next : new_idx; + gimple_cond_set_lhs (gc, inv ? new_bnd : cmp_idx); + gimple_cond_set_rhs (gc, inv ? cmp_idx : new_bnd); + update_stmt (gc); + + /* next = (next type)new_next + And remove next = prev + 1. */ + tree next = gimple_assign_lhs (inc_stmt); + type = TREE_TYPE (next); + gimple *stmt = gimple_build_assign (next, fold_convert (type, new_next)); + gsi = gsi_for_stmt (inc_stmt); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + + gsi = gsi_for_stmt (inc_stmt); + gsi_remove (&gsi, true); + + /* prev = (prev type)new_prev + And remove prev = phi. 
*/ + tree idx = PHI_RESULT (phi); + type = TREE_TYPE (idx); + stmt = gimple_build_assign (idx, fold_convert (type, new_idx)); + gsi = gsi_after_labels (loop->header); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + + gsi = gsi_for_stmt (phi); + gsi_remove (&gsi, true); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Loop index type is updated to use faster type.\n"); + + return true; +} + +/* Split out a new loop which would not wrap, + under the guard that NO_WRAP_COND will not be true. */ + +static bool +split_wrap_boundary (class loop *loop, edge e, tree no_wrap_cond, + bool is_negative_step) +{ + /* Convert the condition into a suitable gcond. */ + gimple_seq stmts = NULL; + no_wrap_cond = force_gimple_operand_1 (no_wrap_cond, &stmts, + is_gimple_condexpr, NULL_TREE); + + /* Version the loop. */ + initialize_original_copy_tables (); + basic_block cond_bb; + loop_version (loop, no_wrap_cond, &cond_bb, + profile_probability::very_likely (), + profile_probability::very_unlikely (), + profile_probability::very_likely (), + profile_probability::very_unlikely (), true); + free_original_copy_tables (); + + /* Insert the statements that feed COND. */ + if (stmts) + { + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + } + + /* Update gcond code. */ + gcond *gc = as_a<gcond *> (last_stmt (e->src)); + enum tree_code code = gimple_cond_code (gc); + enum tree_code new_code; + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); + if (is_negative_step) + new_code = inv ? LT_EXPR : GT_EXPR; + else + new_code = inv ? GT_EXPR : LT_EXPR; + if (code == NE_EXPR || code == EQ_EXPR) + gimple_cond_set_code (gc, new_code); + + /* Swap edge. */ + if (code == EQ_EXPR) + { + edge out = EDGE_SUCC (e->src, 0); + edge in = EDGE_SUCC (e->src, 1); + if (in->flags & EDGE_TRUE_VALUE) + std::swap (in, out); + in->flags |= EDGE_TRUE_VALUE; + in->flags &= ~EDGE_FALSE_VALUE; + out->flags |= EDGE_FALSE_VALUE; + out->flags &= ~EDGE_TRUE_VALUE; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Loop split on wrap index.\n"); + + return true; +} + +/* Split loop if there is possible wrap. + For example: + transform + + void + foo (int *a, int *b, unsigned l, unsigned u_n) + { + while (++l != u_n) + a[l] = b[l] + 1; + } + + to: + if (l < u_n) + { + int li = l; + int n = u_n; + while (++li < n) + a[li] = b[li] + 1; + l = li; + } + else + while (++l != n) + a[l] = b[l] + 1; + */ +static bool +split_loop_on_wrap (class loop *loop) +{ + edge e; + idx_elements data; + tree no_wrap = get_wrap_assumption (loop, &e, data); + + if (!no_wrap) + return false; + + int num = 0; + basic_block *bbs = get_loop_body (loop); + for (unsigned i = 0; i < loop->num_nodes; i++) + num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); + + if (num > param_max_peeled_insns) + { + free (bbs); + return false; + } + + if (!can_copy_bbs_p (bbs, loop->num_nodes)) + { + free (bbs); + return false; + } + + free (bbs); + + if (integer_onep (no_wrap)) + return update_idx_bnd_type (loop, data); +