diff mbox series

[RFC] New idea to split loop based on no-wrap conditions

Message ID 9f0c505cd216d45f06b63cd94152fc8a@imap.linux.ibm.com
State New
Headers show
Series [RFC] New idea to split loop based on no-wrap conditions | expand

Commit Message

Jiufu Guo June 21, 2021, 7:02 a.m. UTC
On 2021-06-21 14:19, guojiufu via Gcc-patches wrote:
> On 2021-06-09 19:18, guojiufu wrote:
>> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote:
>>> On 2021-06-08 18:13, Richard Biener wrote:
>>>> On Fri, 4 Jun 2021, Jiufu Guo wrote:
>>>> 
>>> cut...
> cut...
>> 

Besides the method in the previous mails, 
I’m thinking of another way to split loops:

foo (int *a, int *b, unsigned k, unsigned n)
{                                                               
 while (++k != n)
   a[k] = b[k] + 1;                               
} 

We may split it into:
if (k<n)
{
  while (++k < n)  //loop1
   a[k] = b[k] + 1;   
}
else
{
 while (++k != n) //loop2
   a[k] = b[k] + 1;  
}

In most cases, loop1 would be hit, the overhead of this method is only 
checking “if (k<n)”
which would be smaller than the previous method.

And this method would be more easy to extend to nest loops like:
 unsigned int l_n = 0;
 unsigned int l_m = 0;
 unsigned int l_k = 0;
 for (l_n = 0; l_n != n; l_n++)
   for (l_k = 0; l_k != k; l_k++)
     for (l_m = 0; l_m != m; l_m++)
         xxx;

Do you think this method is more valuable to implement? 
Below is a quick patch.  This patch does not support nest loops yet.

+  if (split_wrap_boundary (loop, e, no_wrap, tree_int_cst_sign_bit 
(data.step)))
+    {
+      update_idx_bnd_type (loop, data);
+      return true;
+    }
+
+  return false;
+}
+
  /* Main entry point.  Perform loop splitting on all suitable loops.  */

  static unsigned int
@@ -1622,7 +2085,8 @@ tree_ssa_split_loops (void)
        if (optimize_loop_for_size_p (loop))
  	continue;

-      if (split_loop (loop) || split_loop_on_cond (loop))
+      if (split_loop (loop) || split_loop_on_cond (loop)
+	  || split_loop_on_wrap (loop))
  	{
  	  /* Mark our containing loop as having had some split inner loops.  
*/
  	  loop_outer (loop)->aux = loop;


>> Here is the updated patch, thanks for your time!
> 
> Updates:
> . Enhance code to support negative step.
> . Check step +-1 to make sure it hits loop condition !=
> . Enhance runtime cases to check more boundary cases and run order 
> cases.
> . Refine for compiling time: check loop num of insns and can_copy_bbs_p 
> later
> 
>> 
>> diff --git a/gcc/testsuite/gcc.dg/loop-split1.c
>> b/gcc/testsuite/gcc.dg/loop-split1.c
>> new file mode 100644
>> index 00000000000..dd2d03a7b96
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/loop-split1.c
>> @@ -0,0 +1,101 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
>> +
>> +void
>> +foo (int *a, int *b, unsigned l, unsigned n)
>> +{
>> +  while (++l != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +void
>> +foo_1 (int *a, int *b, unsigned n)
>> +{
>> +  unsigned l = 0;
>> +  while (++l != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +
>> +void
>> +foo1 (int *a, int *b, unsigned l, unsigned n)
>> +{
>> +  while (l++ != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +
>> +/* No wrap.  */
>> +void
>> +foo1_1 (int *a, int *b, unsigned n)
>> +{
>> +  unsigned l = 0;
>> +  while (l++ != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +
>> +unsigned
>> +foo2 (char *a, char *b, unsigned l, unsigned n)
>> +{
>> +  while (++l != n)
>> +    if (a[l] != b[l])
>> +      break;
>> +
>> +  return l;
>> +}
>> +
>> +unsigned
>> +foo2_1 (char *a, char *b, unsigned l, unsigned n)
>> +{
>> +  l = 0;
>> +  while (++l != n)
>> +    if (a[l] != b[l])
>> +      break;
>> +
>> +  return l;
>> +}
>> +
>> +unsigned
>> +foo3 (char *a, char *b, unsigned l, unsigned n)
>> +{
>> +  while (l++ != n)
>> +    if (a[l] != b[l])
>> +      break;
>> +
>> +  return l;
>> +}
>> +
>> +/* No wrap.  */
>> +unsigned
>> +foo3_1 (char *a, char *b, unsigned l, unsigned n)
>> +{
>> +  l = 0;
>> +  while (l++ != n)
>> +    if (a[l] != b[l])
>> +      break;
>> +
>> +  return l;
>> +}
>> +
>> +void
>> +bar ();
>> +void
>> +foo4 (unsigned n, unsigned i)
>> +{
>> +  do
>> +    {
>> +      if (i == n)
>> +	return;
>> +      bar ();
>> +      ++i;
>> +    }
>> +  while (1);
>> +}
>> +
>> +unsigned
>> +find_skip_diff (char *p, char *q, unsigned n, unsigned i)
>> +{
>> +  while (p[i] == q[i] && ++i != n)
>> +    p++, q++;
>> +
>> +  return i;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */
>> diff --git a/gcc/testsuite/gcc.dg/loop-split2.c
>> b/gcc/testsuite/gcc.dg/loop-split2.c
>> new file mode 100644
>> index 00000000000..56377e2f2f5
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/loop-split2.c
>> @@ -0,0 +1,155 @@
>> +/* { dg-do run } */
>> +/* { dg-options "-O3" } */
>> +
>> +extern void
>> +abort (void);
>> +extern void
>> +exit (int);
>> +void
>> +push (int);
>> +
>> +#define NI __attribute__ ((noinline))
>> +
>> +void NI
>> +foo (int *a, int *b, unsigned char l, unsigned char n)
>> +{
>> +  while (++l != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +
>> +unsigned NI
>> +bar (int *a, int *b, unsigned char l, unsigned char n)
>> +{
>> +  while (l++ != n)
>> +    {
>> +      push (l);
>> +      if (a[l] != b[l])
>> +	break;
>> +      push (l + 1);
>> +    }
>> +  return l;
>> +}
>> +
>> +void NI
>> +foo_1 (int *a, int *b, unsigned char l, unsigned char n)
>> +{
>> +  while (--l != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +
>> +unsigned NI
>> +bar_1 (int *a, int *b, unsigned char l, unsigned char n)
>> +{
>> +  while (l-- != n)
>> +    {
>> +      push (l);
>> +      if (a[l] != b[l])
>> +	break;
>> +      push (l + 1);
>> +    }
>> +
>> +  return l;
>> +}
>> +
>> +int a[258];
>> +int b[258];
>> +int c[1024];
>> +static int top = 0;
>> +void
>> +push (int e)
>> +{
>> +  c[top++] = e;
>> +}
>> +
>> +void
>> +reset ()
>> +{
>> +  top = 0;
>> +  __builtin_memset (c, 0, sizeof (c));
>> +}
>> +
>> +#define check(a, b) (a == b)
>> +
>> +int
>> +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5)
>> +{
>> +  return check (c[0], a0) && check (c[1], a1) && check (c[2], a2)
>> +	 && check (c[3], a3) && check (c[4], a4) && check (c[5], a5);
>> +}
>> +
>> +int
>> +main ()
>> +{
>> +  __builtin_memcpy (b, a, sizeof (a));
>> +  reset ();
>> +  if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9)
>> +      || !check_c (c + 496, 254, 255, 255, 256, 0, 1))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 
>> 0, 0))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, 
>> 1))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar_1 (a, b, 6, 6) != 5)
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, 
>> 256))
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0))
>> +    abort ();
>> +
>> +  b[100] += 1;
>> +  reset ();
>> +  if (bar (a, b, 90, 110) != 100)
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar (a, b, 110, 105) != 100)
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar_1 (a, b, 90, 110) != 109)
>> +    abort ();
>> +
>> +  reset ();
>> +  if (bar_1 (a, b, 2, 90) != 100)
>> +    abort ();
>> +
>> +  foo (a, b, 99, 99);
>> +  a[99] = b[99] + 1;
>> +  for (int i = 0; i < 256; i++)
>> +    if (a[i] != b[i] + 1)
>> +      abort ();
>> +
>> +  foo_1 (a, b, 99, 99);
>> +  a[99] = b[99] + 1;
>> +  for (int i = 0; i < 256; i++)
>> +    if (a[i] != b[i] + 1)
>> +      abort ();
>> +
>> +  exit (0);
>> +}
>> diff --git a/gcc/testsuite/gcc.dg/loop-split3.c
>> b/gcc/testsuite/gcc.dg/loop-split3.c
>> new file mode 100644
>> index 00000000000..ec93ee8bd12
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/loop-split3.c
>> @@ -0,0 +1,62 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
>> +
>> +void
>> +foo (int *a, int *b, unsigned l, unsigned n)
>> +{
>> +  while (--l != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +
>> +void
>> +foo1 (int *a, int *b, unsigned l, unsigned n)
>> +{
>> +  while (l-- != n)
>> +    a[l] = b[l] + 1;
>> +}
>> +
>> +unsigned
>> +foo2 (char *a, char *b, unsigned l, unsigned n)
>> +{
>> +  while (--l != n)
>> +    if (a[l] != b[l])
>> +      break;
>> +
>> +  return l;
>> +}
>> +
>> +unsigned
>> +foo3 (char *a, char *b, unsigned l, unsigned n)
>> +{
>> +  while (l-- != n)
>> +    if (a[l] != b[l])
>> +      break;
>> +
>> +  return l;
>> +}
>> +
>> +void
>> +bar ();
>> +void
>> +foo4 (unsigned n, unsigned i)
>> +{
>> +  do
>> +    {
>> +      if (i == n)
>> +	return;
>> +      bar ();
>> +      --i;
>> +    }
>> +  while (1);
>> +}
>> +
>> +unsigned
>> +find_skip_diff (char *p, char *q, unsigned n, unsigned i)
>> +{
>> +  while (p[i] == q[i] && --i != n)
>> +    p--, q--;
>> +
>> +  return i;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */
>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>> index 3a09bbc39e5..e9f23b32186 100644
>> --- a/gcc/tree-ssa-loop-split.c
>> +++ b/gcc/tree-ssa-loop-split.c
>> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "cfghooks.h"
>>  #include "gimple-fold.h"
>>  #include "gimplify-me.h"
>> +#include "tree-ssa-loop-ivopts.h"
>> 
>>  /* This file implements two kinds of loop splitting.
>> 
>> @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop)
>>     conditional).  I.e. the second loop can now be entered either
>>     via the original entry or via NEW_E, so the entry values of LOOP2
>>     phi nodes are either the original ones or those at the exit
>> -   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
>> -   this.  The loops need to fulfill easy_exit_values().  */
>> +   of LOOP1.  Selecting the previous value instead next value as the
>> +   exit value of LOOP1 if USE_PREV is true.  Insert new phi nodes in
>> +   LOOP2 pre-header reflecting this.  The loops need to fulfill
>> +   easy_exit_values().  */
>> 
>>  static void
>> -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
>> +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
>> +		   bool use_prev = false)
>>  {
>>    basic_block rest = loop_preheader_edge (loop2)->src;
>>    gcc_assert (new_e->dest == rest);
>> @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop
>> *loop2, edge new_e)
>> 
>>        gphi * newphi = create_phi_node (new_init, rest);
>>        add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
>> -      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
>> +      add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, 
>> new_e,
>> +		   UNKNOWN_LOCATION);
>>        SET_USE (op, new_init);
>>      }
>>  }
>> @@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop)
>>    return do_split;
>>  }
>> 
>> +/* Check if the LOOP exit branch is like "if (idx != bound)",
>> +   Return the branch edge which exit loop, if wrap may happen on 
>> "idx".  */
>> +
>> +static edge
>> +get_ne_cond_branch (struct loop *loop, tree *step)
>> +{
>> +  int i;
>> +  edge e;
>> +
>> +  auto_vec<edge> edges = get_loop_exit_edges (loop);
>> +  FOR_EACH_VEC_ELT (edges, i, e)
>> +    {
>> +      basic_block bb = e->src;
>> +
>> +      /* Check if exit at gcond.  */
>> +      gimple *last = last_stmt (bb);
>> +      if (!last || gimple_code (last) != GIMPLE_COND)
>> +	continue;
>> +      gcond *cond = as_a<gcond *> (last);
>> +      enum tree_code code = gimple_cond_code (cond);
>> +      if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE))
>> +	    || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE))))
>> +	continue;
>> +
>> +      /* Check if bound is invarant.  */
>> +      tree idx = gimple_cond_lhs (cond);
>> +      tree bnd = gimple_cond_rhs (cond);
>> +      if (expr_invariant_in_loop_p (loop, idx))
>> +	std::swap (idx, bnd);
>> +      else if (!expr_invariant_in_loop_p (loop, bnd))
>> +	continue;
>> +
>> +      /* Only unsigned type conversion could cause wrap.  */
>> +      tree type = TREE_TYPE (idx);
>> +      if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME
>> +	  || !TYPE_UNSIGNED (type))
>> +	continue;
>> +
>> +      /* Avoid to split if bound is MAX/MIN val.  */
>> +      tree bound_type = TREE_TYPE (bnd);
>> +      if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type))
>> +	  || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))
>> +	continue;
>> +
>> +      /* Check if there is possible wrap.  */
>> +      class tree_niter_desc niter;
>> +      if (!number_of_iterations_exit (loop, e, &niter, false, false))
>> +	continue;
>> +      if (niter.control.no_overflow)
>> +	return NULL;
>> +      if (niter.cmp != NE_EXPR)
>> +	continue;
>> +      if (!integer_onep (niter.control.step)
>> +	  && !integer_minus_onep (niter.control.step))
>> +	continue;
>> +      *step = niter.control.step;
>> +
>> +      /* If exit edge is just before the empty latch, it is easy to 
>> link
>> +	 the split loops: just jump from the exit edge of one loop to the
>> +	 header of new loop.  */
>> +      if (single_pred_p (loop->latch)
>> +	  && single_pred_edge (loop->latch)->src == bb
>> +	  && empty_block_p (loop->latch))
>> +	return e;
>> +
>> +      /* If exit edge is at end of header, and header contains i++ or 
>> ++i,
>> +	 only, it is simple to link the split loops: jump from the end of
>> +	 one loop header to the new loop header, and use unchanged PHI 
>> result
>> +	 of the first loop as the entry PHI value of the second loop.  */
>> +      if (bb == loop->header)
>> +	{
>> +	  /* Only one phi.  */
>> +	  gphi_iterator psi = gsi_start_phis (bb);
>> +	  if (gsi_end_p (psi))
>> +	    continue;
>> +	  gphi *phi = psi.phi ();
>> +	  gsi_next (&psi);
>> +	  if (!gsi_end_p (psi))
>> +	    continue;
>> +
>> +	  /* Check it is ++i or ++i */
>> +	  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
>> +	  tree prev = PHI_RESULT (phi);
>> +	  if (idx != prev && idx != next)
>> +	    continue;
>> +
>> +	  gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb 
>> (bb);
>> +	  if (gsi_end_p (gsi))
>> +	    continue;
>> +	  gimple *s1 = gsi_stmt (gsi);
>> +	  if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next
>> +	      || gimple_assign_rhs1 (s1) != prev)
>> +	    continue;
>> +
>> +	  gsi_next_nondebug (&gsi);
>> +	  if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond)
>> +	    return e;
>> +	}
>> +    }
>> +
>> +  return NULL;
>> +}
>> +
>> +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and 
>> LT_EXPR.  */
>> +
>> +static bool
>> +split_ne_loop (struct loop *loop, edge cond_e, tree step)
>> +{
>> +  initialize_original_copy_tables ();
>> +
>> +  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
>> +				     profile_probability::always (),
>> +				     profile_probability::never (),
>> +				     profile_probability::always (),
>> +				     profile_probability::always (), true);
>> +
>> +  gcc_assert (loop2);
>> +  update_ssa (TODO_update_ssa);
>> +
>> +  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
>> +  free_original_copy_tables ();
>> +
>> +  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
>> +  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
>> +
>> +  /* Invert edges for gcond.  */
>> +  if (gimple_cond_code (gc) == EQ_EXPR)
>> +    {
>> +      auto invert_edge = [](basic_block bb) {
>> +	edge out = EDGE_SUCC (bb, 0);
>> +	edge in = EDGE_SUCC (bb, 1);
>> +	if (in->flags & EDGE_TRUE_VALUE)
>> +	  std::swap (in, out);
>> +	in->flags |= EDGE_TRUE_VALUE;
>> +	in->flags &= ~EDGE_FALSE_VALUE;
>> +	out->flags |= EDGE_FALSE_VALUE;
>> +	out->flags &= ~EDGE_TRUE_VALUE;
>> +      };
>> +
>> +      invert_edge (gimple_bb (gc));
>> +      invert_edge (gimple_bb (dup_gc));
>> +    }
>> +
>> +  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
>> +  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
>> +  if (tree_int_cst_sign_bit (step))
>> +    inv = !inv;
>> +  enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR;
>> +  enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR;
>> +
>> +  gimple_cond_set_code (gc, first_loop_code);
>> +  gimple_cond_set_code (dup_gc, second_loop_code);
>> +
>> +  /* Link the exit cond edge to new loop.  */
>> +  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
>> +  edge pred_e = single_pred_edge (loop->latch);
>> +  bool simple_loop
>> +    = pred_e && pred_e->src == cond_e->src && empty_block_p 
>> (loop->latch);
>> +  if (simple_loop)
>> +    gimple_cond_set_code (break_cond, second_loop_code);
>> +  else
>> +    gimple_cond_make_true (break_cond);
>> +
>> +  basic_block break_bb = split_edge (cond_e);
>> +  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
>> +  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
>> +
>> +  edge to_exit = single_succ_edge (break_bb);
>> +  edge to_new_loop = make_edge (break_bb, loop_preheader_edge 
>> (loop2)->src, 0);
>> +  to_new_loop->flags |= EDGE_TRUE_VALUE;
>> +  to_exit->flags |= EDGE_FALSE_VALUE;
>> +  to_exit->flags &= ~EDGE_FALLTHRU;
>> +  to_exit->probability = cond_e->probability;
>> +  to_new_loop->probability = to_exit->probability.invert ();
>> +
>> +  update_ssa (TODO_update_ssa);
>> +
>> +  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
>> +
>> +  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
>> +  if (dump_file && (dump_flags & TDF_DETAILS))
>> +    fprintf (dump_file, ";; Loop split on wrap index.\n");
>> +
>> +  return true;
>> +}
>> +
>> +/* Checks if LOOP contains a suitable NE_EXPR conditional block to 
>> split.
>> +L_H:
>> + if (i!=N)
>> +   S;
>> + else
>> +   goto EXIT;
>> + i++;
>> + goto L_H;
>> +
>> +The "i!=N" is like "i>N || i<N", then it could be transformed to:
>> +
>> +L_H:
>> + if (i>N)
>> +   S;
>> + else
>> +   goto EXIT;
>> + i++;
>> + goto L_H;
>> +L1_H:
>> + if (i<N)
>> +   S;
>> + else
>> +   goto EXIT;
>> + i++;
>> + goto L1_H;
>> +
>> +The loop with "i<N" is in favor of both GIMPLE and RTL passes.  */
>> +
>> +static bool
>> +split_loop_on_ne_cond (class loop *loop)
>> +{
>> +  tree step;
>> +  edge branch_edge = get_ne_cond_branch (loop, &step);
>> +  if (!branch_edge)
>> +    return false;
>> +
>> +  int num = 0;
>> +  basic_block *bbs = get_loop_body (loop);
>> +  for (unsigned i = 0; i < loop->num_nodes; i++)
>> +    num += estimate_num_insns_seq (bb_seq (bbs[i]), 
>> &eni_size_weights);
>> +
>> +  if (num > param_max_peeled_insns)
>> +    {
>> +      free (bbs);
>> +      return false;
>> +    }
>> +
>> +  if (!can_copy_bbs_p (bbs, loop->num_nodes))
>> +    {
>> +      free (bbs);
>> +      return false;
>> +    }
>> +  free (bbs);
>> +
>> +  if (split_ne_loop (loop, branch_edge, step))
>> +    return true;
>> +
>> +  return false;
>> +}
>> +
>>  /* Main entry point.  Perform loop splitting on all suitable loops.  
>> */
>> 
>>  static unsigned int
>> @@ -1622,7 +1873,8 @@ tree_ssa_split_loops (void)
>>        if (optimize_loop_for_size_p (loop))
>>  	continue;
>> 
>> -      if (split_loop (loop) || split_loop_on_cond (loop))
>> +      if (split_loop (loop) || split_loop_on_cond (loop)
>> +	  || split_loop_on_ne_cond (loop))
>>  	{
>>  	  /* Mark our containing loop as having had some split inner loops.  
>> */
>>  	  loop_outer (loop)->aux = loop;

Comments

Richard Biener June 21, 2021, 12:36 p.m. UTC | #1
On Mon, 21 Jun 2021, guojiufu wrote:

> On 2021-06-21 14:19, guojiufu via Gcc-patches wrote:
> > On 2021-06-09 19:18, guojiufu wrote:
> >> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote:
> >>> On 2021-06-08 18:13, Richard Biener wrote:
> >>>> On Fri, 4 Jun 2021, Jiufu Guo wrote:
> >>>> 
> >>> cut...
> > cut...
> >> 
> 
> Besides the method in the previous mails, 
> I’m thinking of another way to split loops:
> 
> foo (int *a, int *b, unsigned k, unsigned n)
> {                                                               
>  while (++k != n)
>    a[k] = b[k] + 1;                               
> } 
> 
> We may split it into:
> if (k<n)
> {
>   while (++k < n)  //loop1
>    a[k] = b[k] + 1;   
> }
> else
> {
>  while (++k != n) //loop2
>    a[k] = b[k] + 1;  
> }
> 
> In most cases, loop1 would be hit, the overhead of this method is only
> checking “if (k<n)”
> which would be smaller than the previous method.

That would be your original approach of versioning the loop.  I think
I suggested that for this scalar evolution and dataref analysis should
be enhanced to build up conditions under which IV evolutions are
affine (non-wrapping) and the versioning code in actual transforms
should then do the appropriate versioning (like the vectorizer already
does for niter analysis ->assumptions for example).

Richard.

> And this method would be more easy to extend to nest loops like:
>  unsigned int l_n = 0;
>  unsigned int l_m = 0;
>  unsigned int l_k = 0;
>  for (l_n = 0; l_n != n; l_n++)
>    for (l_k = 0; l_k != k; l_k++)
>      for (l_m = 0; l_m != m; l_m++)
>          xxx;
> 
> Do you think this method is more valuable to implement? 
> Below is a quick patch.  This patch does not support nest loops yet.
> 
> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index 3a09bbc39e5..c9d161565e4 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfghooks.h"
>  #include "gimple-fold.h"
>  #include "gimplify-me.h"
> +#include "tree-ssa-loop-ivopts.h"
> 
>  /* This file implements two kinds of loop splitting.
> 
> @@ -1593,6 +1594,468 @@ split_loop_on_cond (struct loop *loop)
>    return do_split;
>  }
> 
> +/* Filter out type conversions on IDX.
> +   Store the shortest type during conversion to SMALL_TYPE.
> +   Store the longest type during conversion to LARGE_TYPE.  */
> +
> +static gimple *
> +filter_conversions (class loop *loop, tree idx, tree *small_type = NULL,
> +		    tree *large_type = NULL)
> +{
> +  gcc_assert (TREE_CODE (idx) == SSA_NAME);
> +  gimple *stmt = SSA_NAME_DEF_STMT (idx);
> +  while (is_gimple_assign (stmt)
> +	 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
> +    {
> +      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
> +	{
> +	  idx = gimple_assign_rhs1 (stmt);
> +	  if (small_type)
> +	    {
> +	      tree type = TREE_TYPE (idx);
> +	      if (TYPE_PRECISION (*small_type) > TYPE_PRECISION (type)
> +		  || (TYPE_PRECISION (*small_type) == TYPE_PRECISION (type)
> +		      && TYPE_UNSIGNED (*small_type) && !TYPE_UNSIGNED
> (type)))
> +		*small_type = type;
> +	    }
> +	  if (large_type)
> +	    {
> +	      tree type = TREE_TYPE (idx);
> +	      if (TYPE_PRECISION (*large_type) < TYPE_PRECISION (type)
> +		  || (TYPE_PRECISION (*large_type) == TYPE_PRECISION (type)
> +		      && !TYPE_UNSIGNED (*large_type) && TYPE_UNSIGNED
> (type)))
> +		*large_type = type;
> +	    }
> +	}
> +      else
> +	break;
> +
> +      if (TREE_CODE (idx) != SSA_NAME)
> +	break;
> +      stmt = SSA_NAME_DEF_STMT (idx);
> +    }
> +  return stmt;
> +}
> +
> +/* Collection of loop index related elements.  */
> +struct idx_elements
> +{
> +  gcond *gc;
> +  gphi *phi;
> +  gimple *inc_stmt;
> +  tree idx;
> +  tree bnd;
> +  tree step;
> +  tree large_type;
> +  tree small_type;
> +  bool cmp_on_next;
> +};
> +
> +/*  Analyze and get the idx related elements: bnd,
> +    phi, increase stmt from exit edge E, etc.
> +
> +    i = phi (b, n)
> +    ...
> +    n0 = ik + 1
> +    n1 = (type)n0
> +    ...
> +    if (i != bnd) or if (n != bnd)
> +    ...
> +    n = ()nl
> +
> +   IDX is the i' or n'.  */
> +
> +bool
> +analyze_idx_elements (class loop *loop, edge e, idx_elements &data)
> +{
> +  /* Avoid complicated edge.  */
> +  if (e->flags & EDGE_FAKE)
> +    return false;
> +  if (e->src != loop->header && e->src != single_pred (loop->latch))
> +    return false;
> +  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->src))
> +    return false;
> +
> +  /* Check gcond.  */
> +  gimple *last = last_stmt (e->src);
> +  if (!last || gimple_code (last) != GIMPLE_COND)
> +    return false;
> +
> +  /* Get idx and bnd from gcond. */
> +  gcond *gc = as_a<gcond *> (last);
> +  tree bnd = gimple_cond_rhs (gc);
> +  tree idx = gimple_cond_lhs (gc);
> +  if (expr_invariant_in_loop_p (loop, idx))
> +    std::swap (idx, bnd);
> +  else if (!expr_invariant_in_loop_p (loop, bnd))
> +    return false;
> +  if (TREE_CODE (idx) != SSA_NAME)
> +    return false;
> +
> +  gimple *inc_stmt = NULL;
> +  bool cmp_next = false;
> +  tree small_type = TREE_TYPE (idx);
> +  tree large_type = small_type;
> +  gimple *stmt = filter_conversions (loop, idx, &small_type, &large_type);
> +  /* The idx on gcond is not PHI, it would be next. */
> +  if (is_gimple_assign (stmt))
> +    {
> +      tree rhs = gimple_assign_rhs1 (stmt);
> +      if (TREE_CODE (rhs) != SSA_NAME)
> +	return false;
> +
> +      cmp_next = true;
> +      inc_stmt = stmt;
> +      stmt = filter_conversions (loop, rhs, &small_type, &large_type);
> +    }
> +
> +  /* Get phi and next.  */
> +  if (gimple_code (stmt) != GIMPLE_PHI || gimple_bb (stmt) != loop->header)
> +    return false;
> +  gphi *phi = as_a<gphi *> (stmt);
> +  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
> +  if (TREE_CODE (next) != SSA_NAME)
> +    return false;
> +
> +  /* The define of next should be the increasement stmt.  */
> +  stmt = filter_conversions (loop, next, &small_type, &large_type);
> +  if (inc_stmt != NULL && inc_stmt != stmt)
> +    return false;
> +  if (!is_gimple_assign (stmt)
> +      || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
> +    return false;
> +  tree_code rhs_code = gimple_assign_rhs_code (stmt);
> +  if (rhs_code != PLUS_EXPR)
> +    return false;
> +  tree step = gimple_assign_rhs2 (stmt);
> +  if (!integer_minus_onep (step) && !integer_onep (step))
> +    return false;
> +  inc_stmt = stmt;
> +  tree prev = gimple_assign_rhs1 (stmt);
> +  if (TREE_CODE (prev) != SSA_NAME)
> +    return false;
> +  stmt = filter_conversions (loop, prev, &small_type, &large_type);
> +  if (stmt != phi)
> +    return false;
> +
> +  data.gc = gc;
> +  data.phi = phi;
> +  data.idx = idx;
> +  data.bnd = bnd;
> +  data.step = step;
> +  data.large_type = large_type;
> +  data.small_type = small_type;
> +  data.inc_stmt = inc_stmt;
> +  data.cmp_on_next = cmp_next;
> +
> +  return true;
> +}
> +
> +/* Check if the loop is possible to wrap at index.
> +   Return the assumption under which the wrap will not happen.
> +   Return NULL_TREE, if wrap will not happen.  */
> +
> +static tree
> +get_wrap_assumption (class loop *loop, edge *exit, idx_elements &data)
> +{
> +  int i;
> +  edge e;
> +  if (!single_pred_p (loop->latch) || !empty_block_p (loop->latch))
> +    return NULL_TREE;
> +
> +  auto_vec<edge> edges = get_loop_exit_edges (loop);
> +  FOR_EACH_VEC_ELT (edges, i, e)
> +    {
> +      if (!analyze_idx_elements (loop, e, data))
> +	continue;
> +
> +      /* Check if bound is MAX/MIN val of a integral type.  */
> +      tree bnd = data.bnd;
> +      tree bnd_type = TREE_TYPE (bnd);
> +      if (!INTEGRAL_TYPE_P (bnd_type) || !TYPE_UNSIGNED (bnd_type))
> +	continue;
> +      if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bnd_type))
> +	  || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bnd_type)))
> +	continue;
> +
> +      /* Check if it is "idx != bnd" or "idx < bnd".  */
> +      gcond *gc = data.gc;
> +      enum tree_code code = gimple_cond_code (gc);
> +      if (bnd == gimple_cond_lhs (gc))
> +	code = swap_tree_comparison (code);
> +      if ((e->flags & EDGE_TRUE_VALUE) && code == EQ_EXPR)
> +	code = NE_EXPR;
> +      if (code != NE_EXPR && code != LT_EXPR && code != GT_EXPR)
> +	continue;
> +
> +      /* Check if idx is iv with base and step.  */
> +      affine_iv iv;
> +      tree iv_niters = NULL_TREE;
> +      tree idx = PHI_RESULT (data.phi);
> +      if (!simple_iv_with_niters (loop, loop_containing_stmt (gc), idx, &iv,
> +				  &iv_niters, false))
> +	continue;
> +      if (!iv.base || !iv.step || !tree_int_cst_equal (iv.step, data.step))
> +	continue;
> +
> +      bool is_negative = tree_int_cst_sign_bit (iv.step);
> +      enum tree_code expect_code = is_negative ? GT_EXPR : LT_EXPR;
> +      if (code != NE_EXPR && code != expect_code)
> +	continue;
> +
> +      /* Update the type for base, bound and the max value of boundary.  */
> +      tree base = iv.base;
> +      tree small_type = data.small_type;
> +      tree large_type = data.large_type;
> +      tree max_min_bnd = is_negative ? TYPE_MIN_VALUE (small_type)
> +				     : TYPE_MAX_VALUE (small_type);
> +      if (large_type != TREE_TYPE (base))
> +	base = fold_convert (large_type, base);
> +      if (large_type != TREE_TYPE (bnd))
> +	bnd = fold_convert (large_type, bnd);
> +      if (large_type != small_type)
> +	max_min_bnd = fold_convert (large_type, max_min_bnd);
> +
> +      /* There is no wrap if bnd <= max value && base <= bnd.  */
> +      enum tree_code expect_code1 = is_negative ? GE_EXPR : LE_EXPR;
> +      tree no_wrap
> +	= fold_build2 (expect_code1, boolean_type_node, bnd, max_min_bnd);
> +      no_wrap
> +	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node, no_wrap,
> +		       fold_build2 (expect_code1, boolean_type_node, base,
> bnd));
> +
> +      if (integer_zerop (no_wrap))
> +	continue;
> +
> +      *exit = e;
> +      return no_wrap;
> +    }
> +
> +  return NULL_TREE;
> +}
> +
> +/*  Update the idx and bnd with a suitable type for no wrap/oveflow loop.
> +    Suitable type would be the largest type of the type conversion which
> +    occur on index, if the largest type is unsigned, using sizetype.  */
> +
> +static bool
> +update_idx_bnd_type (class loop *loop, idx_elements &data)
> +{
> +  gphi *phi = data.phi;
> +  tree type = TREE_TYPE (PHI_RESULT (phi));
> +  tree suitable_type = data.large_type;
> +  if (TYPE_UNSIGNED (suitable_type))
> +    {
> +      if (TYPE_PRECISION (suitable_type) == TYPE_PRECISION (sizetype))
> +	return false;
> +      suitable_type = sizetype;
> +    }
> +
> +  /* New base and new bound.  */
> +  tree bnd = data.bnd;
> +  edge pre_e = loop_preheader_edge (loop);
> +  tree base = PHI_ARG_DEF_FROM_EDGE (phi, pre_e);
> +  tree new_base = fold_convert (suitable_type, base);
> +  tree new_bnd = fold_convert (suitable_type, bnd);
> +  gimple_seq seq = NULL;
> +  new_base = force_gimple_operand (new_base, &seq, true, NULL_TREE);
> +  if (seq)
> +    gsi_insert_seq_on_edge_immediate (pre_e, seq);
> +  seq = NULL;
> +  new_bnd = force_gimple_operand (new_bnd, &seq, true, NULL_TREE);
> +  if (seq)
> +    gsi_insert_seq_on_edge_immediate (pre_e, seq);
> +
> +  /* new_i = phi (new_b, new_n)
> +     new_n = new_i + 1   */
> +  edge latch_e = loop_latch_edge (loop);
> +  const char *name = "idx";
> +  tree new_idx = make_temp_ssa_name (suitable_type, NULL, name);
> +  tree new_next = make_temp_ssa_name (suitable_type, NULL, name);
> +  gphi *newphi = create_phi_node (new_idx, loop->header);
> +  add_phi_arg (newphi, new_base, pre_e, UNKNOWN_LOCATION);
> +  add_phi_arg (newphi, new_next, latch_e, UNKNOWN_LOCATION);
> +
> +  /* New increase stmt.  */
> +  gimple *inc_stmt = data.inc_stmt;
> +  tree step = gimple_assign_rhs2 (inc_stmt);
> +  if (tree_int_cst_sign_bit(step))
> +    {
> +      wide_int v = wi::to_wide (step);
> +      v = wide_int::from (v, TYPE_PRECISION (suitable_type),
> +			  SIGNED);
> +
> +      step = wide_int_to_tree (suitable_type, v);
> +    }
> +  else
> +    step = fold_convert (suitable_type, step);
> +  new_idx = PHI_RESULT (newphi);
> +  tree_code inc_code = gimple_assign_rhs_code (inc_stmt);
> +  gimple *new_inc = gimple_build_assign (new_next, inc_code, new_idx, step);
> +  gimple_stmt_iterator gsi = gsi_for_stmt (inc_stmt);
> +  gsi_insert_before (&gsi, new_inc, GSI_SAME_STMT);
> +
> +  /* Update gcond.  */
> +  gcond *gc = data.gc;
> +  bool inv = bnd == gimple_cond_lhs (gc);
> +  tree cmp_idx = data.cmp_on_next ? new_next : new_idx;
> +  gimple_cond_set_lhs (gc, inv ? new_bnd : cmp_idx);
> +  gimple_cond_set_rhs (gc, inv ? cmp_idx : new_bnd);
> +  update_stmt (gc);
> +
> +  /* next = (next type)new_next
> +     And remove next = prev + 1.  */
> +  tree next = gimple_assign_lhs (inc_stmt);
> +  type = TREE_TYPE (next);
> +  gimple *stmt = gimple_build_assign (next, fold_convert (type, new_next));
> +  gsi = gsi_for_stmt (inc_stmt);
> +  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
> +
> +  gsi = gsi_for_stmt (inc_stmt);
> +  gsi_remove (&gsi, true);
> +
> +  /* prev = (prev type)new_prev
> +     And remove prev = phi.  */
> +  tree idx = PHI_RESULT (phi);
> +  type = TREE_TYPE (idx);
> +  stmt = gimple_build_assign (idx, fold_convert (type, new_idx));
> +  gsi = gsi_after_labels (loop->header);
> +  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
> +
> +  gsi = gsi_for_stmt (phi);
> +  gsi_remove (&gsi, true);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, ";; Loop index type is updated to use faster
> type.\n");
> +
> +  return true;
> +}
> +
> +/* Split out a new loop which would not wrap,
> +   under the guard that NO_WRAP_COND will not be true. */
> +
> +static bool
> +split_wrap_boundary (class loop *loop, edge e, tree no_wrap_cond,
> +		     bool is_negative_step)
> +{
> +  /* Convert the condition into a suitable gcond.  */
> +  gimple_seq stmts = NULL;
> +  no_wrap_cond = force_gimple_operand_1 (no_wrap_cond, &stmts,
> +					 is_gimple_condexpr, NULL_TREE);
> +
> +  /* Version the loop.  */
> +  initialize_original_copy_tables ();
> +  basic_block cond_bb;
> +  loop_version (loop, no_wrap_cond, &cond_bb,
> +		profile_probability::very_likely (),
> +		profile_probability::very_unlikely (),
> +		profile_probability::very_likely (),
> +		profile_probability::very_unlikely (), true);
> +  free_original_copy_tables ();
> +
> +  /* Insert the statements that feed COND.  */
> +  if (stmts)
> +    {
> +      gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
> +      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
> +    }
> +
> +  /* Update gcond code.  */
> +  gcond *gc = as_a<gcond *> (last_stmt (e->src));
> +  enum tree_code code = gimple_cond_code (gc);
> +  enum tree_code new_code;
> +  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
> +  if (is_negative_step)
> +    new_code = inv ? LT_EXPR : GT_EXPR;
> +  else
> +    new_code = inv ? GT_EXPR : LT_EXPR;
> +  if (code == NE_EXPR || code == EQ_EXPR)
> +    gimple_cond_set_code (gc, new_code);
> +
> +  /* Swap edge.  */
> +  if (code == EQ_EXPR)
> +    {
> +      edge out = EDGE_SUCC (e->src, 0);
> +      edge in = EDGE_SUCC (e->src, 1);
> +      if (in->flags & EDGE_TRUE_VALUE)
> +	std::swap (in, out);
> +      in->flags |= EDGE_TRUE_VALUE;
> +      in->flags &= ~EDGE_FALSE_VALUE;
> +      out->flags |= EDGE_FALSE_VALUE;
> +      out->flags &= ~EDGE_TRUE_VALUE;
> +    }
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, ";; Loop split on wrap index.\n");
> +
> +  return true;
> +}
> +
> +/* Split loop if there is possible wrap.
> +   For example:
> +   transform
> +
> +    void
> +    foo (int *a, int *b, unsigned l, unsigned u_n)
> +    {
> +      while (++l != u_n)
> +	a[l] = b[l]  + 1;
> +    }
> +
> +   to:
> +    if (l < u_n)
> +    {
> +      int li = l;
> +      int n = u_n;
> +      while (++li < n)
> +	a[li] = b[li]  + 1;
> +      l = li;
> +    }
> +    else
> +      while (++l != n)
> +	a[l] = b[l]  + 1;
> +  */
> +static bool
> +split_loop_on_wrap (class loop *loop)
> +{
> +  edge e;
> +  idx_elements data;
> +  tree no_wrap = get_wrap_assumption (loop, &e, data);
> +
> +  if (!no_wrap)
> +    return false;
> +
> +  int num = 0;
> +  basic_block *bbs = get_loop_body (loop);
> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> +    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
> +
> +  if (num > param_max_peeled_insns)
> +    {
> +      free (bbs);
> +      return false;
> +    }
> +
> +  if (!can_copy_bbs_p (bbs, loop->num_nodes))
> +    {
> +      free (bbs);
> +      return false;
> +    }
> +
> +  free (bbs);
> +
> +  if (integer_onep (no_wrap))
> +    return update_idx_bnd_type (loop, data);
> +
> +  if (split_wrap_boundary (loop, e, no_wrap, tree_int_cst_sign_bit
> (data.step)))
> +    {
> +      update_idx_bnd_type (loop, data);
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* Main entry point.  Perform loop splitting on all suitable loops.  */
> 
>  static unsigned int
> @@ -1622,7 +2085,8 @@ tree_ssa_split_loops (void)
>        if (optimize_loop_for_size_p (loop))
>  	continue;
> 
> -      if (split_loop (loop) || split_loop_on_cond (loop))
> +      if (split_loop (loop) || split_loop_on_cond (loop)
> +	  || split_loop_on_wrap (loop))
>  	{
>  	  /* Mark our containing loop as having had some split inner loops.
> */
>  	  loop_outer (loop)->aux = loop;
> 
> 
> >> Here is the updated patch, thanks for your time!
> > 
> > Updates:
> > . Enhance code to support negative step.
> > . Check step +-1 to make sure it hits loop condition !=
> > . Enhance runtime cases to check more boundary cases and run order cases.
> > . Refine for compiling time: check loop num of insns and can_copy_bbs_p
> > later
> > 
> >> 
> >> diff --git a/gcc/testsuite/gcc.dg/loop-split1.c
> >> b/gcc/testsuite/gcc.dg/loop-split1.c
> >> new file mode 100644
> >> index 00000000000..dd2d03a7b96
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/loop-split1.c
> >> @@ -0,0 +1,101 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
> >> +
> >> +void
> >> +foo (int *a, int *b, unsigned l, unsigned n)
> >> +{
> >> +  while (++l != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +void
> >> +foo_1 (int *a, int *b, unsigned n)
> >> +{
> >> +  unsigned l = 0;
> >> +  while (++l != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +
> >> +void
> >> +foo1 (int *a, int *b, unsigned l, unsigned n)
> >> +{
> >> +  while (l++ != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +
> >> +/* No wrap.  */
> >> +void
> >> +foo1_1 (int *a, int *b, unsigned n)
> >> +{
> >> +  unsigned l = 0;
> >> +  while (l++ != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +
> >> +unsigned
> >> +foo2 (char *a, char *b, unsigned l, unsigned n)
> >> +{
> >> +  while (++l != n)
> >> +    if (a[l] != b[l])
> >> +      break;
> >> +
> >> +  return l;
> >> +}
> >> +
> >> +unsigned
> >> +foo2_1 (char *a, char *b, unsigned l, unsigned n)
> >> +{
> >> +  l = 0;
> >> +  while (++l != n)
> >> +    if (a[l] != b[l])
> >> +      break;
> >> +
> >> +  return l;
> >> +}
> >> +
> >> +unsigned
> >> +foo3 (char *a, char *b, unsigned l, unsigned n)
> >> +{
> >> +  while (l++ != n)
> >> +    if (a[l] != b[l])
> >> +      break;
> >> +
> >> +  return l;
> >> +}
> >> +
> >> +/* No wrap.  */
> >> +unsigned
> >> +foo3_1 (char *a, char *b, unsigned l, unsigned n)
> >> +{
> >> +  l = 0;
> >> +  while (l++ != n)
> >> +    if (a[l] != b[l])
> >> +      break;
> >> +
> >> +  return l;
> >> +}
> >> +
> >> +void
> >> +bar ();
> >> +void
> >> +foo4 (unsigned n, unsigned i)
> >> +{
> >> +  do
> >> +    {
> >> +      if (i == n)
> >> +	return;
> >> +      bar ();
> >> +      ++i;
> >> +    }
> >> +  while (1);
> >> +}
> >> +
> >> +unsigned
> >> +find_skip_diff (char *p, char *q, unsigned n, unsigned i)
> >> +{
> >> +  while (p[i] == q[i] && ++i != n)
> >> +    p++, q++;
> >> +
> >> +  return i;
> >> +}
> >> +
> >> +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */
> >> diff --git a/gcc/testsuite/gcc.dg/loop-split2.c
> >> b/gcc/testsuite/gcc.dg/loop-split2.c
> >> new file mode 100644
> >> index 00000000000..56377e2f2f5
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/loop-split2.c
> >> @@ -0,0 +1,155 @@
> >> +/* { dg-do run } */
> >> +/* { dg-options "-O3" } */
> >> +
> >> +extern void
> >> +abort (void);
> >> +extern void
> >> +exit (int);
> >> +void
> >> +push (int);
> >> +
> >> +#define NI __attribute__ ((noinline))
> >> +
> >> +void NI
> >> +foo (int *a, int *b, unsigned char l, unsigned char n)
> >> +{
> >> +  while (++l != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +
> >> +unsigned NI
> >> +bar (int *a, int *b, unsigned char l, unsigned char n)
> >> +{
> >> +  while (l++ != n)
> >> +    {
> >> +      push (l);
> >> +      if (a[l] != b[l])
> >> +	break;
> >> +      push (l + 1);
> >> +    }
> >> +  return l;
> >> +}
> >> +
> >> +void NI
> >> +foo_1 (int *a, int *b, unsigned char l, unsigned char n)
> >> +{
> >> +  while (--l != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +
> >> +unsigned NI
> >> +bar_1 (int *a, int *b, unsigned char l, unsigned char n)
> >> +{
> >> +  while (l-- != n)
> >> +    {
> >> +      push (l);
> >> +      if (a[l] != b[l])
> >> +	break;
> >> +      push (l + 1);
> >> +    }
> >> +
> >> +  return l;
> >> +}
> >> +
> >> +int a[258];
> >> +int b[258];
> >> +int c[1024];
> >> +static int top = 0;
> >> +void
> >> +push (int e)
> >> +{
> >> +  c[top++] = e;
> >> +}
> >> +
> >> +void
> >> +reset ()
> >> +{
> >> +  top = 0;
> >> +  __builtin_memset (c, 0, sizeof (c));
> >> +}
> >> +
> >> +#define check(a, b) (a == b)
> >> +
> >> +int
> >> +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5)
> >> +{
> >> +  return check (c[0], a0) && check (c[1], a1) && check (c[2], a2)
> >> +	 && check (c[3], a3) && check (c[4], a4) && check (c[5], a5);
> >> +}
> >> +
> >> +int
> >> +main ()
> >> +{
> >> +  __builtin_memcpy (b, a, sizeof (a));
> >> +  reset ();
> >> +  if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9)
> >> +      || !check_c (c + 496, 254, 255, 255, 256, 0, 1))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 0, 0))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, 1))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar_1 (a, b, 6, 6) != 5)
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, 256))
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0))
> >> +    abort ();
> >> +
> >> +  b[100] += 1;
> >> +  reset ();
> >> +  if (bar (a, b, 90, 110) != 100)
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar (a, b, 110, 105) != 100)
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar_1 (a, b, 90, 110) != 109)
> >> +    abort ();
> >> +
> >> +  reset ();
> >> +  if (bar_1 (a, b, 2, 90) != 100)
> >> +    abort ();
> >> +
> >> +  foo (a, b, 99, 99);
> >> +  a[99] = b[99] + 1;
> >> +  for (int i = 0; i < 256; i++)
> >> +    if (a[i] != b[i] + 1)
> >> +      abort ();
> >> +
> >> +  foo_1 (a, b, 99, 99);
> >> +  a[99] = b[99] + 1;
> >> +  for (int i = 0; i < 256; i++)
> >> +    if (a[i] != b[i] + 1)
> >> +      abort ();
> >> +
> >> +  exit (0);
> >> +}
> >> diff --git a/gcc/testsuite/gcc.dg/loop-split3.c
> >> b/gcc/testsuite/gcc.dg/loop-split3.c
> >> new file mode 100644
> >> index 00000000000..ec93ee8bd12
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/loop-split3.c
> >> @@ -0,0 +1,62 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
> >> +
> >> +void
> >> +foo (int *a, int *b, unsigned l, unsigned n)
> >> +{
> >> +  while (--l != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +
> >> +void
> >> +foo1 (int *a, int *b, unsigned l, unsigned n)
> >> +{
> >> +  while (l-- != n)
> >> +    a[l] = b[l] + 1;
> >> +}
> >> +
> >> +unsigned
> >> +foo2 (char *a, char *b, unsigned l, unsigned n)
> >> +{
> >> +  while (--l != n)
> >> +    if (a[l] != b[l])
> >> +      break;
> >> +
> >> +  return l;
> >> +}
> >> +
> >> +unsigned
> >> +foo3 (char *a, char *b, unsigned l, unsigned n)
> >> +{
> >> +  while (l-- != n)
> >> +    if (a[l] != b[l])
> >> +      break;
> >> +
> >> +  return l;
> >> +}
> >> +
> >> +void
> >> +bar ();
> >> +void
> >> +foo4 (unsigned n, unsigned i)
> >> +{
> >> +  do
> >> +    {
> >> +      if (i == n)
> >> +	return;
> >> +      bar ();
> >> +      --i;
> >> +    }
> >> +  while (1);
> >> +}
> >> +
> >> +unsigned
> >> +find_skip_diff (char *p, char *q, unsigned n, unsigned i)
> >> +{
> >> +  while (p[i] == q[i] && --i != n)
> >> +    p--, q--;
> >> +
> >> +  return i;
> >> +}
> >> +
> >> +/* { dg-final { scan-tree-dump-times "Loop split" 6 "lsplit" } } */
> >> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> >> index 3a09bbc39e5..e9f23b32186 100644
> >> --- a/gcc/tree-ssa-loop-split.c
> >> +++ b/gcc/tree-ssa-loop-split.c
> >> @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
> >>  #include "cfghooks.h"
> >>  #include "gimple-fold.h"
> >>  #include "gimplify-me.h"
> >> +#include "tree-ssa-loop-ivopts.h"
> >> 
> >>  /* This file implements two kinds of loop splitting.
> >> 
> >> @@ -229,11 +230,14 @@ easy_exit_values (class loop *loop)
> >>     conditional).  I.e. the second loop can now be entered either
> >>     via the original entry or via NEW_E, so the entry values of LOOP2
> >>     phi nodes are either the original ones or those at the exit
> >> -   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
> >> -   this.  The loops need to fulfill easy_exit_values().  */
> >> +   of LOOP1.  Selecting the previous value instead next value as the
> >> +   exit value of LOOP1 if USE_PREV is true.  Insert new phi nodes in
> >> +   LOOP2 pre-header reflecting this.  The loops need to fulfill
> >> +   easy_exit_values().  */
> >> 
> >> static void
> >> -connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
> >> +connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
> >> +		   bool use_prev = false)
> >>  {
> >>    basic_block rest = loop_preheader_edge (loop2)->src;
> >>    gcc_assert (new_e->dest == rest);
> >> @@ -279,7 +283,8 @@ connect_loop_phis (class loop *loop1, class loop
> >> *loop2, edge new_e)
> >> 
> >>        gphi * newphi = create_phi_node (new_init, rest);
> >>        add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
> >> -      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
> >> +      add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next,
> >> new_e,
> >> +		   UNKNOWN_LOCATION);
> >>        SET_USE (op, new_init);
> >>      }
> >> }
> >> @@ -1593,6 +1598,252 @@ split_loop_on_cond (struct loop *loop)
> >>    return do_split;
> >>  }
> >> 
> >> +/* Check if the LOOP exit branch is like "if (idx != bound)",
> >> +   Return the branch edge which exit loop, if wrap may happen on "idx".
> >> */
> >> +
> >> +static edge
> >> +get_ne_cond_branch (struct loop *loop, tree *step)
> >> +{
> >> +  int i;
> >> +  edge e;
> >> +
> >> +  auto_vec<edge> edges = get_loop_exit_edges (loop);
> >> +  FOR_EACH_VEC_ELT (edges, i, e)
> >> +    {
> >> +      basic_block bb = e->src;
> >> +
> >> +      /* Check if exit at gcond.  */
> >> +      gimple *last = last_stmt (bb);
> >> +      if (!last || gimple_code (last) != GIMPLE_COND)
> >> +	continue;
> >> +      gcond *cond = as_a<gcond *> (last);
> >> +      enum tree_code code = gimple_cond_code (cond);
> >> +      if (!((code == NE_EXPR && (e->flags & EDGE_FALSE_VALUE))
> >> +	    || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE))))
> >> +	continue;
> >> +
> >> +      /* Check if bound is invarant.  */
> >> +      tree idx = gimple_cond_lhs (cond);
> >> +      tree bnd = gimple_cond_rhs (cond);
> >> +      if (expr_invariant_in_loop_p (loop, idx))
> >> +	std::swap (idx, bnd);
> >> +      else if (!expr_invariant_in_loop_p (loop, bnd))
> >> +	continue;
> >> +
> >> +      /* Only unsigned type conversion could cause wrap.  */
> >> +      tree type = TREE_TYPE (idx);
> >> +      if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME
> >> +	  || !TYPE_UNSIGNED (type))
> >> +	continue;
> >> +
> >> +      /* Avoid to split if bound is MAX/MIN val.  */
> >> +      tree bound_type = TREE_TYPE (bnd);
> >> +      if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type))
> >> +	  || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type)))
> >> +	continue;
> >> +
> >> +      /* Check if there is possible wrap.  */
> >> +      class tree_niter_desc niter;
> >> +      if (!number_of_iterations_exit (loop, e, &niter, false, false))
> >> +	continue;
> >> +      if (niter.control.no_overflow)
> >> +	return NULL;
> >> +      if (niter.cmp != NE_EXPR)
> >> +	continue;
> >> +      if (!integer_onep (niter.control.step)
> >> +	  && !integer_minus_onep (niter.control.step))
> >> +	continue;
> >> +      *step = niter.control.step;
> >> +
> >> +      /* If exit edge is just before the empty latch, it is easy to link
> >> +	 the split loops: just jump from the exit edge of one loop to the
> >> +	 header of new loop.  */
> >> +      if (single_pred_p (loop->latch)
> >> +	  && single_pred_edge (loop->latch)->src == bb
> >> +	  && empty_block_p (loop->latch))
> >> +	return e;
> >> +
> >> +      /* If exit edge is at end of header, and header contains i++ or ++i,
> >> +	 only, it is simple to link the split loops: jump from the end of
> >> +	 one loop header to the new loop header, and use unchanged PHI result
> >> +	 of the first loop as the entry PHI value of the second loop.  */
> >> +      if (bb == loop->header)
> >> +	{
> >> +	  /* Only one phi.  */
> >> +	  gphi_iterator psi = gsi_start_phis (bb);
> >> +	  if (gsi_end_p (psi))
> >> +	    continue;
> >> +	  gphi *phi = psi.phi ();
> >> +	  gsi_next (&psi);
> >> +	  if (!gsi_end_p (psi))
> >> +	    continue;
> >> +
> >> +	  /* Check it is ++i or ++i */
> >> +	  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
> >> +	  tree prev = PHI_RESULT (phi);
> >> +	  if (idx != prev && idx != next)
> >> +	    continue;
> >> +
> >> +	  gimple_stmt_iterator gsi = gsi_start_nondebug_after_labels_bb (bb);
> >> +	  if (gsi_end_p (gsi))
> >> +	    continue;
> >> +	  gimple *s1 = gsi_stmt (gsi);
> >> +	  if (!is_gimple_assign (s1) || gimple_assign_lhs (s1) != next
> >> +	      || gimple_assign_rhs1 (s1) != prev)
> >> +	    continue;
> >> +
> >> +	  gsi_next_nondebug (&gsi);
> >> +	  if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond)
> >> +	    return e;
> >> +	}
> >> +    }
> >> +
> >> +  return NULL;
> >> +}
> >> +
> >> +/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR.
> >> */
> >> +
> >> +static bool
> >> +split_ne_loop (struct loop *loop, edge cond_e, tree step)
> >> +{
> >> +  initialize_original_copy_tables ();
> >> +
> >> +  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
> >> +				     profile_probability::always (),
> >> +				     profile_probability::never (),
> >> +				     profile_probability::always (),
> >> +				     profile_probability::always (), true);
> >> +
> >> +  gcc_assert (loop2);
> >> +  update_ssa (TODO_update_ssa);
> >> +
> >> +  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
> >> +  free_original_copy_tables ();
> >> +
> >> +  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
> >> +  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
> >> +
> >> +  /* Invert edges for gcond.  */
> >> +  if (gimple_cond_code (gc) == EQ_EXPR)
> >> +    {
> >> +      auto invert_edge = [](basic_block bb) {
> >> +	edge out = EDGE_SUCC (bb, 0);
> >> +	edge in = EDGE_SUCC (bb, 1);
> >> +	if (in->flags & EDGE_TRUE_VALUE)
> >> +	  std::swap (in, out);
> >> +	in->flags |= EDGE_TRUE_VALUE;
> >> +	in->flags &= ~EDGE_FALSE_VALUE;
> >> +	out->flags |= EDGE_FALSE_VALUE;
> >> +	out->flags &= ~EDGE_TRUE_VALUE;
> >> +      };
> >> +
> >> +      invert_edge (gimple_bb (gc));
> >> +      invert_edge (gimple_bb (dup_gc));
> >> +    }
> >> +
> >> +  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
> >> +  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
> >> +  if (tree_int_cst_sign_bit (step))
> >> +    inv = !inv;
> >> +  enum tree_code first_loop_code = inv ? LT_EXPR : GT_EXPR;
> >> +  enum tree_code second_loop_code = inv ? GT_EXPR : LT_EXPR;
> >> +
> >> +  gimple_cond_set_code (gc, first_loop_code);
> >> +  gimple_cond_set_code (dup_gc, second_loop_code);
> >> +
> >> +  /* Link the exit cond edge to new loop.  */
> >> +  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
> >> +  edge pred_e = single_pred_edge (loop->latch);
> >> +  bool simple_loop
> >> +    = pred_e && pred_e->src == cond_e->src && empty_block_p (loop->latch);
> >> +  if (simple_loop)
> >> +    gimple_cond_set_code (break_cond, second_loop_code);
> >> +  else
> >> +    gimple_cond_make_true (break_cond);
> >> +
> >> +  basic_block break_bb = split_edge (cond_e);
> >> +  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
> >> +  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
> >> +
> >> +  edge to_exit = single_succ_edge (break_bb);
> >> +  edge to_new_loop = make_edge (break_bb, loop_preheader_edge
> >> (loop2)->src, 0);
> >> +  to_new_loop->flags |= EDGE_TRUE_VALUE;
> >> +  to_exit->flags |= EDGE_FALSE_VALUE;
> >> +  to_exit->flags &= ~EDGE_FALLTHRU;
> >> +  to_exit->probability = cond_e->probability;
> >> +  to_new_loop->probability = to_exit->probability.invert ();
> >> +
> >> +  update_ssa (TODO_update_ssa);
> >> +
> >> +  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
> >> +
> >> +  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
> >> +  if (dump_file && (dump_flags & TDF_DETAILS))
> >> +    fprintf (dump_file, ";; Loop split on wrap index.\n");
> >> +
> >> +  return true;
> >> +}
> >> +
> >> +/* Checks if LOOP contains a suitable NE_EXPR conditional block to split.
> >> +L_H:
> >> + if (i!=N)
> >> +   S;
> >> + else
> >> +   goto EXIT;
> >> + i++;
> >> + goto L_H;
> >> +
> >> +The "i!=N" is like "i>N || i<N", then it could be transformed to:
> >> +
> >> +L_H:
> >> + if (i>N)
> >> +   S;
> >> + else
> >> +   goto EXIT;
> >> + i++;
> >> + goto L_H;
> >> +L1_H:
> >> + if (i<N)
> >> +   S;
> >> + else
> >> +   goto EXIT;
> >> + i++;
> >> + goto L1_H;
> >> +
> >> +The loop with "i<N" is in favor of both GIMPLE and RTL passes.  */
> >> +
> >> +static bool
> >> +split_loop_on_ne_cond (class loop *loop)
> >> +{
> >> +  tree step;
> >> +  edge branch_edge = get_ne_cond_branch (loop, &step);
> >> +  if (!branch_edge)
> >> +    return false;
> >> +
> >> +  int num = 0;
> >> +  basic_block *bbs = get_loop_body (loop);
> >> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> >> +    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
> >> +
> >> +  if (num > param_max_peeled_insns)
> >> +    {
> >> +      free (bbs);
> >> +      return false;
> >> +    }
> >> +
> >> +  if (!can_copy_bbs_p (bbs, loop->num_nodes))
> >> +    {
> >> +      free (bbs);
> >> +      return false;
> >> +    }
> >> +  free (bbs);
> >> +
> >> +  if (split_ne_loop (loop, branch_edge, step))
> >> +    return true;
> >> +
> >> +  return false;
> >> +}
> >> +
> >> /* Main entry point.  Perform loop splitting on all suitable loops.  */
> >> 
> >> static unsigned int
> >> @@ -1622,7 +1873,8 @@ tree_ssa_split_loops (void)
> >>         if (optimize_loop_for_size_p (loop))
> >>   continue;
> >> 
> >> -      if (split_loop (loop) || split_loop_on_cond (loop))
> >> +      if (split_loop (loop) || split_loop_on_cond (loop)
> >> +	  || split_loop_on_ne_cond (loop))
> >>   {
> >> 	  /* Mark our containing loop as having had some split inner loops.
> >> */
> >>     loop_outer (loop)->aux = loop;
> 
>
Jiufu Guo July 23, 2021, 3:31 a.m. UTC | #2
On 2021-06-21 20:36, Richard Biener wrote:
> On Mon, 21 Jun 2021, guojiufu wrote:
> 
>> On 2021-06-21 14:19, guojiufu via Gcc-patches wrote:
>> > On 2021-06-09 19:18, guojiufu wrote:
>> >> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote:
>> >>> On 2021-06-08 18:13, Richard Biener wrote:
>> >>>> On Fri, 4 Jun 2021, Jiufu Guo wrote:
>> >>>>
>> >>> cut...
>> > cut...
>> >>
>> 
>> Besides the method in the previous mails, 
>> I’m thinking of another way to split loops:
>> 
>> foo (int *a, int *b, unsigned k, unsigned n)
>> {                                                               
>>  while (++k != n)
>>    a[k] = b[k] + 1;                               
>> } 
>> 
>> We may split it into:
>> if (k<n)
>> {
>>   while (++k < n)  //loop1
>>    a[k] = b[k] + 1;   
>> }
>> else
>> {
>>  while (++k != n) //loop2
>>    a[k] = b[k] + 1;  
>> }
>> 
>> In most cases, loop1 would be hit, the overhead of this method is only
>> checking “if (k<n)”
>> which would be smaller than the previous method.
> 
> That would be your original approach of versioning the loop.  I think
> I suggested that for this scalar evolution and dataref analysis should
> be enhanced to build up conditions under which IV evolutions are
> affine (non-wrapping) and the versioning code in actual transforms
> should then do the appropriate versioning (like the vectorizer already
> does for niter analysis ->assumptions for example).

Hi Richi,

Thanks for your suggestion!

The original idea was trying to cover cases like multi-exit loops, while 
it
seems not benefited too much.  The method you said would help for common 
cases.

I'm thinking of the methods to implement this:
During scev analyzing, add more possible wrap checking (especially for 
unsigned)
for convert_affine_scev/scev_probably_wraps_p/chrec_convert_1;  
introducing
no_wrap_assumption for the conditions of no-wrapping on given chrec/iv.
And using this assumption in simple_iv_with_niters/dr_analyze_innermost.

One question is, where is the best place to add this assumption?
Is it a flexible idea to add no_wrap_assumption to affine_iv/loop, and
set the assumption when scev checks wrap?

Thanks for your suggestions!

BR.
Jiufu


> 
> Richard.
> 
cut....
Richard Biener July 28, 2021, 10:50 a.m. UTC | #3
On Fri, 23 Jul 2021, guojiufu wrote:

> On 2021-06-21 20:36, Richard Biener wrote:
> > On Mon, 21 Jun 2021, guojiufu wrote:
> > 
> >> On 2021-06-21 14:19, guojiufu via Gcc-patches wrote:
> >> > On 2021-06-09 19:18, guojiufu wrote:
> >> >> On 2021-06-09 17:42, guojiufu via Gcc-patches wrote:
> >> >>> On 2021-06-08 18:13, Richard Biener wrote:
> >> >>>> On Fri, 4 Jun 2021, Jiufu Guo wrote:
> >> >>>>
> >> >>> cut...
> >> > cut...
> >> >>
> >> 
> >> Besides the method in the previous mails, 
> >> I’m thinking of another way to split loops:
> >> 
> >> foo (int *a, int *b, unsigned k, unsigned n)
> >> {                                                               
> >>  while (++k != n)
> >>    a[k] = b[k] + 1;                               
> >> } 
> >> 
> >> We may split it into:
> >> if (k<n)
> >> {
> >>   while (++k < n)  //loop1
> >>    a[k] = b[k] + 1;   
> >> }
> >> else
> >> {
> >>  while (++k != n) //loop2
> >>    a[k] = b[k] + 1;  
> >> }
> >> 
> >> In most cases, loop1 would be hit, the overhead of this method is only
> >> checking “if (k<n)”
> >> which would be smaller than the previous method.
> > 
> > That would be your original approach of versioning the loop.  I think
> > I suggested that for this scalar evolution and dataref analysis should
> > be enhanced to build up conditions under which IV evolutions are
> > affine (non-wrapping) and the versioning code in actual transforms
> > should then do the appropriate versioning (like the vectorizer already
> > does for niter analysis ->assumptions for example).
> 
> Hi Richi,
> 
> Thanks for your suggestion!
> 
> The original idea was trying to cover cases like multi-exit loops, while it
> seems not benefited too much.  The method you said would help for common
> cases.
> 
> I'm thinking of the methods to implement this:
> During scev analyzing, add more possible wrap checking (especially for
> unsigned)
> for convert_affine_scev/scev_probably_wraps_p/chrec_convert_1;  introducing
> no_wrap_assumption for the conditions of no-wrapping on given chrec/iv.
> And using this assumption in simple_iv_with_niters/dr_analyze_innermost.
> 
> One question is, where is the best place to add this assumption?
> Is it a flexible idea to add no_wrap_assumption to affine_iv/loop, and
> set the assumption when scev checks wrap?

I'm not sure what you exactly mean here.  I was thinking of
making analyze_scalar_evolution return more than a plain
tree, for example by providing an alternate (optional) output,
for example a pointer to some new scev_info that could initially
be just

struct scev_info {
  tree assumptions;
};

when analyze_scalar_evolution recurses there are certain points
it can end up returning chrec_dont_know.  If we passed in the
alternate output there might be the possibility to add
to the assumption to make the result well-defined (and yes,
this extends to chrec_fold_* routines, mostly chrec_convert
I think).  Handled cases could be added piecewise, just passing
down the scev_info pointer will be intrusive initially.

For simple_iv there's already a struct we could add 'assumptions'
to (and maybe a flag to the API whether assumptions are allowed).

For DR analysis the same could be done.

Richard.

> Thanks for your suggestions!
> 
> BR.
> Jiufu
> 
> 
> > 
> > Richard.
> > 
> cut....
> 
>
diff mbox series

Patch

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..c9d161565e4 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -41,6 +41,7 @@  along with GCC; see the file COPYING3.  If not see
  #include "cfghooks.h"
  #include "gimple-fold.h"
  #include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"

  /* This file implements two kinds of loop splitting.

@@ -1593,6 +1594,468 @@  split_loop_on_cond (struct loop *loop)
    return do_split;
  }

+/* Filter out type conversions on IDX.
+   Store the shortest type during conversion to SMALL_TYPE.
+   Store the longest type during conversion to LARGE_TYPE.  */
+
+static gimple *
+filter_conversions (class loop *loop, tree idx, tree *small_type = 
NULL,
+		    tree *large_type = NULL)
+{
+  gcc_assert (TREE_CODE (idx) == SSA_NAME);
+  gimple *stmt = SSA_NAME_DEF_STMT (idx);
+  while (is_gimple_assign (stmt)
+	 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
+    {
+      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
+	{
+	  idx = gimple_assign_rhs1 (stmt);
+	  if (small_type)
+	    {
+	      tree type = TREE_TYPE (idx);
+	      if (TYPE_PRECISION (*small_type) > TYPE_PRECISION (type)
+		  || (TYPE_PRECISION (*small_type) == TYPE_PRECISION (type)
+		      && TYPE_UNSIGNED (*small_type) && !TYPE_UNSIGNED (type)))
+		*small_type = type;
+	    }
+	  if (large_type)
+	    {
+	      tree type = TREE_TYPE (idx);
+	      if (TYPE_PRECISION (*large_type) < TYPE_PRECISION (type)
+		  || (TYPE_PRECISION (*large_type) == TYPE_PRECISION (type)
+		      && !TYPE_UNSIGNED (*large_type) && TYPE_UNSIGNED (type)))
+		*large_type = type;
+	    }
+	}
+      else
+	break;
+
+      if (TREE_CODE (idx) != SSA_NAME)
+	break;
+      stmt = SSA_NAME_DEF_STMT (idx);
+    }
+  return stmt;
+}
+
+/* Collection of loop index related elements.  */
+struct idx_elements
+{
+  gcond *gc;
+  gphi *phi;
+  gimple *inc_stmt;
+  tree idx;
+  tree bnd;
+  tree step;
+  tree large_type;
+  tree small_type;
+  bool cmp_on_next;
+};
+
+/*  Analyze and get the idx related elements: bnd,
+    phi, increase stmt from exit edge E, etc.
+
+    i = phi (b, n)
+    ...
+    n0 = ik + 1
+    n1 = (type)n0
+    ...
+    if (i != bnd) or if (n != bnd)
+    ...
+    n = ()nl
+
+   IDX is the i' or n'.  */
+
+bool
+analyze_idx_elements (class loop *loop, edge e, idx_elements &data)
+{
+  /* Avoid complicated edge.  */
+  if (e->flags & EDGE_FAKE)
+    return false;
+  if (e->src != loop->header && e->src != single_pred (loop->latch))
+    return false;
+  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->src))
+    return false;
+
+  /* Check gcond.  */
+  gimple *last = last_stmt (e->src);
+  if (!last || gimple_code (last) != GIMPLE_COND)
+    return false;
+
+  /* Get idx and bnd from gcond. */
+  gcond *gc = as_a<gcond *> (last);
+  tree bnd = gimple_cond_rhs (gc);
+  tree idx = gimple_cond_lhs (gc);
+  if (expr_invariant_in_loop_p (loop, idx))
+    std::swap (idx, bnd);
+  else if (!expr_invariant_in_loop_p (loop, bnd))
+    return false;
+  if (TREE_CODE (idx) != SSA_NAME)
+    return false;
+
+  gimple *inc_stmt = NULL;
+  bool cmp_next = false;
+  tree small_type = TREE_TYPE (idx);
+  tree large_type = small_type;
+  gimple *stmt = filter_conversions (loop, idx, &small_type, 
&large_type);
+  /* The idx on gcond is not PHI, it would be next. */
+  if (is_gimple_assign (stmt))
+    {
+      tree rhs = gimple_assign_rhs1 (stmt);
+      if (TREE_CODE (rhs) != SSA_NAME)
+	return false;
+
+      cmp_next = true;
+      inc_stmt = stmt;
+      stmt = filter_conversions (loop, rhs, &small_type, &large_type);
+    }
+
+  /* Get phi and next.  */
+  if (gimple_code (stmt) != GIMPLE_PHI || gimple_bb (stmt) != 
loop->header)
+    return false;
+  gphi *phi = as_a<gphi *> (stmt);
+  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+  if (TREE_CODE (next) != SSA_NAME)
+    return false;
+
+  /* The define of next should be the increasement stmt.  */
+  stmt = filter_conversions (loop, next, &small_type, &large_type);
+  if (inc_stmt != NULL && inc_stmt != stmt)
+    return false;
+  if (!is_gimple_assign (stmt)
+      || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
+    return false;
+  tree_code rhs_code = gimple_assign_rhs_code (stmt);
+  if (rhs_code != PLUS_EXPR)
+    return false;
+  tree step = gimple_assign_rhs2 (stmt);
+  if (!integer_minus_onep (step) && !integer_onep (step))
+    return false;
+  inc_stmt = stmt;
+  tree prev = gimple_assign_rhs1 (stmt);
+  if (TREE_CODE (prev) != SSA_NAME)
+    return false;
+  stmt = filter_conversions (loop, prev, &small_type, &large_type);
+  if (stmt != phi)
+    return false;
+
+  data.gc = gc;
+  data.phi = phi;
+  data.idx = idx;
+  data.bnd = bnd;
+  data.step = step;
+  data.large_type = large_type;
+  data.small_type = small_type;
+  data.inc_stmt = inc_stmt;
+  data.cmp_on_next = cmp_next;
+
+  return true;
+}
+
+/* Check if the loop is possible to wrap at index.
+   Return the assumption under which the wrap will not happen.
+   Return NULL_TREE, if wrap will not happen.  */
+
+static tree
+get_wrap_assumption (class loop *loop, edge *exit, idx_elements &data)
+{
+  int i;
+  edge e;
+  if (!single_pred_p (loop->latch) || !empty_block_p (loop->latch))
+    return NULL_TREE;
+
+  auto_vec<edge> edges = get_loop_exit_edges (loop);
+  FOR_EACH_VEC_ELT (edges, i, e)
+    {
+      if (!analyze_idx_elements (loop, e, data))
+	continue;
+
+      /* Check if bound is MAX/MIN val of a integral type.  */
+      tree bnd = data.bnd;
+      tree bnd_type = TREE_TYPE (bnd);
+      if (!INTEGRAL_TYPE_P (bnd_type) || !TYPE_UNSIGNED (bnd_type))
+	continue;
+      if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bnd_type))
+	  || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bnd_type)))
+	continue;
+
+      /* Check if it is "idx != bnd" or "idx < bnd".  */
+      gcond *gc = data.gc;
+      enum tree_code code = gimple_cond_code (gc);
+      if (bnd == gimple_cond_lhs (gc))
+	code = swap_tree_comparison (code);
+      if ((e->flags & EDGE_TRUE_VALUE) && code == EQ_EXPR)
+	code = NE_EXPR;
+      if (code != NE_EXPR && code != LT_EXPR && code != GT_EXPR)
+	continue;
+
+      /* Check if idx is iv with base and step.  */
+      affine_iv iv;
+      tree iv_niters = NULL_TREE;
+      tree idx = PHI_RESULT (data.phi);
+      if (!simple_iv_with_niters (loop, loop_containing_stmt (gc), idx, 
&iv,
+				  &iv_niters, false))
+	continue;
+      if (!iv.base || !iv.step || !tree_int_cst_equal (iv.step, 
data.step))
+	continue;
+
+      bool is_negative = tree_int_cst_sign_bit (iv.step);
+      enum tree_code expect_code = is_negative ? GT_EXPR : LT_EXPR;
+      if (code != NE_EXPR && code != expect_code)
+	continue;
+
+      /* Update the type for base, bound and the max value of boundary. 
  */
+      tree base = iv.base;
+      tree small_type = data.small_type;
+      tree large_type = data.large_type;
+      tree max_min_bnd = is_negative ? TYPE_MIN_VALUE (small_type)
+				     : TYPE_MAX_VALUE (small_type);
+      if (large_type != TREE_TYPE (base))
+	base = fold_convert (large_type, base);
+      if (large_type != TREE_TYPE (bnd))
+	bnd = fold_convert (large_type, bnd);
+      if (large_type != small_type)
+	max_min_bnd = fold_convert (large_type, max_min_bnd);
+
+      /* There is no wrap if bnd <= max value && base <= bnd.  */
+      enum tree_code expect_code1 = is_negative ? GE_EXPR : LE_EXPR;
+      tree no_wrap
+	= fold_build2 (expect_code1, boolean_type_node, bnd, max_min_bnd);
+      no_wrap
+	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node, no_wrap,
+		       fold_build2 (expect_code1, boolean_type_node, base, bnd));
+
+      if (integer_zerop (no_wrap))
+	continue;
+
+      *exit = e;
+      return no_wrap;
+    }
+
+  return NULL_TREE;
+}
+
+/*  Update the idx and bnd with a suitable type for no wrap/oveflow 
loop.
+    Suitable type would be the largest type of the type conversion 
which
+    occur on index, if the largest type is unsigned, using sizetype.  
*/
+
+static bool
+update_idx_bnd_type (class loop *loop, idx_elements &data)
+{
+  gphi *phi = data.phi;
+  tree type = TREE_TYPE (PHI_RESULT (phi));
+  tree suitable_type = data.large_type;
+  if (TYPE_UNSIGNED (suitable_type))
+    {
+      if (TYPE_PRECISION (suitable_type) == TYPE_PRECISION (sizetype))
+	return false;
+      suitable_type = sizetype;
+    }
+
+  /* New base and new bound.  */
+  tree bnd = data.bnd;
+  edge pre_e = loop_preheader_edge (loop);
+  tree base = PHI_ARG_DEF_FROM_EDGE (phi, pre_e);
+  tree new_base = fold_convert (suitable_type, base);
+  tree new_bnd = fold_convert (suitable_type, bnd);
+  gimple_seq seq = NULL;
+  new_base = force_gimple_operand (new_base, &seq, true, NULL_TREE);
+  if (seq)
+    gsi_insert_seq_on_edge_immediate (pre_e, seq);
+  seq = NULL;
+  new_bnd = force_gimple_operand (new_bnd, &seq, true, NULL_TREE);
+  if (seq)
+    gsi_insert_seq_on_edge_immediate (pre_e, seq);
+
+  /* new_i = phi (new_b, new_n)
+     new_n = new_i + 1   */
+  edge latch_e = loop_latch_edge (loop);
+  const char *name = "idx";
+  tree new_idx = make_temp_ssa_name (suitable_type, NULL, name);
+  tree new_next = make_temp_ssa_name (suitable_type, NULL, name);
+  gphi *newphi = create_phi_node (new_idx, loop->header);
+  add_phi_arg (newphi, new_base, pre_e, UNKNOWN_LOCATION);
+  add_phi_arg (newphi, new_next, latch_e, UNKNOWN_LOCATION);
+
+  /* New increase stmt.  */
+  gimple *inc_stmt = data.inc_stmt;
+  tree step = gimple_assign_rhs2 (inc_stmt);
+  if (tree_int_cst_sign_bit(step))
+    {
+      wide_int v = wi::to_wide (step);
+      v = wide_int::from (v, TYPE_PRECISION (suitable_type),
+			  SIGNED);
+
+      step = wide_int_to_tree (suitable_type, v);
+    }
+  else
+    step = fold_convert (suitable_type, step);
+  new_idx = PHI_RESULT (newphi);
+  tree_code inc_code = gimple_assign_rhs_code (inc_stmt);
+  gimple *new_inc = gimple_build_assign (new_next, inc_code, new_idx, 
step);
+  gimple_stmt_iterator gsi = gsi_for_stmt (inc_stmt);
+  gsi_insert_before (&gsi, new_inc, GSI_SAME_STMT);
+
+  /* Update gcond.  */
+  gcond *gc = data.gc;
+  bool inv = bnd == gimple_cond_lhs (gc);
+  tree cmp_idx = data.cmp_on_next ? new_next : new_idx;
+  gimple_cond_set_lhs (gc, inv ? new_bnd : cmp_idx);
+  gimple_cond_set_rhs (gc, inv ? cmp_idx : new_bnd);
+  update_stmt (gc);
+
+  /* next = (next type)new_next
+     And remove next = prev + 1.  */
+  tree next = gimple_assign_lhs (inc_stmt);
+  type = TREE_TYPE (next);
+  gimple *stmt = gimple_build_assign (next, fold_convert (type, 
new_next));
+  gsi = gsi_for_stmt (inc_stmt);
+  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+  gsi = gsi_for_stmt (inc_stmt);
+  gsi_remove (&gsi, true);
+
+  /* prev = (prev type)new_prev
+     And remove prev = phi.  */
+  tree idx = PHI_RESULT (phi);
+  type = TREE_TYPE (idx);
+  stmt = gimple_build_assign (idx, fold_convert (type, new_idx));
+  gsi = gsi_after_labels (loop->header);
+  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+  gsi = gsi_for_stmt (phi);
+  gsi_remove (&gsi, true);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop index type is updated to use faster 
type.\n");
+
+  return true;
+}
+
+/* Split out a new loop which would not wrap,
+   under the guard that NO_WRAP_COND will not be true. */
+
+static bool
+split_wrap_boundary (class loop *loop, edge e, tree no_wrap_cond,
+		     bool is_negative_step)
+{
+  /* Convert the condition into a suitable gcond.  */
+  gimple_seq stmts = NULL;
+  no_wrap_cond = force_gimple_operand_1 (no_wrap_cond, &stmts,
+					 is_gimple_condexpr, NULL_TREE);
+
+  /* Version the loop.  */
+  initialize_original_copy_tables ();
+  basic_block cond_bb;
+  loop_version (loop, no_wrap_cond, &cond_bb,
+		profile_probability::very_likely (),
+		profile_probability::very_unlikely (),
+		profile_probability::very_likely (),
+		profile_probability::very_unlikely (), true);
+  free_original_copy_tables ();
+
+  /* Insert the statements that feed COND.  */
+  if (stmts)
+    {
+      gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+    }
+
+  /* Update gcond code.  */
+  gcond *gc = as_a<gcond *> (last_stmt (e->src));
+  enum tree_code code = gimple_cond_code (gc);
+  enum tree_code new_code;
+  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
+  if (is_negative_step)
+    new_code = inv ? LT_EXPR : GT_EXPR;
+  else
+    new_code = inv ? GT_EXPR : LT_EXPR;
+  if (code == NE_EXPR || code == EQ_EXPR)
+    gimple_cond_set_code (gc, new_code);
+
+  /* Swap edge.  */
+  if (code == EQ_EXPR)
+    {
+      edge out = EDGE_SUCC (e->src, 0);
+      edge in = EDGE_SUCC (e->src, 1);
+      if (in->flags & EDGE_TRUE_VALUE)
+	std::swap (in, out);
+      in->flags |= EDGE_TRUE_VALUE;
+      in->flags &= ~EDGE_FALSE_VALUE;
+      out->flags |= EDGE_FALSE_VALUE;
+      out->flags &= ~EDGE_TRUE_VALUE;
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop split on wrap index.\n");
+
+  return true;
+}
+
+/* Split loop if there is possible wrap.
+   For example:
+   transform
+
+    void
+    foo (int *a, int *b, unsigned l, unsigned u_n)
+    {
+      while (++l != u_n)
+	a[l] = b[l]  + 1;
+    }
+
+   to:
+    if (l < u_n)
+    {
+      int li = l;
+      int n = u_n;
+      while (++li < n)
+	a[li] = b[li]  + 1;
+      l = li;
+    }
+    else
+      while (++l != n)
+	a[l] = b[l]  + 1;
+  */
+static bool
+split_loop_on_wrap (class loop *loop)
+{
+  edge e;
+  idx_elements data;
+  tree no_wrap = get_wrap_assumption (loop, &e, data);
+
+  if (!no_wrap)
+    return false;
+
+  int num = 0;
+  basic_block *bbs = get_loop_body (loop);
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+
+  if (num > param_max_peeled_insns)
+    {
+      free (bbs);
+      return false;
+    }
+
+  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+    {
+      free (bbs);
+      return false;
+    }
+
+  free (bbs);
+
+  if (integer_onep (no_wrap))
+    return update_idx_bnd_type (loop, data);
+