diff mbox series

[1/2] Refactor final_value_replacement_loop [PR90594]

Message ID LV2PR01MB78393E51D5BC32B6C070E3CFF7312@LV2PR01MB7839.prod.exchangelabs.com
State New
Headers show
Series [1/2] Refactor final_value_replacement_loop [PR90594] | expand

Commit Message

Feng Xue OS Dec. 6, 2024, 1:55 p.m. UTC
This patch refactors the procedure in tree-scalar-evolution.cc in order to partially export its functionality to other module, so decomposes it to several relatively independent utility functions.

Thanks,
Feng
---
gcc/
	PR tree-optimization/90594
	* tree-scalar-evolution.cc (simple_scev_final_value): New function.
	(apply_scev_final_value_replacement): Likewise.
	(final_value_replacement_loop): Call new functions.
---
 gcc/tree-scalar-evolution.cc | 288 ++++++++++++++++++++---------------
 1 file changed, 165 insertions(+), 123 deletions(-)

Comments

Richard Biener Dec. 9, 2024, 10:46 a.m. UTC | #1
On Fri, Dec 6, 2024 at 2:56 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> This patch refactors the procedure in tree-scalar-evolution.cc in order to partially export its functionality to other module, so decomposes it to several relatively independent utility functions.
>
> Thanks,
> Feng
> ---
> gcc/
>         PR tree-optimization/90594
>         * tree-scalar-evolution.cc (simple_scev_final_value): New function.
>         (apply_scev_final_value_replacement): Likewise.
>         (final_value_replacement_loop): Call new functions.
> ---
>  gcc/tree-scalar-evolution.cc | 288 ++++++++++++++++++++---------------
>  1 file changed, 165 insertions(+), 123 deletions(-)
>
> diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
> index abb2bad7773..5733632aa78 100644
> --- a/gcc/tree-scalar-evolution.cc
> +++ b/gcc/tree-scalar-evolution.cc
> @@ -3775,13 +3775,17 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
>    return fold_build2 (code1, type, inv, match_op[0]);
>  }
>
> -/* Do final value replacement for LOOP, return true if we did anything.  */
> +/* For induction VALUE of LOOP, return true if its SCEV is simple enough that
> +   its final value at loop exit could be directly calculated based on the
> +   initial value and loop niter, and this value is recorded in FINAL_VALUE,
> +   also set REWRITE_OVERFLOW to true in the case that we need to rewrite the
> +   final value to avoid overflow UB when replacement would really happen
> +   later.  */
>
> -bool
> -final_value_replacement_loop (class loop *loop)
> +static bool
> +simple_scev_final_value (class loop *loop, tree value, tree *final_value,
> +                        bool *rewrite_overflow)
>  {
> -  /* If we do not know exact number of iterations of the loop, we cannot
> -     replace the final value.  */
>    edge exit = single_exit (loop);
>    if (!exit)
>      return false;
> @@ -3790,100 +3794,170 @@ final_value_replacement_loop (class loop *loop)
>    if (niter == chrec_dont_know)
>      return false;
>
> -  /* Ensure that it is possible to insert new statements somewhere.  */
> -  if (!single_pred_p (exit->dest))
> -    split_loop_exit_edge (exit);
> -
> -  /* Set stmt insertion pointer.  All stmts are inserted before this point.  */
> +  /* TODO: allow float value for fast math.  */
> +  if (!POINTER_TYPE_P (TREE_TYPE (value))
> +       && !INTEGRAL_TYPE_P (TREE_TYPE (value)))
> +    return false;
>
>    class loop *ex_loop
> -    = superloop_at_depth (loop,
> -                         loop_depth (exit->dest->loop_father) + 1);
> +    = superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1);
>
> -  bool any = false;
> -  gphi_iterator psi;
> -  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
> +  bool folded_casts;
> +  tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value,
> +                                              &folded_casts);
> +  tree bitinv_def, bit_def;
> +  unsigned HOST_WIDE_INT niter_num;
> +
> +  if (def != chrec_dont_know)
> +    def = compute_overall_effect_of_inner_loop (ex_loop, def);
> +
> +  /* Handle bitop with invariant induction expression.
> +
> +     .i.e
> +     for (int i =0 ;i < 32; i++)
> +       tmp &= bit2;
> +     if bit2 is an invariant in loop which could simple to tmp &= bit2.  */
> +  else if ((bitinv_def
> +               = analyze_and_compute_bitop_with_inv_effect (loop,
> +                                                            value, niter)))
> +    def = bitinv_def;
> +
> +  /* Handle bitwise induction expression.
> +
> +     .i.e.
> +     for (int i = 0; i != 64; i+=3)
> +       res &= ~(1UL << i);
> +
> +     RES can't be analyzed out by SCEV because it is not polynomially
> +     expressible, but in fact final value of RES can be replaced by
> +     RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
> +     being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
> +  else if (tree_fits_uhwi_p (niter)
> +          && (niter_num = tree_to_uhwi (niter)) != 0
> +          && niter_num < TYPE_PRECISION (TREE_TYPE (value))
> +          && (bit_def
> +                  = analyze_and_compute_bitwise_induction_effect (loop, value,
> +                                                                  niter_num)))
> +    def = bit_def;
> +
> +  bool cond_overflow_p;
> +  if (!tree_does_not_contain_chrecs (def)
> +      || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
> +      /* Moving the computation from the loop may prolong life range
> +        of some ssa names, which may cause problems if they appear
> +        on abnormal edges.  */
> +      || contains_abnormal_ssa_name_p (def)
> +      /* Do not emit expensive expressions.  The rationale is that
> +        when someone writes a code like
> +
> +        while (n > 45) n -= 45;
> +
> +        he probably knows that n is not large, and does not want it
> +        to be turned into n %= 45.  */
> +      || expression_expensive_p (def, &cond_overflow_p))
> +    return false;
> +
> +  *final_value = def;
> +
> +  if ((folded_casts
> +       && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
> +       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
> +      || cond_overflow_p)
> +    *rewrite_overflow = true;
> +  else
> +    *rewrite_overflow = false;
> +
> +  return true;
> +}
> +
> +/* Given a loop closed PHI, replace it with a new assignment from its
> +   FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to
> +   rewrite expressions in FINAL_VALUE to avoid overflow UB.  When FINAL_VALUE
> +   is constant, we could just propagate the constant, however, sometimes we
> +   have to leave an unused copy assignment around, if so, ALWAYS_KEEP is set
> +   to true.  */
> +
> +static void
> +apply_scev_final_value_replacement (gphi *phi, tree final_value,
> +                                   bool rewrite_overflow, bool always_keep)
> +{
> +  tree rslt = gimple_phi_result (phi);
> +  gphi_iterator psi = gsi_for_phi (phi);
> +  gimple_stmt_iterator gsi;
> +  location_t loc = gimple_phi_arg_location (phi, 0);
> +  basic_block bb = gimple_bb (phi);
> +  gimple_seq stmts;
> +
> +  /* This is a degenerate PHI with only one argument.  */
> +  gcc_assert (gimple_phi_num_args (phi) == 1);
> +
> +  remove_phi_node (&psi, false);
> +
> +  /* Propagate constants immediately.  */
> +  if (CONSTANT_CLASS_P (final_value)
> +      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
>      {
> -      gphi *phi = psi.phi ();
> -      tree rslt = PHI_RESULT (phi);
> -      tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
> -      tree def = phidef;
> -      if (virtual_operand_p (def))
> -       {
> -         gsi_next (&psi);
> -         continue;
> -       }
> +      replace_uses_by (rslt, final_value);
> +
> +      /* Sometimes, we need to leave an unused initialization around to avoid
> +        invalidating the SCEV cache.  */
> +      if (!always_keep)
> +       return;
> +    }
> +
> +  tree def = force_gimple_operand (final_value, &stmts, false, NULL_TREE);
> +  gassign *ass = gimple_build_assign (rslt, def);
> +
> +  gimple_set_location (ass, loc);
> +  gimple_seq_add_stmt (&stmts, ass);
>
> -      if (!POINTER_TYPE_P (TREE_TYPE (def))
> -         && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
> +  /* If def's type has undefined overflow and there were folded casts, rewrite
> +     all stmts added for def into arithmetics with defined overflow
> +     behavior.  */
> +  if (rewrite_overflow)
> +    {
> +      gsi = gsi_start (stmts);
> +      while (!gsi_end_p (gsi))
>         {
> -         gsi_next (&psi);
> -         continue;
> +         gimple *stmt = gsi_stmt (gsi);
> +         if (is_gimple_assign (stmt)
> +             && arith_code_with_undefined_signed_overflow
> +                               (gimple_assign_rhs_code (stmt)))
> +           rewrite_to_defined_overflow (&gsi);
> +         gsi_next (&gsi);
>         }
> +    }
>
> -      bool folded_casts;
> -      def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
> -                                             &folded_casts);
> +  gsi = gsi_after_labels (bb);
> +  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
> +}
>
> -      tree bitinv_def, bit_def;
> -      unsigned HOST_WIDE_INT niter_num;
> +/* Do final value replacement for LOOP, return true if we did anything.  */
> +
> +bool
> +final_value_replacement_loop (class loop *loop)
> +{
> +  /* If we do not know exact number of iterations of the loop, we cannot
> +     replace the final value.  */
> +  edge exit = single_exit (loop);
> +  if (!exit || number_of_latch_executions (loop) == chrec_dont_know)
> +    return false;
>
> -      if (def != chrec_dont_know)
> -       def = compute_overall_effect_of_inner_loop (ex_loop, def);
> +  /* Ensure that it is possible to insert new statements somewhere.  */
> +  if (!single_pred_p (exit->dest))
> +    split_loop_exit_edge (exit);
>
> -      /* Handle bitop with invariant induction expression.
> +  bool any = false;
> +  gphi_iterator psi;
> +  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
> +    {
> +      gphi *phi = psi.phi ();
> +      tree rslt = PHI_RESULT (phi);
> +      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
> +      bool rewrite_overflow;
>
> -       .i.e
> -       for (int i =0 ;i < 32; i++)
> -         tmp &= bit2;
> -       if bit2 is an invariant in loop which could simple to
> -       tmp &= bit2.  */
> -      else if ((bitinv_def
> -               = analyze_and_compute_bitop_with_inv_effect (loop,
> -                                                            phidef, niter)))
> -       def = bitinv_def;
> -
> -      /* Handle bitwise induction expression.
> -
> -        .i.e.
> -        for (int i = 0; i != 64; i+=3)
> -          res &= ~(1UL << i);
> -
> -        RES can't be analyzed out by SCEV because it is not polynomially
> -        expressible, but in fact final value of RES can be replaced by
> -        RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
> -        being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
> -      else if (tree_fits_uhwi_p (niter)
> -              && (niter_num = tree_to_uhwi (niter)) != 0
> -              && niter_num < TYPE_PRECISION (TREE_TYPE (phidef))
> -              && (bit_def
> -                  = analyze_and_compute_bitwise_induction_effect (loop,
> -                                                                  phidef,
> -                                                                  niter_num)))
> -       def = bit_def;
> -
> -      bool cond_overflow_p;
> -      if (!tree_does_not_contain_chrecs (def)
> -         || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
> -         /* Moving the computation from the loop may prolong life range
> -            of some ssa names, which may cause problems if they appear
> -            on abnormal edges.  */
> -         || contains_abnormal_ssa_name_p (def)
> -         /* Do not emit expensive expressions.  The rationale is that
> -            when someone writes a code like
> -
> -            while (n > 45) n -= 45;
> -
> -            he probably knows that n is not large, and does not want it
> -            to be turned into n %= 45.  */
> -         || expression_expensive_p (def, &cond_overflow_p))
> +      if (!simple_scev_final_value (loop, def, &def, &rewrite_overflow))
>         {
> -         if (dump_file && (dump_flags & TDF_DETAILS))
> -           {
> -             fprintf (dump_file, "not replacing:\n  ");
> -             print_gimple_stmt (dump_file, phi, 0);
> -             fprintf (dump_file, "\n");
> -           }

Can you keep this dump please?

Otherwise OK.

Thanks,
Richard.

>           gsi_next (&psi);
>           continue;
>         }
> @@ -3904,43 +3978,11 @@ final_value_replacement_loop (class loop *loop)
>          to a GIMPLE sequence or to a statement list (keeping this a
>          GENERIC interface).  */
>        def = unshare_expr (def);
> -      auto loc = gimple_phi_arg_location (phi, exit->dest_idx);
> -      remove_phi_node (&psi, false);
> -
> -      /* Propagate constants immediately, but leave an unused initialization
> -        around to avoid invalidating the SCEV cache.  */
> -      if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
> -       replace_uses_by (rslt, def);
> -
> -      /* Create the replacement statements.  */
> -      gimple_seq stmts;
> -      def = force_gimple_operand (def, &stmts, false, NULL_TREE);
> -      gassign *ass = gimple_build_assign (rslt, def);
> -      gimple_set_location (ass, loc);
> -      gimple_seq_add_stmt (&stmts, ass);
> -
> -      /* If def's type has undefined overflow and there were folded
> -        casts, rewrite all stmts added for def into arithmetics
> -        with defined overflow behavior.  */
> -      if ((folded_casts
> -          && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
> -          && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
> -         || cond_overflow_p)
> -       {
> -         gimple_stmt_iterator gsi2;
> -         gsi2 = gsi_start (stmts);
> -         while (!gsi_end_p (gsi2))
> -           {
> -             gimple *stmt = gsi_stmt (gsi2);
> -             if (is_gimple_assign (stmt)
> -                 && arith_code_with_undefined_signed_overflow
> -                      (gimple_assign_rhs_code (stmt)))
> -               rewrite_to_defined_overflow (&gsi2);
> -             gsi_next (&gsi2);
> -           }
> -       }
> -      gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
> -      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
> +
> +      /* Advance iterator before the PHI is removed.  */
> +      gsi_next (&psi);
> +      apply_scev_final_value_replacement (phi, def, rewrite_overflow, true);
> +
>        if (dump_file)
>         {
>           fprintf (dump_file, " final stmt:\n  ");
> --
> 2.17.1
Feng Xue OS Dec. 12, 2024, 1:55 p.m. UTC | #2
Updated the patch according to comments. OK for trunk?

Thanks,
Feng
---
gcc/
        PR tree-optimization/90594
        * tree-scalar-evolution.cc (get_scev_final_value): New function.
        (apply_scev_final_value_replacement): Likewise.
        (final_value_replacement_loop): Call new functions.
        * tree-scalar-evolution.h (get_scev_final_value): New function
        declaration.
        (apply_scev_final_value_replacement): Likewise.
        (scev_const_prop): Remove unused declaration.
---
 gcc/tree-scalar-evolution.cc | 294 +++++++++++++++++++++--------------
 gcc/tree-scalar-evolution.h  |   3 +-
 2 files changed, 179 insertions(+), 118 deletions(-)

diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index abb2bad7773..3c3719e8e64 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3775,6 +3775,170 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
   return fold_build2 (code1, type, inv, match_op[0]);
 }

+/* For induction VALUE of LOOP, return its final value at loop exit if it could
+   be directly calculated based on the initial value and loop niter, also set
+   REWRITE_OVERFLOW to true in the case that we need to rewrite the final value
+   to avoid overflow UB when replacement would really happen later. Otherwise,
+   empty value is returned.  The flag CONSIDER_COST specifies whether we care
+   about if the value is expensive or not.  */
+
+tree
+get_scev_final_value (class loop *loop, tree value, bool *rewrite_overflow,
+                     bool consider_cost)
+{
+  edge exit = single_exit (loop);
+  if (!exit)
+    return NULL_TREE;
+
+  tree niter = number_of_latch_executions (loop);
+  if (niter == chrec_dont_know)
+    return NULL_TREE;
+
+  class loop *ex_loop
+    = superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1);
+
+  bool folded_casts;
+  tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value,
+                                              &folded_casts);
+  tree bitinv_def, bit_def;
+  unsigned HOST_WIDE_INT niter_num;
+
+  if (def != chrec_dont_know)
+    def = compute_overall_effect_of_inner_loop (ex_loop, def);
+
+  /* Handle bitop with invariant induction expression.
+
+     .i.e
+     for (int i =0 ;i < 32; i++)
+       tmp &= bit2;
+     if bit2 is an invariant in loop which could simple to tmp &= bit2.  */
+  else if ((bitinv_def
+               = analyze_and_compute_bitop_with_inv_effect (loop,
+                                                            value, niter)))
+    def = bitinv_def;
+
+  /* Handle bitwise induction expression.
+
+     .i.e.
+     for (int i = 0; i != 64; i+=3)
+       res &= ~(1UL << i);
+
+     RES can't be analyzed out by SCEV because it is not polynomially
+     expressible, but in fact final value of RES can be replaced by
+     RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
+     being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
+  else if (tree_fits_uhwi_p (niter)
+          && (niter_num = tree_to_uhwi (niter)) != 0
+          && niter_num < TYPE_PRECISION (TREE_TYPE (value))
+          && (bit_def
+                  = analyze_and_compute_bitwise_induction_effect (loop, value,
+                                                                  niter_num)))
+    def = bit_def;
+
+  bool cond_overflow_p = false;
+
+  if (!tree_does_not_contain_chrecs (def)
+      || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
+      /* Moving the computation from the loop may prolong life range
+        of some ssa names, which may cause problems if they appear
+        on abnormal edges.  */
+      || contains_abnormal_ssa_name_p (def)
+      /* Do not emit expensive expressions.  The rationale is that
+        when someone writes a code like
+
+        while (n > 45) n -= 45;
+
+        he probably knows that n is not large, and does not want it
+        to be turned into n %= 45.  */
+      || (consider_cost && expression_expensive_p (def, &cond_overflow_p)))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       {
+         fprintf (dump_file, "skip scev final value:\n  ");
+         print_generic_expr (dump_file, value);
+         fprintf (dump_file, " -> ");
+         print_generic_expr (dump_file, def);
+         fprintf (dump_file, "\n");
+       }
+      return NULL_TREE;
+    }
+
+  if (rewrite_overflow)
+    {
+      *rewrite_overflow = false;
+
+      if ((folded_casts
+         && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
+         && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
+        || cond_overflow_p)
+       *rewrite_overflow = true;
+    }
+
+  return def;
+}
+
+/* Given a loop closed PHI, replace it with a new assignment from its
+   FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to
+   rewrite expressions in FINAL_VALUE to avoid overflow UB.  When FINAL_VALUE
+   is constant, we could just propagate the constant, however, sometimes we
+   have to leave an unused copy assignment around, if so, ALWAYS_KEEP is set
+   to true.  */
+
+void
+apply_scev_final_value_replacement (gphi *phi, tree final_value,
+                                   bool rewrite_overflow, bool always_keep)
+{
+  tree rslt = gimple_phi_result (phi);
+  gphi_iterator psi = gsi_for_phi (phi);
+  gimple_stmt_iterator gsi;
+  location_t loc = gimple_phi_arg_location (phi, 0);
+  basic_block bb = gimple_bb (phi);
+  gimple_seq stmts;
+
+  /* This is a degenerate PHI with only one argument.  */
+  gcc_assert (gimple_phi_num_args (phi) == 1);
+
+  remove_phi_node (&psi, false);
+
+  /* Propagate constants immediately.  */
+  if (CONSTANT_CLASS_P (final_value)
+      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
+    {
+      replace_uses_by (rslt, final_value);
+
+      /* Sometimes, we need to leave an unused initialization around to avoid
+        invalidating the SCEV cache.  */
+      if (!always_keep)
+       return;
+    }
+
+  tree def = force_gimple_operand (final_value, &stmts, false, NULL_TREE);
+  gassign *ass = gimple_build_assign (rslt, def);
+
+  gimple_set_location (ass, loc);
+  gimple_seq_add_stmt (&stmts, ass);
+
+  /* If def's type has undefined overflow and there were folded casts, rewrite
+     all stmts added for def into arithmetics with defined overflow
+     behavior.  */
+  if (rewrite_overflow)
+    {
+      gsi = gsi_start (stmts);
+      while (!gsi_end_p (gsi))
+       {
+         gimple *stmt = gsi_stmt (gsi);
+         if (is_gimple_assign (stmt)
+             && arith_code_with_undefined_signed_overflow
+                               (gimple_assign_rhs_code (stmt)))
+           rewrite_to_defined_overflow (&gsi);
+         gsi_next (&gsi);
+       }
+    }
+
+  gsi = gsi_after_labels (bb);
+  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+}
+
 /* Do final value replacement for LOOP, return true if we did anything.  */

 bool
@@ -3783,37 +3947,25 @@ final_value_replacement_loop (class loop *loop)
   /* If we do not know exact number of iterations of the loop, we cannot
      replace the final value.  */
   edge exit = single_exit (loop);
-  if (!exit)
-    return false;
-
-  tree niter = number_of_latch_executions (loop);
-  if (niter == chrec_dont_know)
+  if (!exit || number_of_latch_executions (loop) == chrec_dont_know)
     return false;

   /* Ensure that it is possible to insert new statements somewhere.  */
   if (!single_pred_p (exit->dest))
     split_loop_exit_edge (exit);

-  /* Set stmt insertion pointer.  All stmts are inserted before this point.  */
-
-  class loop *ex_loop
-    = superloop_at_depth (loop,
-                         loop_depth (exit->dest->loop_father) + 1);
-
   bool any = false;
   gphi_iterator psi;
   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
     {
       gphi *phi = psi.phi ();
       tree rslt = PHI_RESULT (phi);
-      tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
-      tree def = phidef;
-      if (virtual_operand_p (def))
-       {
-         gsi_next (&psi);
-         continue;
-       }
+      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
+      bool rewrite_overflow;

+      /* Now only allow integral types. This check also excludes virtual
+        ssa name who has void type.  TODO: support float value for fast
+        math.  */
       if (!POINTER_TYPE_P (TREE_TYPE (def))
          && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
        {
@@ -3821,69 +3973,9 @@ final_value_replacement_loop (class loop *loop)
          continue;
        }

-      bool folded_casts;
-      def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
-                                             &folded_casts);
-
-      tree bitinv_def, bit_def;
-      unsigned HOST_WIDE_INT niter_num;
-
-      if (def != chrec_dont_know)
-       def = compute_overall_effect_of_inner_loop (ex_loop, def);
-
-      /* Handle bitop with invariant induction expression.
-
-       .i.e
-       for (int i =0 ;i < 32; i++)
-         tmp &= bit2;
-       if bit2 is an invariant in loop which could simple to
-       tmp &= bit2.  */
-      else if ((bitinv_def
-               = analyze_and_compute_bitop_with_inv_effect (loop,
-                                                            phidef, niter)))
-       def = bitinv_def;
-
-      /* Handle bitwise induction expression.
-
-        .i.e.
-        for (int i = 0; i != 64; i+=3)
-          res &= ~(1UL << i);
-
-        RES can't be analyzed out by SCEV because it is not polynomially
-        expressible, but in fact final value of RES can be replaced by
-        RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
-        being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
-      else if (tree_fits_uhwi_p (niter)
-              && (niter_num = tree_to_uhwi (niter)) != 0
-              && niter_num < TYPE_PRECISION (TREE_TYPE (phidef))
-              && (bit_def
-                  = analyze_and_compute_bitwise_induction_effect (loop,
-                                                                  phidef,
-                                                                  niter_num)))
-       def = bit_def;
-
-      bool cond_overflow_p;
-      if (!tree_does_not_contain_chrecs (def)
-         || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
-         /* Moving the computation from the loop may prolong life range
-            of some ssa names, which may cause problems if they appear
-            on abnormal edges.  */
-         || contains_abnormal_ssa_name_p (def)
-         /* Do not emit expensive expressions.  The rationale is that
-            when someone writes a code like
-
-            while (n > 45) n -= 45;
-
-            he probably knows that n is not large, and does not want it
-            to be turned into n %= 45.  */
-         || expression_expensive_p (def, &cond_overflow_p))
+      def = get_scev_final_value (loop, def, &rewrite_overflow, true);
+      if (!def)
        {
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           {
-             fprintf (dump_file, "not replacing:\n  ");
-             print_gimple_stmt (dump_file, phi, 0);
-             fprintf (dump_file, "\n");
-           }
          gsi_next (&psi);
          continue;
        }
@@ -3904,43 +3996,11 @@ final_value_replacement_loop (class loop *loop)
         to a GIMPLE sequence or to a statement list (keeping this a
         GENERIC interface).  */
       def = unshare_expr (def);
-      auto loc = gimple_phi_arg_location (phi, exit->dest_idx);
-      remove_phi_node (&psi, false);
-
-      /* Propagate constants immediately, but leave an unused initialization
-        around to avoid invalidating the SCEV cache.  */
-      if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
-       replace_uses_by (rslt, def);
-
-      /* Create the replacement statements.  */
-      gimple_seq stmts;
-      def = force_gimple_operand (def, &stmts, false, NULL_TREE);
-      gassign *ass = gimple_build_assign (rslt, def);
-      gimple_set_location (ass, loc);
-      gimple_seq_add_stmt (&stmts, ass);
-
-      /* If def's type has undefined overflow and there were folded
-        casts, rewrite all stmts added for def into arithmetics
-        with defined overflow behavior.  */
-      if ((folded_casts
-          && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
-          && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
-         || cond_overflow_p)
-       {
-         gimple_stmt_iterator gsi2;
-         gsi2 = gsi_start (stmts);
-         while (!gsi_end_p (gsi2))
-           {
-             gimple *stmt = gsi_stmt (gsi2);
-             if (is_gimple_assign (stmt)
-                 && arith_code_with_undefined_signed_overflow
-                      (gimple_assign_rhs_code (stmt)))
-               rewrite_to_defined_overflow (&gsi2);
-             gsi_next (&gsi2);
-           }
-       }
-      gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
-      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+
+      /* Advance iterator before the PHI is removed.  */
+      gsi_next (&psi);
+      apply_scev_final_value_replacement (phi, def, rewrite_overflow, true);
+
       if (dump_file)
        {
          fprintf (dump_file, " final stmt:\n  ");
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index 41ba6b237b5..1e0defacf3b 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -34,8 +34,9 @@ extern tree analyze_scalar_evolution (class loop *, tree);
 extern tree instantiate_scev (edge, class loop *, tree);
 extern tree resolve_mixers (class loop *, tree, bool *);
 extern void gather_stats_on_scev_database (void);
+extern tree get_scev_final_value (class loop *, tree, bool *, bool);
+extern void apply_scev_final_value_replacement (gphi *, tree, bool, bool);
 extern bool final_value_replacement_loop (class loop *);
-extern unsigned int scev_const_prop (void);
 extern bool expression_expensive_p (tree, bool *);
 extern bool simple_iv_with_niters (class loop *, class loop *, tree,
                                   struct affine_iv *, tree *, bool);
--
2.17.1
diff mbox series

Patch

From 621e05c9013f1e8e116f7b348383634af59e3202 Mon Sep 17 00:00:00 2001
From: Feng Xue <fxue@os.amperecomputing.com>
Date: Fri, 6 Dec 2024 10:57:52 +0800
Subject: [PATCH 1/2] Refactor final_value_replacement_loop [PR90594]

This patch refactors the procedure in tree-scalar-evolution.cc in order to
partially export its functionality to other module, so decomposes it to
several relatively independent utility functions.

2024-12-06 Feng Xue <fxue@os.amperecomputing.com>

gcc/
	PR tree-optimization/90594
	* tree-scalar-evolution.cc (simple_scev_final_value): New function.
	(apply_scev_final_value_replacement): Likewise.
	(final_value_replacement_loop): Call new functions.
---
 gcc/tree-scalar-evolution.cc | 288 ++++++++++++++++++++---------------
 1 file changed, 165 insertions(+), 123 deletions(-)

diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index abb2bad7773..5733632aa78 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3775,13 +3775,17 @@  analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
   return fold_build2 (code1, type, inv, match_op[0]);
 }
 
-/* Do final value replacement for LOOP, return true if we did anything.  */
+/* For induction VALUE of LOOP, return true if its SCEV is simple enough that
+   its final value at loop exit could be directly calculated based on the
+   initial value and loop niter, and this value is recorded in FINAL_VALUE,
+   also set REWRITE_OVERFLOW to true in the case that we need to rewrite the
+   final value to avoid overflow UB when replacement would really happen
+   later.  */
 
-bool
-final_value_replacement_loop (class loop *loop)
+static bool
+simple_scev_final_value (class loop *loop, tree value, tree *final_value,
+			 bool *rewrite_overflow)
 {
-  /* If we do not know exact number of iterations of the loop, we cannot
-     replace the final value.  */
   edge exit = single_exit (loop);
   if (!exit)
     return false;
@@ -3790,100 +3794,170 @@  final_value_replacement_loop (class loop *loop)
   if (niter == chrec_dont_know)
     return false;
 
-  /* Ensure that it is possible to insert new statements somewhere.  */
-  if (!single_pred_p (exit->dest))
-    split_loop_exit_edge (exit);
-
-  /* Set stmt insertion pointer.  All stmts are inserted before this point.  */
+  /* TODO: allow float value for fast math.  */
+  if (!POINTER_TYPE_P (TREE_TYPE (value))
+       && !INTEGRAL_TYPE_P (TREE_TYPE (value)))
+    return false;
 
   class loop *ex_loop
-    = superloop_at_depth (loop,
-			  loop_depth (exit->dest->loop_father) + 1);
+    = superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1);
 
-  bool any = false;
-  gphi_iterator psi;
-  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
+  bool folded_casts;
+  tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value,
+					       &folded_casts);
+  tree bitinv_def, bit_def;
+  unsigned HOST_WIDE_INT niter_num;
+
+  if (def != chrec_dont_know)
+    def = compute_overall_effect_of_inner_loop (ex_loop, def);
+
+  /* Handle bitop with invariant induction expression.
+
+     .i.e
+     for (int i =0 ;i < 32; i++)
+       tmp &= bit2;
+     if bit2 is an invariant in loop which could simple to tmp &= bit2.  */
+  else if ((bitinv_def
+		= analyze_and_compute_bitop_with_inv_effect (loop,
+							     value, niter)))
+    def = bitinv_def;
+
+  /* Handle bitwise induction expression.
+
+     .i.e.
+     for (int i = 0; i != 64; i+=3)
+       res &= ~(1UL << i);
+
+     RES can't be analyzed out by SCEV because it is not polynomially
+     expressible, but in fact final value of RES can be replaced by
+     RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
+     being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
+  else if (tree_fits_uhwi_p (niter)
+	   && (niter_num = tree_to_uhwi (niter)) != 0
+	   && niter_num < TYPE_PRECISION (TREE_TYPE (value))
+	   && (bit_def
+		   = analyze_and_compute_bitwise_induction_effect (loop, value,
+								   niter_num)))
+    def = bit_def;
+
+  bool cond_overflow_p;
+  if (!tree_does_not_contain_chrecs (def)
+      || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
+      /* Moving the computation from the loop may prolong life range
+	 of some ssa names, which may cause problems if they appear
+	 on abnormal edges.  */
+      || contains_abnormal_ssa_name_p (def)
+      /* Do not emit expensive expressions.  The rationale is that
+	 when someone writes a code like
+
+	 while (n > 45) n -= 45;
+
+	 he probably knows that n is not large, and does not want it
+	 to be turned into n %= 45.  */
+      || expression_expensive_p (def, &cond_overflow_p))
+    return false;
+
+  *final_value = def;
+
+  if ((folded_casts
+       && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
+       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
+      || cond_overflow_p)
+    *rewrite_overflow = true;
+  else
+    *rewrite_overflow = false;
+
+  return true;
+}
+
+/* Given a loop closed PHI, replace it with a new assignment from its
+   FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to
+   rewrite expressions in FINAL_VALUE to avoid overflow UB.  When FINAL_VALUE
+   is constant, we could just propagate the constant, however, sometimes we
+   have to leave an unused copy assignment around, if so, ALWAYS_KEEP is set
+   to true.  */
+
+static void
+apply_scev_final_value_replacement (gphi *phi, tree final_value,
+				    bool rewrite_overflow, bool always_keep)
+{
+  tree rslt = gimple_phi_result (phi);
+  gphi_iterator psi = gsi_for_phi (phi);
+  gimple_stmt_iterator gsi;
+  location_t loc = gimple_phi_arg_location (phi, 0);
+  basic_block bb = gimple_bb (phi);
+  gimple_seq stmts;
+
+  /* This is a degenerate PHI with only one argument.  */
+  gcc_assert (gimple_phi_num_args (phi) == 1);
+
+  remove_phi_node (&psi, false);
+
+  /* Propagate constants immediately.  */
+  if (CONSTANT_CLASS_P (final_value)
+      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
     {
-      gphi *phi = psi.phi ();
-      tree rslt = PHI_RESULT (phi);
-      tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
-      tree def = phidef;
-      if (virtual_operand_p (def))
-	{
-	  gsi_next (&psi);
-	  continue;
-	}
+      replace_uses_by (rslt, final_value);
+
+      /* Sometimes, we need to leave an unused initialization around to avoid
+	 invalidating the SCEV cache.  */
+      if (!always_keep)
+	return;
+    }
+
+  tree def = force_gimple_operand (final_value, &stmts, false, NULL_TREE);
+  gassign *ass = gimple_build_assign (rslt, def);
+
+  gimple_set_location (ass, loc);
+  gimple_seq_add_stmt (&stmts, ass);
 
-      if (!POINTER_TYPE_P (TREE_TYPE (def))
-	  && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
+  /* If def's type has undefined overflow and there were folded casts, rewrite
+     all stmts added for def into arithmetics with defined overflow
+     behavior.  */
+  if (rewrite_overflow)
+    {
+      gsi = gsi_start (stmts);
+      while (!gsi_end_p (gsi))
 	{
-	  gsi_next (&psi);
-	  continue;
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (is_gimple_assign (stmt)
+	      && arith_code_with_undefined_signed_overflow
+				(gimple_assign_rhs_code (stmt)))
+	    rewrite_to_defined_overflow (&gsi);
+	  gsi_next (&gsi);
 	}
+    }
 
-      bool folded_casts;
-      def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
-					      &folded_casts);
+  gsi = gsi_after_labels (bb);
+  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+}
 
-      tree bitinv_def, bit_def;
-      unsigned HOST_WIDE_INT niter_num;
+/* Do final value replacement for LOOP, return true if we did anything.  */
+
+bool
+final_value_replacement_loop (class loop *loop)
+{
+  /* If we do not know exact number of iterations of the loop, we cannot
+     replace the final value.  */
+  edge exit = single_exit (loop);
+  if (!exit || number_of_latch_executions (loop) == chrec_dont_know)
+    return false;
 
-      if (def != chrec_dont_know)
-	def = compute_overall_effect_of_inner_loop (ex_loop, def);
+  /* Ensure that it is possible to insert new statements somewhere.  */
+  if (!single_pred_p (exit->dest))
+    split_loop_exit_edge (exit);
 
-      /* Handle bitop with invariant induction expression.
+  bool any = false;
+  gphi_iterator psi;
+  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
+    {
+      gphi *phi = psi.phi ();
+      tree rslt = PHI_RESULT (phi);
+      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
+      bool rewrite_overflow;
 
-	.i.e
-	for (int i =0 ;i < 32; i++)
-	  tmp &= bit2;
-	if bit2 is an invariant in loop which could simple to
-	tmp &= bit2.  */
-      else if ((bitinv_def
-		= analyze_and_compute_bitop_with_inv_effect (loop,
-							     phidef, niter)))
-	def = bitinv_def;
-
-      /* Handle bitwise induction expression.
-
-	 .i.e.
-	 for (int i = 0; i != 64; i+=3)
-	   res &= ~(1UL << i);
-
-	 RES can't be analyzed out by SCEV because it is not polynomially
-	 expressible, but in fact final value of RES can be replaced by
-	 RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
-	 being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
-      else if (tree_fits_uhwi_p (niter)
-	       && (niter_num = tree_to_uhwi (niter)) != 0
-	       && niter_num < TYPE_PRECISION (TREE_TYPE (phidef))
-	       && (bit_def
-		   = analyze_and_compute_bitwise_induction_effect (loop,
-								   phidef,
-								   niter_num)))
-	def = bit_def;
-
-      bool cond_overflow_p;
-      if (!tree_does_not_contain_chrecs (def)
-	  || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
-	  /* Moving the computation from the loop may prolong life range
-	     of some ssa names, which may cause problems if they appear
-	     on abnormal edges.  */
-	  || contains_abnormal_ssa_name_p (def)
-	  /* Do not emit expensive expressions.  The rationale is that
-	     when someone writes a code like
-
-	     while (n > 45) n -= 45;
-
-	     he probably knows that n is not large, and does not want it
-	     to be turned into n %= 45.  */
-	  || expression_expensive_p (def, &cond_overflow_p))
+      if (!simple_scev_final_value (loop, def, &def, &rewrite_overflow))
 	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    {
-	      fprintf (dump_file, "not replacing:\n  ");
-	      print_gimple_stmt (dump_file, phi, 0);
-	      fprintf (dump_file, "\n");
-	    }
 	  gsi_next (&psi);
 	  continue;
 	}
@@ -3904,43 +3978,11 @@  final_value_replacement_loop (class loop *loop)
 	 to a GIMPLE sequence or to a statement list (keeping this a
 	 GENERIC interface).  */
       def = unshare_expr (def);
-      auto loc = gimple_phi_arg_location (phi, exit->dest_idx);
-      remove_phi_node (&psi, false);
-
-      /* Propagate constants immediately, but leave an unused initialization
-	 around to avoid invalidating the SCEV cache.  */
-      if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
-	replace_uses_by (rslt, def);
-
-      /* Create the replacement statements.  */
-      gimple_seq stmts;
-      def = force_gimple_operand (def, &stmts, false, NULL_TREE);
-      gassign *ass = gimple_build_assign (rslt, def);
-      gimple_set_location (ass, loc);
-      gimple_seq_add_stmt (&stmts, ass);
-
-      /* If def's type has undefined overflow and there were folded
-	 casts, rewrite all stmts added for def into arithmetics
-	 with defined overflow behavior.  */
-      if ((folded_casts
-	   && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
-	   && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
-	  || cond_overflow_p)
-	{
-	  gimple_stmt_iterator gsi2;
-	  gsi2 = gsi_start (stmts);
-	  while (!gsi_end_p (gsi2))
-	    {
-	      gimple *stmt = gsi_stmt (gsi2);
-	      if (is_gimple_assign (stmt)
-		  && arith_code_with_undefined_signed_overflow
-		       (gimple_assign_rhs_code (stmt)))
-		rewrite_to_defined_overflow (&gsi2);
-	      gsi_next (&gsi2);
-	    }
-	}
-      gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
-      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+
+      /* Advance iterator before the PHI is removed.  */
+      gsi_next (&psi);
+      apply_scev_final_value_replacement (phi, def, rewrite_overflow, true);
+
       if (dump_file)
 	{
 	  fprintf (dump_file, " final stmt:\n  ");
-- 
2.17.1