[#5/7] extend ifcombine_replace_cond to handle noncontiguous ifcombine (was: Re: [PATCH] fold fold_truth_andor field merging into ifcombine)

Message ID orfrokv1kz.fsf_-_@lxoliva.fsfla.org
State New
Series [#1/7] allow vuses in ifcombine blocks (was: Re: [PATCH] fold fold_truth_andor field merging into ifcombine)

Commit Message

Alexandre Oliva Oct. 25, 2024, 11:55 a.m. UTC
Prepare to handle noncontiguous ifcombine, introducing logic to modify
the outer condition when needed.  There are two cases worth
mentioning:

- when blocks are noncontiguous, we have to place the combined
  condition in the outer block to avoid pessimizing carefully crafted
  short-circuited tests;

- even when blocks are contiguous, we prepare for situations in which
  the combined condition has two tests, one to be placed in outer and
  the other in inner.  This circumstance will not come up when
  noncontiguous ifcombine is first enabled, but it will when
  an improved fold_truth_andor is integrated with ifcombine.

Combining the condition from inner into outer may require moving SSA
DEFs used in the inner condition, and the changes implement this as
well.
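
A hypothetical C illustration of the first case (not taken from the patch
or its testcases; whether this particular pair of tests actually gets
combined depends on the rest of the series; the point is only where the
merged condition has to go):

  /* The p->a and p->b tests end up in noncontiguous blocks because of the
     intervening q test.  Emitting the combined p->a && p->b test at the
     outer condition keeps the q test guarded; emitting it at the inner
     condition and turning the outer one into a constant would evaluate
     the q test unconditionally, defeating the deliberate short-circuit.  */
  struct s { int a, b; };

  int
  f (struct s *p, int *q)
  {
    if (p->a)       /* outer condition */
      if (q != 0)   /* intervening, deliberately short-circuited test */
        if (p->b)   /* inner condition */
          return *q;
    return 0;
  }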


for  gcc/ChangeLog

	* tree-ssa-ifcombine.cc: Include bitmap.h.
	(ifcombine_mark_ssa_name): New.
	(struct ifcombine_mark_ssa_name_t): New.
	(ifcombine_mark_ssa_name_walk): New.
	(ifcombine_replace_cond): Prepare to handle noncontiguous and
	split-condition ifcombine.
---
 gcc/tree-ssa-ifcombine.cc |  173 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 168 insertions(+), 5 deletions(-)

Comments

Richard Biener Oct. 30, 2024, 2:17 p.m. UTC | #1
On Fri, Oct 25, 2024 at 4:39 PM Alexandre Oliva <oliva@adacore.com> wrote:
>
>
> Prepare to handle noncontiguous ifcombine, introducing logic to modify
> the outer condition when needed.  There are two cases worth
> mentioning:
>
> - when blocks are noncontiguous, we have to place the combined
>   condition in the outer block to avoid pessimizing carefully crafted
>   short-circuited tests;
>
> - even when blocks are contiguous, we prepare for situations in which
>   the combined condition has two tests, one to be placed in outer and
>   the other in inner.  This circumstance will not come up when
>   noncontiguous ifcombine is first enabled, but it will when
>   an improved fold_truth_andor is integrated with ifcombine.
>
> Combining the condition from inner into outer may require moving SSA
> DEFs used in the inner condition, and the changes implement this as
> well.
>
>
> for  gcc/ChangeLog
>
>         * tree-ssa-ifcombine.cc: Include bitmap.h.
>         (ifcombine_mark_ssa_name): New.
>         (struct ifcombine_mark_ssa_name_t): New.
>         (ifcombine_mark_ssa_name_walk): New.
>         (ifcombine_replace_cond): Prepare to handle noncontiguous and
>         split-condition ifcombine.
> ---
>  gcc/tree-ssa-ifcombine.cc |  173 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 168 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> index b5b72be29bbf9..71c7c9074e94a 100644
> --- a/gcc/tree-ssa-ifcombine.cc
> +++ b/gcc/tree-ssa-ifcombine.cc
> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa.h"
>  #include "attribs.h"
>  #include "asan.h"
> +#include "bitmap.h"
>
>  #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
>  #define LOGICAL_OP_NON_SHORT_CIRCUIT \
> @@ -460,17 +461,57 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
>      }
>  }
>
> -/* Replace the conditions in INNER_COND with COND.
> -   Replace OUTER_COND with a constant.  */
> +/* Set NAME's bit in USED if OUTER dominates it.  */
> +
> +static void
> +ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer)
> +{
> +  if (SSA_NAME_IS_DEFAULT_DEF (name))
> +    return;
> +
> +  gimple *def = SSA_NAME_DEF_STMT (name);
> +  basic_block bb = gimple_bb (def);
> +  if (!dominated_by_p (CDI_DOMINATORS, bb, outer))
> +    return;
> +
> +  bitmap_set_bit (used, SSA_NAME_VERSION (name));
> +}
> +
> +/* Data structure passed to ifcombine_mark_ssa_name.  */
> +struct ifcombine_mark_ssa_name_t
> +{
> +  /* SSA_NAMEs that have been referenced.  */
> +  bitmap used;
> +  /* Dominating block of DEFs that might need moving.  */
> +  basic_block outer;
> +};
> +
> +/* Mark in DATA->used any SSA_NAMEs used in *t.  */
> +
> +static tree
> +ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
> +{
> +  ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_;
> +
> +  if (*t && TREE_CODE (*t) == SSA_NAME)
> +    ifcombine_mark_ssa_name (data->used, *t, data->outer);
> +
> +  return NULL;
> +}
> +
> +/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
> +   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
> +   replaced with a constant, but if there are intervening blocks, it's best to
> +   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
>
>  static bool
>  ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
>                         gcond *outer_cond, bool outer_inv,
>                         tree cond, bool must_canon, tree cond2)
>  {
> -  bool result_inv = inner_inv;
> -
> -  gcc_checking_assert (!cond2);
> +  bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond))
> +                          != gimple_bb (outer_cond));
> +  bool result_inv = outer_p ? outer_inv : inner_inv;
>
>    if (result_inv)
>      cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
> @@ -480,6 +521,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
>    else if (must_canon)
>      return false;
>
> +  if (outer_p)
> +    {
> +      {
> +       auto_bitmap used;

As you are only doing bitmap_set_bit/bitmap_bit_p consider doing

   bitmap_tree_view (used);

to get O(log N) worst-case behavior rather than O(N), not that I expect it
to make a difference in practice.  But we don't have any artificial limit
on the number of stmts in the middle block, right?

Otherwise OK (tree view at your discretion).

Thanks,
Richard.

> +       basic_block outer_bb = gimple_bb (outer_cond);
> +
> +       /* Mark SSA DEFs that are referenced by cond and may thus need to be
> +          moved to outer.  */
> +       {
> +         ifcombine_mark_ssa_name_t data = { used, outer_bb };
> +         walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL);
> +       }
> +
> +       if (!bitmap_empty_p (used))
> +         {
> +           /* Iterate up from inner_cond, moving DEFs identified as used by
> +              cond, and marking USEs in the DEFs for moving as well.  */
> +           gimple_stmt_iterator gsins = gsi_for_stmt (outer_cond);
> +           for (basic_block bb = gimple_bb (inner_cond);
> +                bb != outer_bb; bb = single_pred (bb))
> +             {
> +               for (gimple_stmt_iterator gsitr = gsi_last_bb (bb);
> +                    !gsi_end_p (gsitr); gsi_prev (&gsitr))
> +                 {
> +                   gimple *stmt = gsi_stmt (gsitr);
> +                   bool move = false;
> +                   tree t;
> +                   ssa_op_iter it;
> +
> +                   FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_DEF)
> +                     if (bitmap_bit_p (used, SSA_NAME_VERSION (t)))
> +                       {
> +                         move = true;
> +                         break;
> +                       }
> +
> +                   if (!move)
> +                     continue;
> +
> +                   /* Mark uses in STMT before moving it.  */
> +                   FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_USE)
> +                     ifcombine_mark_ssa_name (used, t, outer_bb);
> +
> +                   gsi_move_before (&gsitr, &gsins, GSI_NEW_STMT);
> +                 }
> +
> +               /* Surprisingly, there may be PHI nodes in single-predecessor
> +                  blocks, as in pr50682.C.  Fortunately, since they can't
> +                  involve back edges, there won't be references to parallel
> +                  nodes that we'd have to pay special attention to to keep
> +                  them parallel.  We can't move the PHI nodes, but we can turn
> +                  them into assignments.  */
> +               for (gphi_iterator gsi = gsi_start_phis (bb);
> +                    !gsi_end_p (gsi);)
> +                 {
> +                   gphi *phi = gsi.phi ();
> +
> +                   gcc_assert (gimple_phi_num_args (phi) == 1);
> +                   tree def = gimple_phi_result (phi);
> +
> +                   if (!bitmap_bit_p (used, SSA_NAME_VERSION (def)))
> +                     {
> +                       gsi_next (&gsi);
> +                       continue;
> +                     }
> +
> +                   /* Mark uses in STMT before moving it.  */
> +                   use_operand_p use_p;
> +                   ssa_op_iter it;
> +                   FOR_EACH_PHI_ARG (use_p, phi, it, SSA_OP_USE)
> +                     ifcombine_mark_ssa_name (used, USE_FROM_PTR (use_p),
> +                                              outer_bb);
> +
> +                   tree use = gimple_phi_arg_def (phi, 0);
> +                   location_t loc = gimple_phi_arg_location (phi, 0);
> +
> +                   remove_phi_node (&gsi, false);
> +
> +                   gassign *a = gimple_build_assign (def, use);
> +                   gimple_set_location (a, loc);
> +                   gsi_insert_before (&gsins, a, GSI_NEW_STMT);
> +                 }
> +             }
> +         }
> +      }
> +
> +      if (!is_gimple_condexpr_for_cond (cond))
> +       {
> +         gimple_stmt_iterator gsi = gsi_for_stmt (outer_cond);
> +         cond = force_gimple_operand_gsi_1 (&gsi, cond,
> +                                            is_gimple_condexpr_for_cond,
> +                                            NULL, true, GSI_SAME_STMT);
> +       }
> +
> +      /* Leave CFG optimization to cfg_cleanup.  */
> +      gimple_cond_set_condition_from_tree (outer_cond, cond);
> +      update_stmt (outer_cond);
> +
> +      if (cond2)
> +       {
> +         if (inner_inv)
> +           cond2 = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond2), cond2);
> +
> +         if (tree tcanon = canonicalize_cond_expr_cond (cond2))
> +           cond2 = tcanon;
> +         if (!is_gimple_condexpr_for_cond (cond2))
> +           {
> +             gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond);
> +             cond2 = force_gimple_operand_gsi_1 (&gsi, cond2,
> +                                                 is_gimple_condexpr_for_cond,
> +                                                 NULL, true, GSI_SAME_STMT);
> +           }
> +         gimple_cond_set_condition_from_tree (inner_cond, cond2);
> +       }
> +      else
> +       gimple_cond_set_condition_from_tree (inner_cond,
> +                                            inner_inv
> +                                            ? boolean_false_node
> +                                            : boolean_true_node);
> +      update_stmt (inner_cond);
> +    }
> +  else
>      {
>        if (!is_gimple_condexpr_for_cond (cond))
>         {
>
> --
> Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
>    Free Software Activist                   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive
Alexandre Oliva Nov. 2, 2024, 5:09 a.m. UTC | #2
On Oct 30, 2024, Richard Biener <richard.guenther@gmail.com> wrote:

> As you are only doing bitmap_set_bit/bitmap_bit_p consider doing

>    bitmap_tree_view (used);

Ah, nice, I didn't know about this alternate representation.  Thanks.
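
For the record, a minimal sketch of what that amounts to here (assuming
the auto_bitmap declared in ifcombine_replace_cond; only the
bitmap_tree_view call is new):

  auto_bitmap used;
  /* Switch to the splay-tree representation: bitmap_set_bit and
     bitmap_bit_p become O(log N) worst case instead of walking the
     element list.  That's fine here, since USED is only ever set and
     tested, never iterated.  */
  bitmap_tree_view (used);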

> But we don't have any artificial limit on the number of stmts in the
> middle block, right?

Yeah, and I really expect the count of SSA defs referenced by the
combined condition to be very low.  Only one file in an all-languages
bootstrap needs a third bit in USED for the cond, and no file uses more
than 4 bits in USED for all defs that might need moving.

OTOH, this one file in gm2 combines (_1 & _2) [!= 0] and (_1 & _3) into
((_2 | _3) & _1), and I suspect the _3 def that needs moving could be
arbitrarily complex.
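
A hypothetical source-level shape of that kind of combination (made-up
names, not the actual gm2 code):

  /* Two masked tests of the same value, OR-combined into a single test
     against the union of the masks.  The computation of m2 then has to
     be hoisted above the outer test; here it is a single statement, but
     in general it could be arbitrarily much side-effect-free code.  */
  if (x & m1)
    f ();
  else
    {
      unsigned m2 = 1u << y;
      if (x & m2)
        f ();
    }

  /* becomes, roughly:

     unsigned m2 = 1u << y;
     if (x & (m1 | m2))
       f ();  */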

I suppose we should add some limit on how many stmts to move to
enable a combined cond, either a static limit, or some dynamic limit
based on costs and probabilities of moving vs remaining stmts.

Patch

diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index b5b72be29bbf9..71c7c9074e94a 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -42,6 +42,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa.h"
 #include "attribs.h"
 #include "asan.h"
+#include "bitmap.h"
 
 #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
 #define LOGICAL_OP_NON_SHORT_CIRCUIT \
@@ -460,17 +461,57 @@  update_profile_after_ifcombine (basic_block inner_cond_bb,
     }
 }
 
-/* Replace the conditions in INNER_COND with COND.
-   Replace OUTER_COND with a constant.  */
+/* Set NAME's bit in USED if OUTER dominates it.  */
+
+static void
+ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer)
+{
+  if (SSA_NAME_IS_DEFAULT_DEF (name))
+    return;
+
+  gimple *def = SSA_NAME_DEF_STMT (name);
+  basic_block bb = gimple_bb (def);
+  if (!dominated_by_p (CDI_DOMINATORS, bb, outer))
+    return;
+
+  bitmap_set_bit (used, SSA_NAME_VERSION (name));
+}
+
+/* Data structure passed to ifcombine_mark_ssa_name.  */
+struct ifcombine_mark_ssa_name_t
+{
+  /* SSA_NAMEs that have been referenced.  */
+  bitmap used;
+  /* Dominating block of DEFs that might need moving.  */
+  basic_block outer;
+};
+
+/* Mark in DATA->used any SSA_NAMEs used in *t.  */
+
+static tree
+ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
+{
+  ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_;
+
+  if (*t && TREE_CODE (*t) == SSA_NAME)
+    ifcombine_mark_ssa_name (data->used, *t, data->outer);
+
+  return NULL;
+}
+
+/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
+   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
+   replaced with a constant, but if there are intervening blocks, it's best to
+   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
 
 static bool
 ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
 			gcond *outer_cond, bool outer_inv,
 			tree cond, bool must_canon, tree cond2)
 {
-  bool result_inv = inner_inv;
-
-  gcc_checking_assert (!cond2);
+  bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond))
+			   != gimple_bb (outer_cond));
+  bool result_inv = outer_p ? outer_inv : inner_inv;
 
   if (result_inv)
     cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
@@ -480,6 +521,128 @@  ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
   else if (must_canon)
     return false;
 
+  if (outer_p)
+    {
+      {
+	auto_bitmap used;
+	basic_block outer_bb = gimple_bb (outer_cond);
+
+	/* Mark SSA DEFs that are referenced by cond and may thus need to be
+	   moved to outer.  */
+	{
+	  ifcombine_mark_ssa_name_t data = { used, outer_bb };
+	  walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL);
+	}
+
+	if (!bitmap_empty_p (used))
+	  {
+	    /* Iterate up from inner_cond, moving DEFs identified as used by
+	       cond, and marking USEs in the DEFs for moving as well.  */
+	    gimple_stmt_iterator gsins = gsi_for_stmt (outer_cond);
+	    for (basic_block bb = gimple_bb (inner_cond);
+		 bb != outer_bb; bb = single_pred (bb))
+	      {
+		for (gimple_stmt_iterator gsitr = gsi_last_bb (bb);
+		     !gsi_end_p (gsitr); gsi_prev (&gsitr))
+		  {
+		    gimple *stmt = gsi_stmt (gsitr);
+		    bool move = false;
+		    tree t;
+		    ssa_op_iter it;
+
+		    FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_DEF)
+		      if (bitmap_bit_p (used, SSA_NAME_VERSION (t)))
+			{
+			  move = true;
+			  break;
+			}
+
+		    if (!move)
+		      continue;
+
+		    /* Mark uses in STMT before moving it.  */
+		    FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_USE)
+		      ifcombine_mark_ssa_name (used, t, outer_bb);
+
+		    gsi_move_before (&gsitr, &gsins, GSI_NEW_STMT);
+		  }
+
+		/* Surprisingly, there may be PHI nodes in single-predecessor
+		   blocks, as in pr50682.C.  Fortunately, since they can't
+		   involve back edges, there won't be references to parallel
+		   nodes that we'd have to pay special attention to to keep
+		   them parallel.  We can't move the PHI nodes, but we can turn
+		   them into assignments.  */
+		for (gphi_iterator gsi = gsi_start_phis (bb);
+		     !gsi_end_p (gsi);)
+		  {
+		    gphi *phi = gsi.phi ();
+
+		    gcc_assert (gimple_phi_num_args (phi) == 1);
+		    tree def = gimple_phi_result (phi);
+
+		    if (!bitmap_bit_p (used, SSA_NAME_VERSION (def)))
+		      {
+			gsi_next (&gsi);
+			continue;
+		      }
+
+		    /* Mark uses in STMT before moving it.  */
+		    use_operand_p use_p;
+		    ssa_op_iter it;
+		    FOR_EACH_PHI_ARG (use_p, phi, it, SSA_OP_USE)
+		      ifcombine_mark_ssa_name (used, USE_FROM_PTR (use_p),
+					       outer_bb);
+
+		    tree use = gimple_phi_arg_def (phi, 0);
+		    location_t loc = gimple_phi_arg_location (phi, 0);
+
+		    remove_phi_node (&gsi, false);
+
+		    gassign *a = gimple_build_assign (def, use);
+		    gimple_set_location (a, loc);
+		    gsi_insert_before (&gsins, a, GSI_NEW_STMT);
+		  }
+	      }
+	  }
+      }
+
+      if (!is_gimple_condexpr_for_cond (cond))
+	{
+	  gimple_stmt_iterator gsi = gsi_for_stmt (outer_cond);
+	  cond = force_gimple_operand_gsi_1 (&gsi, cond,
+					     is_gimple_condexpr_for_cond,
+					     NULL, true, GSI_SAME_STMT);
+	}
+
+      /* Leave CFG optimization to cfg_cleanup.  */
+      gimple_cond_set_condition_from_tree (outer_cond, cond);
+      update_stmt (outer_cond);
+
+      if (cond2)
+	{
+	  if (inner_inv)
+	    cond2 = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond2), cond2);
+
+	  if (tree tcanon = canonicalize_cond_expr_cond (cond2))
+	    cond2 = tcanon;
+	  if (!is_gimple_condexpr_for_cond (cond2))
+	    {
+	      gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond);
+	      cond2 = force_gimple_operand_gsi_1 (&gsi, cond2,
+						  is_gimple_condexpr_for_cond,
+						  NULL, true, GSI_SAME_STMT);
+	    }
+	  gimple_cond_set_condition_from_tree (inner_cond, cond2);
+	}
+      else
+	gimple_cond_set_condition_from_tree (inner_cond,
+					     inner_inv
+					     ? boolean_false_node
+					     : boolean_true_node);
+      update_stmt (inner_cond);
+    }
+  else
     {
       if (!is_gimple_condexpr_for_cond (cond))
 	{