| Message ID | orfrokv1kz.fsf_-_@lxoliva.fsfla.org |
|---|---|
| State | New |
| Series | [#1/7] allow vuses in ifcombine blocks (was: Re: [PATCH] fold fold_truth_andor field merging into ifcombine) |
On Fri, Oct 25, 2024 at 4:39 PM Alexandre Oliva <oliva@adacore.com> wrote:
>
>
> Prepare to handle noncontiguous ifcombine, introducing logic to modify
> the outer condition when needed.  There are two cases worth
> mentioning:
>
> - when blocks are noncontiguous, we have to place the combined
>   condition in the outer block to avoid pessimizing carefully crafted
>   short-circuited tests;
>
> - even when blocks are contiguous, we prepare for situations in which
>   the combined condition has two tests, one to be placed in outer and
>   the other in inner.  This circumstance will not come up when
>   noncontiguous ifcombine is first enabled, but it will when
>   an improved fold_truth_andor is integrated with ifcombine.
>
> Combining the condition from inner into outer may require moving SSA
> DEFs used in the inner condition, and the changes implement this as
> well.
>
>
> for gcc/ChangeLog
>
>         * tree-ssa-ifcombine.cc: Include bitmap.h.
>         (ifcombine_mark_ssa_name): New.
>         (struct ifcombine_mark_ssa_name_t): New.
>         (ifcombine_mark_ssa_name_walk): New.
>         (ifcombine_replace_cond): Prepare to handle noncontiguous and
>         split-condition ifcombine.
> ---
>  gcc/tree-ssa-ifcombine.cc | 173 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 168 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> index b5b72be29bbf9..71c7c9074e94a 100644
> --- a/gcc/tree-ssa-ifcombine.cc
> +++ b/gcc/tree-ssa-ifcombine.cc
> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa.h"
>  #include "attribs.h"
>  #include "asan.h"
> +#include "bitmap.h"
>
>  #ifndef LOGICAL_OP_NON_SHORT_CIRCUIT
>  #define LOGICAL_OP_NON_SHORT_CIRCUIT \
> @@ -460,17 +461,57 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
>      }
>  }
>
> -/* Replace the conditions in INNER_COND with COND.
> -   Replace OUTER_COND with a constant.  */
> +/* Set NAME's bit in USED if OUTER dominates it.  */
> +
> +static void
> +ifcombine_mark_ssa_name (bitmap used, tree name, basic_block outer)
> +{
> +  if (SSA_NAME_IS_DEFAULT_DEF (name))
> +    return;
> +
> +  gimple *def = SSA_NAME_DEF_STMT (name);
> +  basic_block bb = gimple_bb (def);
> +  if (!dominated_by_p (CDI_DOMINATORS, bb, outer))
> +    return;
> +
> +  bitmap_set_bit (used, SSA_NAME_VERSION (name));
> +}
> +
> +/* Data structure passed to ifcombine_mark_ssa_name.  */
> +struct ifcombine_mark_ssa_name_t
> +{
> +  /* SSA_NAMEs that have been referenced.  */
> +  bitmap used;
> +  /* Dominating block of DEFs that might need moving.  */
> +  basic_block outer;
> +};
> +
> +/* Mark in DATA->used any SSA_NAMEs used in *t.  */
> +
> +static tree
> +ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
> +{
> +  ifcombine_mark_ssa_name_t *data = (ifcombine_mark_ssa_name_t *)data_;
> +
> +  if (*t && TREE_CODE (*t) == SSA_NAME)
> +    ifcombine_mark_ssa_name (data->used, *t, data->outer);
> +
> +  return NULL;
> +}
> +
> +/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2.
> +   COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND
> +   replaced with a constant, but if there are intervening blocks, it's best to
> +   adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND.  */
>
>  static bool
>  ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
>                          gcond *outer_cond, bool outer_inv,
>                          tree cond, bool must_canon, tree cond2)
>  {
> -  bool result_inv = inner_inv;
> -
> -  gcc_checking_assert (!cond2);
> +  bool outer_p = cond2 || (single_pred (gimple_bb (inner_cond))
> +                           != gimple_bb (outer_cond));
> +  bool result_inv = outer_p ? outer_inv : inner_inv;
>
>    if (result_inv)
>      cond = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
> @@ -480,6 +521,128 @@ ifcombine_replace_cond (gcond *inner_cond, bool inner_inv,
>    else if (must_canon)
>      return false;
>
> +  if (outer_p)
> +    {
> +      {
> +        auto_bitmap used;

As you are only doing bitmap_set_bit/bitmap_bit_p consider doing
bitmap_tree_view (used); to get O(log N) worst-case behavior rather than
O(N), not that I expect it to make a difference in practice.  But we
don't have any artificial limit on the number of stmts in the middle
block, right?

Otherwise OK (tree view at your discretion).

Thanks,
Richard.

> +        basic_block outer_bb = gimple_bb (outer_cond);
> +
> +        /* Mark SSA DEFs that are referenced by cond and may thus need to be
> +           moved to outer.  */
> +        {
> +          ifcombine_mark_ssa_name_t data = { used, outer_bb };
> +          walk_tree (&cond, ifcombine_mark_ssa_name_walk, &data, NULL);
> +        }
> +
> +        if (!bitmap_empty_p (used))
> +          {
> +            /* Iterate up from inner_cond, moving DEFs identified as used by
> +               cond, and marking USEs in the DEFs for moving as well.  */
> +            gimple_stmt_iterator gsins = gsi_for_stmt (outer_cond);
> +            for (basic_block bb = gimple_bb (inner_cond);
> +                 bb != outer_bb; bb = single_pred (bb))
> +              {
> +                for (gimple_stmt_iterator gsitr = gsi_last_bb (bb);
> +                     !gsi_end_p (gsitr); gsi_prev (&gsitr))
> +                  {
> +                    gimple *stmt = gsi_stmt (gsitr);
> +                    bool move = false;
> +                    tree t;
> +                    ssa_op_iter it;
> +
> +                    FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_DEF)
> +                      if (bitmap_bit_p (used, SSA_NAME_VERSION (t)))
> +                        {
> +                          move = true;
> +                          break;
> +                        }
> +
> +                    if (!move)
> +                      continue;
> +
> +                    /* Mark uses in STMT before moving it.  */
> +                    FOR_EACH_SSA_TREE_OPERAND (t, stmt, it, SSA_OP_USE)
> +                      ifcombine_mark_ssa_name (used, t, outer_bb);
> +
> +                    gsi_move_before (&gsitr, &gsins, GSI_NEW_STMT);
> +                  }
> +
> +                /* Surprisingly, there may be PHI nodes in single-predecessor
> +                   bocks, as in pr50682.C.  Fortunately, since they can't
> +                   involve back edges, there won't be references to parallel
> +                   nodes that we'd have to pay special attention to to keep
> +                   them parallel.  We can't move the PHI nodes, but we can turn
> +                   them into assignments.  */
> +                for (gphi_iterator gsi = gsi_start_phis (bb);
> +                     !gsi_end_p (gsi);)
> +                  {
> +                    gphi *phi = gsi.phi ();
> +
> +                    gcc_assert (gimple_phi_num_args (phi) == 1);
> +                    tree def = gimple_phi_result (phi);
> +
> +                    if (!bitmap_bit_p (used, SSA_NAME_VERSION (def)))
> +                      {
> +                        gsi_next (&gsi);
> +                        continue;
> +                      }
> +
> +                    /* Mark uses in STMT before moving it.  */
> +                    use_operand_p use_p;
> +                    ssa_op_iter it;
> +                    FOR_EACH_PHI_ARG (use_p, phi, it, SSA_OP_USE)
> +                      ifcombine_mark_ssa_name (used, USE_FROM_PTR (use_p),
> +                                               outer_bb);
> +
> +                    tree use = gimple_phi_arg_def (phi, 0);
> +                    location_t loc = gimple_phi_arg_location (phi, 0);
> +
> +                    remove_phi_node (&gsi, false);
> +
> +                    gassign *a = gimple_build_assign (def, use);
> +                    gimple_set_location (a, loc);
> +                    gsi_insert_before (&gsins, a, GSI_NEW_STMT);
> +                  }
> +              }
> +          }
> +      }
> +
> +      if (!is_gimple_condexpr_for_cond (cond))
> +        {
> +          gimple_stmt_iterator gsi = gsi_for_stmt (outer_cond);
> +          cond = force_gimple_operand_gsi_1 (&gsi, cond,
> +                                             is_gimple_condexpr_for_cond,
> +                                             NULL, true, GSI_SAME_STMT);
> +        }
> +
> +      /* Leave CFG optimization to cfg_cleanup.  */
> +      gimple_cond_set_condition_from_tree (outer_cond, cond);
> +      update_stmt (outer_cond);
> +
> +      if (cond2)
> +        {
> +          if (inner_inv)
> +            cond2 = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (cond2), cond2);
> +
> +          if (tree tcanon = canonicalize_cond_expr_cond (cond2))
> +            cond2 = tcanon;
> +          if (!is_gimple_condexpr_for_cond (cond2))
> +            {
> +              gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond);
> +              cond2 = force_gimple_operand_gsi_1 (&gsi, cond2,
> +                                                  is_gimple_condexpr_for_cond,
> +                                                  NULL, true, GSI_SAME_STMT);
> +            }
> +          gimple_cond_set_condition_from_tree (inner_cond, cond2);
> +        }
> +      else
> +        gimple_cond_set_condition_from_tree (inner_cond,
> +                                             inner_inv
> +                                             ? boolean_false_node
> +                                             : boolean_true_node);
> +      update_stmt (inner_cond);
> +    }
> +  else
>      {
>        if (!is_gimple_condexpr_for_cond (cond))
>          {
>
> --
> Alexandre Oliva, happy hacker            https://FSFLA.org/blogs/lxo/
>    Free Software Activist                   GNU Toolchain Engineer
> More tolerance and less prejudice are key for inclusion and diversity
> Excluding neuro-others for not behaving "normal" is *not* inclusive
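To illustrate the first case from the patch description, here is a minimal C
sketch of a noncontiguous ifcombine opportunity.  The code and identifiers are
hypothetical (not taken from the patch or its testsuite); it only shows the
shape of source where the combined test must land in the outer block so the
intervening short-circuited call stays guarded:

/* Hypothetical example.  The two bit tests on FLAGS are separated by an
   unrelated, possibly expensive test, so their blocks are not adjacent.  */

extern int expensive_check (void);

int
f (unsigned flags)
{
  if (flags & 1)                /* outer condition */
    if (expensive_check ())    /* intervening block, must stay short-circuited */
      if (flags & 2)            /* inner condition */
	return 1;
  return 0;
}

If the merged condition (here, something like (flags & 3) == 3, the form that
fold_truth_andor-style field merging would produce) were inserted at the inner
test, expensive_check () would run even when the second bit test is bound to
fail.  Placing it at the outer test preserves the hand-written short circuit,
and the inner condition is then reduced to a constant, as the patch does.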
On Oct 30, 2024, Richard Biener <richard.guenther@gmail.com> wrote:

> As you are only doing bitmap_set_bit/bitmap_bit_p consider doing
> bitmap_tree_view (used);

Ah, nice, I didn't know about this alternate representation.  Thanks.

> But we don't have any artificial limit on the number of stmts in the
> middle block, right?

Yeah, and I really expect the count of SSA defs referenced by the
combined condition to be very low.  Only one file in an all-languages
bootstrap needs a third bit in USED for the cond, and no file uses more
than 4 bits in USED for all defs that might need moving.

OTOH, this one file in gm2 combines (_1 & _2) [!= 0] and (_1 & _3) into
((_2 | _3) & _1), and I suspect the _3 def that needs moving could be
arbitrarily complex.  I suppose we should add some limit on how many
stmts to move to enable a combined cond, either a static limit, or some
dynamic limit based on costs and probabilities of moving vs remaining
stmts.
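To make the gm2 case above concrete, here is a hypothetical C-level analogue
(the identifiers and the shift are invented; the real instance is in generated
Modula-2 code).  The two tests share _1 (x below) and reach the same
destination, so they can be merged using the identity that, as truth values,
(x & a) || (x & b) equals x & (a | b); the catch is that _3 (m3) is only
defined between the two tests, so its def is what has to be moved up to the
outer block:

/* Hypothetical analogue of the gm2 case: _1 is X, _2 is M2, _3 is M3.  */

int
g (unsigned x, unsigned m2, unsigned s)
{
  if (x & m2)               /* (_1 & _2) != 0 */
    return 1;
  unsigned m3 = m2 << s;    /* def of _3, in the block between the tests */
  if (x & m3)               /* (_1 & _3) != 0 */
    return 1;
  return 0;
}

After moving the side-effect-free def of m3 above the first test, both
branches collapse to a single test of ((m2 | m3) & x) != 0, i.e.
((_2 | _3) & _1).  The longer the chain of statements feeding _3, the more
has to be moved, hence the limit suggested above.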