Message ID | ZOdXuCEkw7On4xBo@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
Series | Check that passes do not forget to define profile | expand |
On Thu, Aug 24, 2023 at 3:15 PM Jan Hubicka via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > this patch extends verifier to check that all probabilities and counts are > initialized if profile is supposed to be present. This is a bit complicated > by the posibility that we inline !flag_guess_branch_probability function > into function with profile defined and in this case we need to stop > verification. For this reason I added flag to cfg structure tracking this. > > Bootstrapped/regtested x86_64-linux, comitted. Couldn't we have massaged profile_status to avoid extra full_profile? Aka add PROFILE_{READ,GUESSED}_PARTIAL? > gcc/ChangeLog: > > * cfg.h (struct control_flow_graph): New field full_profile. > * auto-profile.cc (afdo_annotate_cfg): Set full_profile to true. > * cfg.cc (init_flow): Set full_profile to false. > * graphite.cc (graphite_transform_loops): Set full_profile to false. > * lto-streamer-in.cc (input_cfg): Initialize full_profile flag. > * predict.cc (pass_profile::execute): Set full_profile to true. > * symtab-thunks.cc (expand_thunk): Set full_profile to true. > * tree-cfg.cc (gimple_verify_flow_info): Verify that profile is full > if full_profile is set. > * tree-inline.cc (initialize_cfun): Initialize full_profile. > (expand_call_inline): Combine full_profile. > > > diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc > index e3af3555e75..ff3b763945c 100644 > --- a/gcc/auto-profile.cc > +++ b/gcc/auto-profile.cc > @@ -1578,6 +1578,7 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) > } > update_max_bb_count (); > profile_status_for_fn (cfun) = PROFILE_READ; > + cfun->cfg->full_profile = true; > if (flag_value_profile_transformations) > { > gimple_value_profile_transformations (); > diff --git a/gcc/cfg.cc b/gcc/cfg.cc > index 9eb9916f61a..b7865f14e7f 100644 > --- a/gcc/cfg.cc > +++ b/gcc/cfg.cc > @@ -81,6 +81,7 @@ init_flow (struct function *the_fun) > = ENTRY_BLOCK_PTR_FOR_FN (the_fun); > the_fun->cfg->edge_flags_allocated = EDGE_ALL_FLAGS; > the_fun->cfg->bb_flags_allocated = BB_ALL_FLAGS; > + the_fun->cfg->full_profile = false; > } > > /* Helper function for remove_edge and free_cffg. Frees edge structure > diff --git a/gcc/cfg.h b/gcc/cfg.h > index a0e944979c8..53e2553012c 100644 > --- a/gcc/cfg.h > +++ b/gcc/cfg.h > @@ -78,6 +78,9 @@ struct GTY(()) control_flow_graph { > /* Dynamically allocated edge/bb flags. */ > int edge_flags_allocated; > int bb_flags_allocated; > + > + /* Set if the profile is computed on every edge and basic block. */ > + bool full_profile; > }; > > > diff --git a/gcc/graphite.cc b/gcc/graphite.cc > index 19f8975ffa2..2b387d5b016 100644 > --- a/gcc/graphite.cc > +++ b/gcc/graphite.cc > @@ -512,6 +512,8 @@ graphite_transform_loops (void) > > if (changed) > { > + /* FIXME: Graphite does not update profile meaningfully currently. */ > + cfun->cfg->full_profile = false; > cleanup_tree_cfg (); > profile_status_for_fn (cfun) = PROFILE_ABSENT; > release_recorded_exits (cfun); > diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc > index 0cce14414ca..d3128fcebe4 100644 > --- a/gcc/lto-streamer-in.cc > +++ b/gcc/lto-streamer-in.cc > @@ -1030,6 +1030,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, > basic_block p_bb; > unsigned int i; > int index; > + bool full_profile = false; > > init_empty_tree_cfg_for_function (fn); > > @@ -1071,6 +1072,8 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, > data_in->location_cache.input_location_and_block (&e->goto_locus, > &bp, ib, data_in); > e->probability = profile_probability::stream_in (ib); > + if (!e->probability.initialized_p ()) > + full_profile = false; > > } > > @@ -1145,6 +1148,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, > > /* Rebuild the loop tree. */ > flow_loops_find (loops); > + cfun->cfg->full_profile = full_profile; > } > > > diff --git a/gcc/predict.cc b/gcc/predict.cc > index 5a1a561cc24..396746cbfd1 100644 > --- a/gcc/predict.cc > +++ b/gcc/predict.cc > @@ -4131,6 +4131,7 @@ pass_profile::execute (function *fun) > scev_initialize (); > > tree_estimate_probability (false); > + cfun->cfg->full_profile = true; > > if (nb_loops > 1) > scev_finalize (); > diff --git a/gcc/symtab-thunks.cc b/gcc/symtab-thunks.cc > index 4c04235c41b..23ead0d2138 100644 > --- a/gcc/symtab-thunks.cc > +++ b/gcc/symtab-thunks.cc > @@ -648,6 +648,7 @@ expand_thunk (cgraph_node *node, bool output_asm_thunks, > ? PROFILE_READ : PROFILE_GUESSED; > /* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */ > TREE_ASM_WRITTEN (thunk_fndecl) = false; > + cfun->cfg->full_profile = true; > delete_unreachable_blocks (); > update_ssa (TODO_update_ssa); > checking_verify_flow_info (); > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > index 272d5ce321e..ffab7518b15 100644 > --- a/gcc/tree-cfg.cc > +++ b/gcc/tree-cfg.cc > @@ -5684,6 +5684,26 @@ gimple_verify_flow_info (void) > error ("fallthru to exit from bb %d", e->src->index); > err = true; > } > + if (cfun->cfg->full_profile > + && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()) > + { > + error ("entry block count not initialized"); > + err = true; > + } > + if (cfun->cfg->full_profile > + && !EXIT_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()) > + { > + error ("exit block count not initialized"); > + err = true; > + } > + if (cfun->cfg->full_profile > + && !single_succ_edge > + (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability.initialized_p ()) > + { > + error ("probability of edge from entry block not initialized"); > + err = true; > + } > + > > FOR_EACH_BB_FN (bb, cfun) > { > @@ -5691,6 +5711,22 @@ gimple_verify_flow_info (void) > > stmt = NULL; > > + if (cfun->cfg->full_profile) > + { > + if (!bb->count.initialized_p ()) > + { > + error ("count of bb %d not initialized", bb->index); > + err = true; > + } > + FOR_EACH_EDGE (e, ei, bb->succs) > + if (!e->probability.initialized_p ()) > + { > + error ("probability of edge %d->%d not initialized", > + bb->index, e->dest->index); > + err = true; > + } > + } > + > /* Skip labels on the start of basic block. */ > for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc > index 954b39ae1c6..1d98d96df71 100644 > --- a/gcc/tree-inline.cc > +++ b/gcc/tree-inline.cc > @@ -2815,6 +2815,7 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, profile_count count) > init_empty_tree_cfg (); > > profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun); > + cfun->cfg->full_profile = src_cfun->cfg->full_profile; > > profile_count num = count; > profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; > @@ -4953,6 +4954,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, > id->src_cfun = DECL_STRUCT_FUNCTION (fn); > id->reset_location = DECL_IGNORED_P (fn); > id->call_stmt = call_stmt; > + cfun->cfg->full_profile &= id->src_cfun->cfg->full_profile; > > /* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic > variables to be added to IFN_GOMP_SIMT_ENTER argument list. */
> On Thu, Aug 24, 2023 at 3:15 PM Jan Hubicka via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Hi, > > this patch extends verifier to check that all probabilities and counts are > > initialized if profile is supposed to be present. This is a bit complicated > > by the posibility that we inline !flag_guess_branch_probability function > > into function with profile defined and in this case we need to stop > > verification. For this reason I added flag to cfg structure tracking this. > > > > Bootstrapped/regtested x86_64-linux, comitted. > > Couldn't we have massaged profile_status to avoid extra full_profile? > Aka add PROFILE_{READ,GUESSED}_PARTIAL? I am working in direction of removing profile_status. We mostly use it to determine whether profile is reliable (or present at all). This is available locally in profile quality info of profile_count and profile_probability. Most existing tests of that value goes wrong when we inline functions with one profile status to functions with another, so they should be replaced by more local tests. Honza
Hi Jan,
These new checks are too strong for AutoFDO. For example, the edge probabilities are not guaranteed to be initialized (see afdo_calculate_branch_prob).
This currently breaks autoprofiledbootstrap build.
I suggest removing
cfun->cfg->full_profile = true;
from auto-profile.cc.
Eugene
-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+erozen=microsoft.com@gcc.gnu.org> On Behalf Of Jan Hubicka via Gcc-patches
Sent: Thursday, August 24, 2023 6:15 AM
To: gcc-patches@gcc.gnu.org
Subject: [EXTERNAL] Check that passes do not forget to define profile
Hi,
this patch extends verifier to check that all probabilities and counts are initialized if profile is supposed to be present. This is a bit complicated by the posibility that we inline !flag_guess_branch_probability function into function with profile defined and in this case we need to stop verification. For this reason I added flag to cfg structure tracking this.
Bootstrapped/regtested x86_64-linux, comitted.
gcc/ChangeLog:
* cfg.h (struct control_flow_graph): New field full_profile.
* auto-profile.cc (afdo_annotate_cfg): Set full_profile to true.
* cfg.cc (init_flow): Set full_profile to false.
* graphite.cc (graphite_transform_loops): Set full_profile to false.
* lto-streamer-in.cc (input_cfg): Initialize full_profile flag.
* predict.cc (pass_profile::execute): Set full_profile to true.
* symtab-thunks.cc (expand_thunk): Set full_profile to true.
* tree-cfg.cc (gimple_verify_flow_info): Verify that profile is full
if full_profile is set.
* tree-inline.cc (initialize_cfun): Initialize full_profile.
(expand_call_inline): Combine full_profile.
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index e3af3555e75..ff3b763945c 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -1578,6 +1578,7 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
}
update_max_bb_count ();
profile_status_for_fn (cfun) = PROFILE_READ;
+ cfun->cfg->full_profile = true;
if (flag_value_profile_transformations)
{
gimple_value_profile_transformations (); diff --git a/gcc/cfg.cc b/gcc/cfg.cc index 9eb9916f61a..b7865f14e7f 100644
--- a/gcc/cfg.cc
+++ b/gcc/cfg.cc
@@ -81,6 +81,7 @@ init_flow (struct function *the_fun)
= ENTRY_BLOCK_PTR_FOR_FN (the_fun);
the_fun->cfg->edge_flags_allocated = EDGE_ALL_FLAGS;
the_fun->cfg->bb_flags_allocated = BB_ALL_FLAGS;
+ the_fun->cfg->full_profile = false;
}
/* Helper function for remove_edge and free_cffg. Frees edge structure diff --git a/gcc/cfg.h b/gcc/cfg.h index a0e944979c8..53e2553012c 100644
--- a/gcc/cfg.h
+++ b/gcc/cfg.h
@@ -78,6 +78,9 @@ struct GTY(()) control_flow_graph {
/* Dynamically allocated edge/bb flags. */
int edge_flags_allocated;
int bb_flags_allocated;
+
+ /* Set if the profile is computed on every edge and basic block. */
+ bool full_profile;
};
diff --git a/gcc/graphite.cc b/gcc/graphite.cc index 19f8975ffa2..2b387d5b016 100644
--- a/gcc/graphite.cc
+++ b/gcc/graphite.cc
@@ -512,6 +512,8 @@ graphite_transform_loops (void)
if (changed)
{
+ /* FIXME: Graphite does not update profile meaningfully currently. */
+ cfun->cfg->full_profile = false;
cleanup_tree_cfg ();
profile_status_for_fn (cfun) = PROFILE_ABSENT;
release_recorded_exits (cfun);
diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc index 0cce14414ca..d3128fcebe4 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1030,6 +1030,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in,
basic_block p_bb;
unsigned int i;
int index;
+ bool full_profile = false;
init_empty_tree_cfg_for_function (fn);
@@ -1071,6 +1072,8 @@ input_cfg (class lto_input_block *ib, class data_in *data_in,
data_in->location_cache.input_location_and_block (&e->goto_locus,
&bp, ib, data_in);
e->probability = profile_probability::stream_in (ib);
+ if (!e->probability.initialized_p ())
+ full_profile = false;
}
@@ -1145,6 +1148,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in,
/* Rebuild the loop tree. */
flow_loops_find (loops);
+ cfun->cfg->full_profile = full_profile;
}
diff --git a/gcc/predict.cc b/gcc/predict.cc index 5a1a561cc24..396746cbfd1 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -4131,6 +4131,7 @@ pass_profile::execute (function *fun)
scev_initialize ();
tree_estimate_probability (false);
+ cfun->cfg->full_profile = true;
if (nb_loops > 1)
scev_finalize ();
diff --git a/gcc/symtab-thunks.cc b/gcc/symtab-thunks.cc index 4c04235c41b..23ead0d2138 100644
--- a/gcc/symtab-thunks.cc
+++ b/gcc/symtab-thunks.cc
@@ -648,6 +648,7 @@ expand_thunk (cgraph_node *node, bool output_asm_thunks,
? PROFILE_READ : PROFILE_GUESSED;
/* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */
TREE_ASM_WRITTEN (thunk_fndecl) = false;
+ cfun->cfg->full_profile = true;
delete_unreachable_blocks ();
update_ssa (TODO_update_ssa);
checking_verify_flow_info ();
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index 272d5ce321e..ffab7518b15 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -5684,6 +5684,26 @@ gimple_verify_flow_info (void)
error ("fallthru to exit from bb %d", e->src->index);
err = true;
}
+ if (cfun->cfg->full_profile
+ && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
+ {
+ error ("entry block count not initialized");
+ err = true;
+ }
+ if (cfun->cfg->full_profile
+ && !EXIT_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ())
+ {
+ error ("exit block count not initialized");
+ err = true;
+ }
+ if (cfun->cfg->full_profile
+ && !single_succ_edge
+ (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability.initialized_p ())
+ {
+ error ("probability of edge from entry block not initialized");
+ err = true;
+ }
+
FOR_EACH_BB_FN (bb, cfun)
{
@@ -5691,6 +5711,22 @@ gimple_verify_flow_info (void)
stmt = NULL;
+ if (cfun->cfg->full_profile)
+ {
+ if (!bb->count.initialized_p ())
+ {
+ error ("count of bb %d not initialized", bb->index);
+ err = true;
+ }
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (!e->probability.initialized_p ())
+ {
+ error ("probability of edge %d->%d not initialized",
+ bb->index, e->dest->index);
+ err = true;
+ }
+ }
+
/* Skip labels on the start of basic block. */
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc index 954b39ae1c6..1d98d96df71 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -2815,6 +2815,7 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, profile_count count)
init_empty_tree_cfg ();
profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun);
+ cfun->cfg->full_profile = src_cfun->cfg->full_profile;
profile_count num = count;
profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; @@ -4953,6 +4954,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id,
id->src_cfun = DECL_STRUCT_FUNCTION (fn);
id->reset_location = DECL_IGNORED_P (fn);
id->call_stmt = call_stmt;
+ cfun->cfg->full_profile &= id->src_cfun->cfg->full_profile;
/* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
variables to be added to IFN_GOMP_SIMT_ENTER argument list. */
Hi Honza, My current patch set for AArch64 VLA omp codegen started failing on gcc.dg/gomp/pr87898.c after this. I traced it back to 'move_sese_region_to_fn' in tree/cfg.cc not setting count for the bb created. I was able to 'fix' it locally by setting the count of the new bb to the accumulation of e->count () of all the entry_endges (if initialized). I'm however not even close to certain that's the right approach, attached patch for illustration. Kind regards, Andre On 24/08/2023 14:14, Jan Hubicka via Gcc-patches wrote: > Hi, > this patch extends verifier to check that all probabilities and counts are > initialized if profile is supposed to be present. This is a bit complicated > by the posibility that we inline !flag_guess_branch_probability function > into function with profile defined and in this case we need to stop > verification. For this reason I added flag to cfg structure tracking this. > > Bootstrapped/regtested x86_64-linux, comitted. > > gcc/ChangeLog: > > * cfg.h (struct control_flow_graph): New field full_profile. > * auto-profile.cc (afdo_annotate_cfg): Set full_profile to true. > * cfg.cc (init_flow): Set full_profile to false. > * graphite.cc (graphite_transform_loops): Set full_profile to false. > * lto-streamer-in.cc (input_cfg): Initialize full_profile flag. > * predict.cc (pass_profile::execute): Set full_profile to true. > * symtab-thunks.cc (expand_thunk): Set full_profile to true. > * tree-cfg.cc (gimple_verify_flow_info): Verify that profile is full > if full_profile is set. > * tree-inline.cc (initialize_cfun): Initialize full_profile. > (expand_call_inline): Combine full_profile. > > > diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc > index e3af3555e75..ff3b763945c 100644 > --- a/gcc/auto-profile.cc > +++ b/gcc/auto-profile.cc > @@ -1578,6 +1578,7 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) > } > update_max_bb_count (); > profile_status_for_fn (cfun) = PROFILE_READ; > + cfun->cfg->full_profile = true; > if (flag_value_profile_transformations) > { > gimple_value_profile_transformations (); > diff --git a/gcc/cfg.cc b/gcc/cfg.cc > index 9eb9916f61a..b7865f14e7f 100644 > --- a/gcc/cfg.cc > +++ b/gcc/cfg.cc > @@ -81,6 +81,7 @@ init_flow (struct function *the_fun) > = ENTRY_BLOCK_PTR_FOR_FN (the_fun); > the_fun->cfg->edge_flags_allocated = EDGE_ALL_FLAGS; > the_fun->cfg->bb_flags_allocated = BB_ALL_FLAGS; > + the_fun->cfg->full_profile = false; > } > > /* Helper function for remove_edge and free_cffg. Frees edge structure > diff --git a/gcc/cfg.h b/gcc/cfg.h > index a0e944979c8..53e2553012c 100644 > --- a/gcc/cfg.h > +++ b/gcc/cfg.h > @@ -78,6 +78,9 @@ struct GTY(()) control_flow_graph { > /* Dynamically allocated edge/bb flags. */ > int edge_flags_allocated; > int bb_flags_allocated; > + > + /* Set if the profile is computed on every edge and basic block. */ > + bool full_profile; > }; > > > diff --git a/gcc/graphite.cc b/gcc/graphite.cc > index 19f8975ffa2..2b387d5b016 100644 > --- a/gcc/graphite.cc > +++ b/gcc/graphite.cc > @@ -512,6 +512,8 @@ graphite_transform_loops (void) > > if (changed) > { > + /* FIXME: Graphite does not update profile meaningfully currently. */ > + cfun->cfg->full_profile = false; > cleanup_tree_cfg (); > profile_status_for_fn (cfun) = PROFILE_ABSENT; > release_recorded_exits (cfun); > diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc > index 0cce14414ca..d3128fcebe4 100644 > --- a/gcc/lto-streamer-in.cc > +++ b/gcc/lto-streamer-in.cc > @@ -1030,6 +1030,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, > basic_block p_bb; > unsigned int i; > int index; > + bool full_profile = false; > > init_empty_tree_cfg_for_function (fn); > > @@ -1071,6 +1072,8 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, > data_in->location_cache.input_location_and_block (&e->goto_locus, > &bp, ib, data_in); > e->probability = profile_probability::stream_in (ib); > + if (!e->probability.initialized_p ()) > + full_profile = false; > > } > > @@ -1145,6 +1148,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, > > /* Rebuild the loop tree. */ > flow_loops_find (loops); > + cfun->cfg->full_profile = full_profile; > } > > > diff --git a/gcc/predict.cc b/gcc/predict.cc > index 5a1a561cc24..396746cbfd1 100644 > --- a/gcc/predict.cc > +++ b/gcc/predict.cc > @@ -4131,6 +4131,7 @@ pass_profile::execute (function *fun) > scev_initialize (); > > tree_estimate_probability (false); > + cfun->cfg->full_profile = true; > > if (nb_loops > 1) > scev_finalize (); > diff --git a/gcc/symtab-thunks.cc b/gcc/symtab-thunks.cc > index 4c04235c41b..23ead0d2138 100644 > --- a/gcc/symtab-thunks.cc > +++ b/gcc/symtab-thunks.cc > @@ -648,6 +648,7 @@ expand_thunk (cgraph_node *node, bool output_asm_thunks, > ? PROFILE_READ : PROFILE_GUESSED; > /* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */ > TREE_ASM_WRITTEN (thunk_fndecl) = false; > + cfun->cfg->full_profile = true; > delete_unreachable_blocks (); > update_ssa (TODO_update_ssa); > checking_verify_flow_info (); > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > index 272d5ce321e..ffab7518b15 100644 > --- a/gcc/tree-cfg.cc > +++ b/gcc/tree-cfg.cc > @@ -5684,6 +5684,26 @@ gimple_verify_flow_info (void) > error ("fallthru to exit from bb %d", e->src->index); > err = true; > } > + if (cfun->cfg->full_profile > + && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()) > + { > + error ("entry block count not initialized"); > + err = true; > + } > + if (cfun->cfg->full_profile > + && !EXIT_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()) > + { > + error ("exit block count not initialized"); > + err = true; > + } > + if (cfun->cfg->full_profile > + && !single_succ_edge > + (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability.initialized_p ()) > + { > + error ("probability of edge from entry block not initialized"); > + err = true; > + } > + > > FOR_EACH_BB_FN (bb, cfun) > { > @@ -5691,6 +5711,22 @@ gimple_verify_flow_info (void) > > stmt = NULL; > > + if (cfun->cfg->full_profile) > + { > + if (!bb->count.initialized_p ()) > + { > + error ("count of bb %d not initialized", bb->index); > + err = true; > + } > + FOR_EACH_EDGE (e, ei, bb->succs) > + if (!e->probability.initialized_p ()) > + { > + error ("probability of edge %d->%d not initialized", > + bb->index, e->dest->index); > + err = true; > + } > + } > + > /* Skip labels on the start of basic block. */ > for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc > index 954b39ae1c6..1d98d96df71 100644 > --- a/gcc/tree-inline.cc > +++ b/gcc/tree-inline.cc > @@ -2815,6 +2815,7 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, profile_count count) > init_empty_tree_cfg (); > > profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun); > + cfun->cfg->full_profile = src_cfun->cfg->full_profile; > > profile_count num = count; > profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; > @@ -4953,6 +4954,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, > id->src_cfun = DECL_STRUCT_FUNCTION (fn); > id->reset_location = DECL_IGNORED_P (fn); > id->call_stmt = call_stmt; > + cfun->cfg->full_profile &= id->src_cfun->cfg->full_profile; > > /* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic > variables to be added to IFN_GOMP_SIMT_ENTER argument list. */ diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index ffab7518b1568b58e610e26feb9e3cab18ddb3c2..32fc47ae683164bf8fac477fbe6e2c998382e754 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -8160,11 +8160,15 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, bb = create_empty_bb (entry_pred[0]); if (current_loops) add_bb_to_loop (bb, loop); + profile_count count = profile_count::zero (); for (i = 0; i < num_entry_edges; i++) { e = make_edge (entry_pred[i], bb, entry_flag[i]); e->probability = entry_prob[i]; + if (e->count ().initialized_p ()) + count += e->count (); } + bb->count = count; for (i = 0; i < num_exit_edges; i++) {
> Hi Honza, > > My current patch set for AArch64 VLA omp codegen started failing on > gcc.dg/gomp/pr87898.c after this. I traced it back to > 'move_sese_region_to_fn' in tree/cfg.cc not setting count for the bb > created. > > I was able to 'fix' it locally by setting the count of the new bb to the > accumulation of e->count () of all the entry_endges (if initialized). I'm > however not even close to certain that's the right approach, attached patch > for illustration. > > Kind regards, > Andre > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > index ffab7518b1568b58e610e26feb9e3cab18ddb3c2..32fc47ae683164bf8fac477fbe6e2c998382e754 100644 > --- a/gcc/tree-cfg.cc > +++ b/gcc/tree-cfg.cc > @@ -8160,11 +8160,15 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, > bb = create_empty_bb (entry_pred[0]); > if (current_loops) > add_bb_to_loop (bb, loop); > + profile_count count = profile_count::zero (); > for (i = 0; i < num_entry_edges; i++) > { > e = make_edge (entry_pred[i], bb, entry_flag[i]); > e->probability = entry_prob[i]; > + if (e->count ().initialized_p ()) > + count += e->count (); > } > + bb->count = count; This looks generally right - if you create a BB you need to set its count and unless it has self-loop that is the sum of counts of incommping edges. However the initialized_p check should be unnecessary: if one of entry edges to BB is uninitialized, the + operation will make bb count uninitialized too, which is OK. Honza > > for (i = 0; i < num_exit_edges; i++) > {
So OK to commit this? This patch makes sure the profile_count information is initialized for the new bb created in move_sese_region_to_fn. gcc/ChangeLog: * tree-cfg.cc (move_sese_region_to_fn): Initialize profile_count for new basic block. Bootstrapped and regression tested on aarch64-unknown-linux-gnu and x86_64-pc-linux-gnu. On 04/10/2023 12:02, Jan Hubicka wrote: >> Hi Honza, >> >> My current patch set for AArch64 VLA omp codegen started failing on >> gcc.dg/gomp/pr87898.c after this. I traced it back to >> 'move_sese_region_to_fn' in tree/cfg.cc not setting count for the bb >> created. >> >> I was able to 'fix' it locally by setting the count of the new bb to the >> accumulation of e->count () of all the entry_endges (if initialized). I'm >> however not even close to certain that's the right approach, attached patch >> for illustration. >> >> Kind regards, >> Andre >> diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > >> index ffab7518b1568b58e610e26feb9e3cab18ddb3c2..32fc47ae683164bf8fac477fbe6e2c998382e754 100644 >> --- a/gcc/tree-cfg.cc >> +++ b/gcc/tree-cfg.cc >> @@ -8160,11 +8160,15 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, >> bb = create_empty_bb (entry_pred[0]); >> if (current_loops) >> add_bb_to_loop (bb, loop); >> + profile_count count = profile_count::zero (); >> for (i = 0; i < num_entry_edges; i++) >> { >> e = make_edge (entry_pred[i], bb, entry_flag[i]); >> e->probability = entry_prob[i]; >> + if (e->count ().initialized_p ()) >> + count += e->count (); >> } >> + bb->count = count; > > This looks generally right - if you create a BB you need to set its > count and unless it has self-loop that is the sum of counts of > incommping edges. > > However the initialized_p check should be unnecessary: if one of entry > edges to BB is uninitialized, the + operation will make bb count > uninitialized too, which is OK. > > Honza >> >> for (i = 0; i < num_exit_edges; i++) >> { > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index ffab7518b1568b58e610e26feb9e3cab18ddb3c2..ffeb20b717aead756844c5f48c2cc23f5e9f14a6 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -8160,11 +8160,14 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, bb = create_empty_bb (entry_pred[0]); if (current_loops) add_bb_to_loop (bb, loop); + profile_count count = profile_count::zero (); for (i = 0; i < num_entry_edges; i++) { e = make_edge (entry_pred[i], bb, entry_flag[i]); e->probability = entry_prob[i]; + count += e->count (); } + bb->count = count; for (i = 0; i < num_exit_edges; i++) {
> So OK to commit this? > > This patch makes sure the profile_count information is initialized for the > new > bb created in move_sese_region_to_fn. > > gcc/ChangeLog: > > * tree-cfg.cc (move_sese_region_to_fn): Initialize profile_count for > new basic block. > > Bootstrapped and regression tested on aarch64-unknown-linux-gnu and > x86_64-pc-linux-gnu. This is OK, thanks! Honza > > On 04/10/2023 12:02, Jan Hubicka wrote: > > > Hi Honza, > > > > > > My current patch set for AArch64 VLA omp codegen started failing on > > > gcc.dg/gomp/pr87898.c after this. I traced it back to > > > 'move_sese_region_to_fn' in tree/cfg.cc not setting count for the bb > > > created. > > > > > > I was able to 'fix' it locally by setting the count of the new bb to the > > > accumulation of e->count () of all the entry_endges (if initialized). I'm > > > however not even close to certain that's the right approach, attached patch > > > for illustration. > > > > > > Kind regards, > > > Andre > > > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > > > > > index ffab7518b1568b58e610e26feb9e3cab18ddb3c2..32fc47ae683164bf8fac477fbe6e2c998382e754 100644 > > > --- a/gcc/tree-cfg.cc > > > +++ b/gcc/tree-cfg.cc > > > @@ -8160,11 +8160,15 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, > > > bb = create_empty_bb (entry_pred[0]); > > > if (current_loops) > > > add_bb_to_loop (bb, loop); > > > + profile_count count = profile_count::zero (); > > > for (i = 0; i < num_entry_edges; i++) > > > { > > > e = make_edge (entry_pred[i], bb, entry_flag[i]); > > > e->probability = entry_prob[i]; > > > + if (e->count ().initialized_p ()) > > > + count += e->count (); > > > } > > > + bb->count = count; > > > > This looks generally right - if you create a BB you need to set its > > count and unless it has self-loop that is the sum of counts of > > incommping edges. > > > > However the initialized_p check should be unnecessary: if one of entry > > edges to BB is uninitialized, the + operation will make bb count > > uninitialized too, which is OK. > > > > Honza > > > for (i = 0; i < num_exit_edges; i++) > > > { > > > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > index ffab7518b1568b58e610e26feb9e3cab18ddb3c2..ffeb20b717aead756844c5f48c2cc23f5e9f14a6 100644 > --- a/gcc/tree-cfg.cc > +++ b/gcc/tree-cfg.cc > @@ -8160,11 +8160,14 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, > bb = create_empty_bb (entry_pred[0]); > if (current_loops) > add_bb_to_loop (bb, loop); > + profile_count count = profile_count::zero (); > for (i = 0; i < num_entry_edges; i++) > { > e = make_edge (entry_pred[i], bb, entry_flag[i]); > e->probability = entry_prob[i]; > + count += e->count (); > } > + bb->count = count; > > for (i = 0; i < num_exit_edges; i++) > {
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index e3af3555e75..ff3b763945c 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -1578,6 +1578,7 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) } update_max_bb_count (); profile_status_for_fn (cfun) = PROFILE_READ; + cfun->cfg->full_profile = true; if (flag_value_profile_transformations) { gimple_value_profile_transformations (); diff --git a/gcc/cfg.cc b/gcc/cfg.cc index 9eb9916f61a..b7865f14e7f 100644 --- a/gcc/cfg.cc +++ b/gcc/cfg.cc @@ -81,6 +81,7 @@ init_flow (struct function *the_fun) = ENTRY_BLOCK_PTR_FOR_FN (the_fun); the_fun->cfg->edge_flags_allocated = EDGE_ALL_FLAGS; the_fun->cfg->bb_flags_allocated = BB_ALL_FLAGS; + the_fun->cfg->full_profile = false; } /* Helper function for remove_edge and free_cffg. Frees edge structure diff --git a/gcc/cfg.h b/gcc/cfg.h index a0e944979c8..53e2553012c 100644 --- a/gcc/cfg.h +++ b/gcc/cfg.h @@ -78,6 +78,9 @@ struct GTY(()) control_flow_graph { /* Dynamically allocated edge/bb flags. */ int edge_flags_allocated; int bb_flags_allocated; + + /* Set if the profile is computed on every edge and basic block. */ + bool full_profile; }; diff --git a/gcc/graphite.cc b/gcc/graphite.cc index 19f8975ffa2..2b387d5b016 100644 --- a/gcc/graphite.cc +++ b/gcc/graphite.cc @@ -512,6 +512,8 @@ graphite_transform_loops (void) if (changed) { + /* FIXME: Graphite does not update profile meaningfully currently. */ + cfun->cfg->full_profile = false; cleanup_tree_cfg (); profile_status_for_fn (cfun) = PROFILE_ABSENT; release_recorded_exits (cfun); diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc index 0cce14414ca..d3128fcebe4 100644 --- a/gcc/lto-streamer-in.cc +++ b/gcc/lto-streamer-in.cc @@ -1030,6 +1030,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, basic_block p_bb; unsigned int i; int index; + bool full_profile = false; init_empty_tree_cfg_for_function (fn); @@ -1071,6 +1072,8 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, data_in->location_cache.input_location_and_block (&e->goto_locus, &bp, ib, data_in); e->probability = profile_probability::stream_in (ib); + if (!e->probability.initialized_p ()) + full_profile = false; } @@ -1145,6 +1148,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, /* Rebuild the loop tree. */ flow_loops_find (loops); + cfun->cfg->full_profile = full_profile; } diff --git a/gcc/predict.cc b/gcc/predict.cc index 5a1a561cc24..396746cbfd1 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -4131,6 +4131,7 @@ pass_profile::execute (function *fun) scev_initialize (); tree_estimate_probability (false); + cfun->cfg->full_profile = true; if (nb_loops > 1) scev_finalize (); diff --git a/gcc/symtab-thunks.cc b/gcc/symtab-thunks.cc index 4c04235c41b..23ead0d2138 100644 --- a/gcc/symtab-thunks.cc +++ b/gcc/symtab-thunks.cc @@ -648,6 +648,7 @@ expand_thunk (cgraph_node *node, bool output_asm_thunks, ? PROFILE_READ : PROFILE_GUESSED; /* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks. */ TREE_ASM_WRITTEN (thunk_fndecl) = false; + cfun->cfg->full_profile = true; delete_unreachable_blocks (); update_ssa (TODO_update_ssa); checking_verify_flow_info (); diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index 272d5ce321e..ffab7518b15 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -5684,6 +5684,26 @@ gimple_verify_flow_info (void) error ("fallthru to exit from bb %d", e->src->index); err = true; } + if (cfun->cfg->full_profile + && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()) + { + error ("entry block count not initialized"); + err = true; + } + if (cfun->cfg->full_profile + && !EXIT_BLOCK_PTR_FOR_FN (cfun)->count.initialized_p ()) + { + error ("exit block count not initialized"); + err = true; + } + if (cfun->cfg->full_profile + && !single_succ_edge + (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability.initialized_p ()) + { + error ("probability of edge from entry block not initialized"); + err = true; + } + FOR_EACH_BB_FN (bb, cfun) { @@ -5691,6 +5711,22 @@ gimple_verify_flow_info (void) stmt = NULL; + if (cfun->cfg->full_profile) + { + if (!bb->count.initialized_p ()) + { + error ("count of bb %d not initialized", bb->index); + err = true; + } + FOR_EACH_EDGE (e, ei, bb->succs) + if (!e->probability.initialized_p ()) + { + error ("probability of edge %d->%d not initialized", + bb->index, e->dest->index); + err = true; + } + } + /* Skip labels on the start of basic block. */ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc index 954b39ae1c6..1d98d96df71 100644 --- a/gcc/tree-inline.cc +++ b/gcc/tree-inline.cc @@ -2815,6 +2815,7 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, profile_count count) init_empty_tree_cfg (); profile_status_for_fn (cfun) = profile_status_for_fn (src_cfun); + cfun->cfg->full_profile = src_cfun->cfg->full_profile; profile_count num = count; profile_count den = ENTRY_BLOCK_PTR_FOR_FN (src_cfun)->count; @@ -4953,6 +4954,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, id->src_cfun = DECL_STRUCT_FUNCTION (fn); id->reset_location = DECL_IGNORED_P (fn); id->call_stmt = call_stmt; + cfun->cfg->full_profile &= id->src_cfun->cfg->full_profile; /* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic variables to be added to IFN_GOMP_SIMT_ENTER argument list. */