Message ID | ri6o8mue77d.fsf@suse.cz |
---|---|
State | New |
Headers | show |
Series | [3/n] ipa: Simplify interface of ipa_call_context::estimate_size_and_time | expand |
> Hi, > > this patch changes ipa_call_context::estimate_size_and_time to store > its results into member fields of the ipa_call_context class instead > into pointers it receives as parameters so that it can compute ore > stuff without cluttering the interface even further. > > Bootstrapped and tested on x86_64-linux. OK for master on top of the > previous two patches? ipa_call_context is intended to be structure holding all parameters that are needed to produce the estimates (size/time/hints). Adding the actual estimates there would duplicate it with cache. What about keeping them separate and inventing ipa_call_estimates structure to hold the reults? Honza > > Thanks, > > Martin > > > gcc/ChangeLog: > > 2020-08-28 Martin Jambor <mjambor@suse.cz> > > * ipa-fnsummary.h (class ipa_call_context): Changed declaration of > estimate_size_and_time to accept two booleans. Added an overload > of the method without any parameters. New fields m_size, > m_min_size, m_time, m_nonspecialized_time and m_hints. > * ipa-cp.c (hint_time_bonus): Changed the second parameter from > just hints to a const reference to ipa_call_context. > (perform_estimation_of_a_value): Adjusted to the new interface of > ipa_call_context::estimate_size_and_time. > * ipa-fnsummary.c (ipa_call_context::estimate_size_and_time): > Modified to store results into member fields of the class. > * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to the > new interface of ipa_call_context::estimate_size_and_time. > (do_estimate_edge_size): Likewise. > (do_estimate_edge_hints): Likewise. > --- > gcc/ipa-cp.c | 41 ++++++++++++++++----------------- > gcc/ipa-fnsummary.c | 48 +++++++++++++++++++-------------------- > gcc/ipa-fnsummary.h | 33 +++++++++++++++++++++++---- > gcc/ipa-inline-analysis.c | 45 ++++++++++++++++++------------------ > 4 files changed, 94 insertions(+), 73 deletions(-) > > diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c > index e37e71bd24f..010ecfc6b43 100644 > --- a/gcc/ipa-cp.c > +++ b/gcc/ipa-cp.c > @@ -3182,12 +3182,13 @@ devirtualization_time_bonus (struct cgraph_node *node, > return res; > } > > -/* Return time bonus incurred because of HINTS. */ > +/* Return time bonus incurred because of hints stored in CTX. */ > > static int > -hint_time_bonus (cgraph_node *node, ipa_hints hints) > +hint_time_bonus (cgraph_node *node, const ipa_call_context &ctx) > { > int result = 0; > + ipa_hints hints = ctx.m_hints; > if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride)) > result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus); > return result; > @@ -3387,16 +3388,14 @@ perform_estimation_of_a_value (cgraph_node *node, ipa_call_arg_values *avals, > int removable_params_cost, int est_move_cost, > ipcp_value_base *val) > { > - int size, time_benefit; > - sreal time, base_time; > - ipa_hints hints; > + int time_benefit; > > ipa_call_context ctx = ipa_call_context::for_cloned_node (node, avals); > - ctx.estimate_size_and_time (&size, NULL, &time, &base_time, &hints); > + ctx.estimate_size_and_time (); > > - base_time -= time; > - if (base_time > 65535) > - base_time = 65535; > + sreal time_delta = ctx.m_nonspecialized_time - ctx.m_time; > + if (time_delta > 65535) > + time_delta = 65535; > > /* Extern inline functions have no cloning local time benefits because they > will be inlined anyway. The only reason to clone them is if it enables > @@ -3404,11 +3403,12 @@ perform_estimation_of_a_value (cgraph_node *node, ipa_call_arg_values *avals, > if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl)) > time_benefit = 0; > else > - time_benefit = base_time.to_int () > + time_benefit = time_delta.to_int () > + devirtualization_time_bonus (node, avals) > - + hint_time_bonus (node, hints) > + + hint_time_bonus (node, ctx) > + removable_params_cost + est_move_cost; > > + int size = ctx.m_size; > gcc_checking_assert (size >=0); > /* The inliner-heuristics based estimates may think that in certain > contexts some functions do not have any size at all but we want > @@ -3463,24 +3463,22 @@ estimate_local_effects (struct cgraph_node *node) > || (removable_params_cost && node->can_change_signature)) > { > struct caller_statistics stats; > - ipa_hints hints; > - sreal time, base_time; > - int size; > > init_caller_stats (&stats); > node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, > false); > ipa_call_context ctx = ipa_call_context::for_cloned_node (node, &avals); > - ctx.estimate_size_and_time (&size, NULL, &time, &base_time, &hints); > + ctx.estimate_size_and_time (); > > - time -= devirt_bonus; > - time -= hint_time_bonus (node, hints); > - time -= removable_params_cost; > - size -= stats.n_calls * removable_params_cost; > + sreal time = ctx.m_nonspecialized_time - ctx.m_time; > + time += devirt_bonus; > + time += hint_time_bonus (node, ctx); > + time += removable_params_cost; > + int size = ctx.m_size - stats.n_calls * removable_params_cost; > > if (dump_file) > fprintf (dump_file, " - context independent values, size: %i, " > - "time_benefit: %f\n", size, (base_time - time).to_double ()); > + "time_benefit: %f\n", size, (time).to_double ()); > > if (size <= 0 || node->local) > { > @@ -3491,8 +3489,7 @@ estimate_local_effects (struct cgraph_node *node) > "known contexts, code not going to grow.\n"); > } > else if (good_cloning_opportunity_p (node, > - MIN ((base_time - time).to_int (), > - 65536), > + MIN ((time).to_int (), 65536), > stats.freq_sum, stats.count_sum, > size)) > { > diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c > index 2a0f8ad37b2..eb58b143d1c 100644 > --- a/gcc/ipa-fnsummary.c > +++ b/gcc/ipa-fnsummary.c > @@ -3281,18 +3281,19 @@ ipa_call_context > { > } > > -/* Estimate size and time needed to execute call in the given context. > - Additionally determine hints determined by the context. Finally compute > - minimal size needed for the call that is independent on the call context and > - can be used for fast estimates. Return the values in RET_SIZE, > - RET_MIN_SIZE, RET_TIME and RET_HINTS. */ > +/* Estimate size needed to execute call in the given context and store it to > + m_size, compute minimal size needed for the call that is independent on the > + call context and can be used for fast estimates and store it to m_min_size. > + > + If EST_TIMES is true, estimate time needed to execute call in the given > + context, store it to m_time, and time needed when also calculating things > + known to be constant in this context and store it to m_nonspecialized_time. > + > + If EST_HINTS is true, also determine hints for this context and store them > + to m_hints. */ > > void > -ipa_call_context::estimate_size_and_time (int *ret_size, > - int *ret_min_size, > - sreal *ret_time, > - sreal *ret_nonspecialized_time, > - ipa_hints *ret_hints) > +ipa_call_context::estimate_size_and_time (bool est_times, bool est_hints) > { > class ipa_fn_summary *info = ipa_fn_summaries->get (m_node); > size_time_entry *e; > @@ -3322,8 +3323,8 @@ ipa_call_context::estimate_size_and_time (int *ret_size, > > if (m_node->callees || m_node->indirect_calls) > estimate_calls_size_and_time (m_node, &size, &min_size, > - ret_time ? &time : NULL, > - ret_hints ? &hints : NULL, m_possible_truths, > + est_times ? &time : NULL, > + est_hints ? &hints : NULL, m_possible_truths, > m_avals); > > sreal nonspecialized_time = time; > @@ -3350,7 +3351,7 @@ ipa_call_context::estimate_size_and_time (int *ret_size, > known to be constant in a specialized setting. */ > if (nonconst) > size += e->size; > - if (!ret_time) > + if (!est_times) > continue; > nonspecialized_time += e->time; > if (!nonconst) > @@ -3390,7 +3391,7 @@ ipa_call_context::estimate_size_and_time (int *ret_size, > if (time > nonspecialized_time) > time = nonspecialized_time; > > - if (ret_hints) > + if (est_hints) > { > if (info->loop_iterations > && !info->loop_iterations->evaluate (m_possible_truths)) > @@ -3410,16 +3411,15 @@ ipa_call_context::estimate_size_and_time (int *ret_size, > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size, > time.to_double (), nonspecialized_time.to_double ()); > - if (ret_time) > - *ret_time = time; > - if (ret_nonspecialized_time) > - *ret_nonspecialized_time = nonspecialized_time; > - if (ret_size) > - *ret_size = size; > - if (ret_min_size) > - *ret_min_size = min_size; > - if (ret_hints) > - *ret_hints = hints; > + m_size = size; > + m_min_size = min_size; > + if (est_times) > + { > + m_time = time; > + m_nonspecialized_time = nonspecialized_time; > + } > + if (est_hints) > + m_hints = hints; > return; > } > > diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h > index 74a78d31391..6253e375e3b 100644 > --- a/gcc/ipa-fnsummary.h > +++ b/gcc/ipa-fnsummary.h > @@ -312,16 +312,41 @@ public: > : m_node(NULL) > { > } > - void estimate_size_and_time (int *ret_size, int *ret_min_size, > - sreal *ret_time, > - sreal *ret_nonspecialized_time, > - ipa_hints *ret_hints); > + void estimate_size_and_time (bool est_times, bool est_hints); > + > + /* Estimate everything about a call in this context. */ > + void estimate_size_and_time () > + { > + estimate_size_and_time (true, true); > + } > + > void store_to_cache (ipa_node_context_cache_entry *cache) const; > bool equivalent_to_p (const ipa_node_context_cache_entry &cache) const; > bool exists_p () > { > return m_node != NULL; > } > + > + > + /* The following metrics fields only contain valid contents after they have > + been calculated by estimate_size_and_time (provided the function was > + instructed to compute them). */ > + > + /* Estimated size needed to execute call in the given context. */ > + int m_size = INT_MIN; > + /* Minimal size needed for the call that is independent on the call context > + and can be used for fast estimates. */ > + int m_min_size = INT_MIN; > + > + /* Estimated time needed to execute call in the given context. */ > + sreal m_time; > + /* Estimated time needed to execute the function when not ignoring > + computations known to be constant in this context. */ > + sreal m_nonspecialized_time; > + > + /* Further discovered reasons why to inline or specialize the give calls. */ > + ipa_hints m_hints = -1; > + > private: > /* Called function. */ > cgraph_node *m_node; > diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c > index 9788ee346ac..706e4fffe4a 100644 > --- a/gcc/ipa-inline-analysis.c > +++ b/gcc/ipa-inline-analysis.c > @@ -428,16 +428,10 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) > && !opt_for_fn (callee->decl, flag_profile_partial_training) > && !callee->count.ipa_p ()) > { > - sreal chk_time, chk_nonspec_time; > - int chk_size, chk_min_size; > - > - ipa_hints chk_hints; > - ctx.estimate_size_and_time (&chk_size, &chk_min_size, > - &chk_time, &chk_nonspec_time, > - &chk_hints); > - gcc_assert (chk_size == size && chk_time == time > - && chk_nonspec_time == nonspec_time > - && chk_hints == hints); > + ctx.estimate_size_and_time (); > + gcc_assert (ctx.m_size == size && ctx.m_time == time > + && ctx.m_nonspecialized_time == nonspec_time > + && ctx.m_hints == hints); > } > } > else > @@ -450,18 +444,26 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) > else > e = node_context_cache->get_create (callee); > > - ctx.estimate_size_and_time (&size, &min_size, > - &time, &nonspec_time, &hints); > + ctx.estimate_size_and_time (); > ctx.store_to_cache (&e->entry); > + size = ctx.m_size; > e->entry.m_size = size; > + time = ctx.m_time; > e->entry.m_time = time; > + nonspec_time = ctx.m_nonspecialized_time; > e->entry.m_nonspec_time = nonspec_time; > + hints = ctx.m_hints; > e->entry.m_hints = hints; > } > } > else > - ctx.estimate_size_and_time (&size, &min_size, > - &time, &nonspec_time, &hints); > + { > + ctx.estimate_size_and_time (); > + size = ctx.m_size; > + time = ctx.m_time; > + nonspec_time = ctx.m_nonspecialized_time; > + hints = ctx.m_hints; > + } > > /* When we have profile feedback, we can quite safely identify hot > edges and for those we disable size limits. Don't do that when > @@ -524,7 +526,6 @@ ipa_remove_from_growth_caches (struct cgraph_edge *edge) > int > do_estimate_edge_size (struct cgraph_edge *edge) > { > - int size; > ipa_call_arg_values avals; > > /* When we do caching, use do_estimate_edge_time to populate the entry. */ > @@ -532,7 +533,7 @@ do_estimate_edge_size (struct cgraph_edge *edge) > if (edge_growth_cache != NULL) > { > do_estimate_edge_time (edge); > - size = edge_growth_cache->get (edge)->size; > + int size = edge_growth_cache->get (edge)->size; > gcc_checking_assert (size); > return size - (size > 0); > } > @@ -541,8 +542,8 @@ do_estimate_edge_size (struct cgraph_edge *edge) > gcc_checking_assert (edge->inline_failed); > ipa_call_context ctx = ipa_call_context::for_inlined_edge (edge, vNULL, > &avals); > - ctx.estimate_size_and_time (&size, NULL, NULL, NULL, NULL); > - return size; > + ctx.estimate_size_and_time (false, false); > + return ctx.m_size; > } > > > @@ -552,7 +553,6 @@ do_estimate_edge_size (struct cgraph_edge *edge) > ipa_hints > do_estimate_edge_hints (struct cgraph_edge *edge) > { > - ipa_hints hints; > ipa_call_arg_values avals; > > /* When we do caching, use do_estimate_edge_time to populate the entry. */ > @@ -560,7 +560,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge) > if (edge_growth_cache != NULL) > { > do_estimate_edge_time (edge); > - hints = edge_growth_cache->get (edge)->hints; > + ipa_hints hints = edge_growth_cache->get (edge)->hints; > gcc_checking_assert (hints); > return hints - 1; > } > @@ -569,9 +569,8 @@ do_estimate_edge_hints (struct cgraph_edge *edge) > gcc_checking_assert (edge->inline_failed); > ipa_call_context ctx = ipa_call_context::for_inlined_edge (edge, vNULL, > &avals); > - ctx.estimate_size_and_time (NULL, NULL, NULL, NULL, &hints); > - hints |= simple_edge_hints (edge); > - return hints; > + ctx.estimate_size_and_time (false, true); > + return ctx.m_hints | simple_edge_hints (edge); > } > > /* Estimate the size of NODE after inlining EDGE which should be an > -- > 2.28.0 >
Hi, On Sat, Aug 29 2020, Jan Hubicka wrote: >> Hi, >> >> this patch changes ipa_call_context::estimate_size_and_time to store >> its results into member fields of the ipa_call_context class instead >> into pointers it receives as parameters so that it can compute ore >> stuff without cluttering the interface even further. >> >> Bootstrapped and tested on x86_64-linux. OK for master on top of the >> previous two patches? > > ipa_call_context is intended to be structure holding all parameters that > are needed to produce the estimates (size/time/hints). even today it only "holds" the data when it resides in the RCU cache, otherwise it points to data "owned" by the caller. Admittedly, my first patch makes the cache data structure separate, making ipa_class_context only a utility for calculating the estimates - but given how all the code is structured, it does not really work as the grand encapsulator of all context data when passing it from a function to function. > Adding the actual estimates there would duplicate it with cache. The first patch in the series makes the cache items not contain ipa_call_context directly, so in my patch series at least, the estimates are not duplicated. > What > about keeping them separate and inventing ipa_call_estimates structure > to hold the reults? I can but unless you do not like the first patch and want me to re-write it or just not do anything like it, I don't think it matters because the structures will almost always lie next to each other on the user's stack. Martin >> >> >> gcc/ChangeLog: >> >> 2020-08-28 Martin Jambor <mjambor@suse.cz> >> >> * ipa-fnsummary.h (class ipa_call_context): Changed declaration of >> estimate_size_and_time to accept two booleans. Added an overload >> of the method without any parameters. New fields m_size, >> m_min_size, m_time, m_nonspecialized_time and m_hints. >> * ipa-cp.c (hint_time_bonus): Changed the second parameter from >> just hints to a const reference to ipa_call_context. >> (perform_estimation_of_a_value): Adjusted to the new interface of >> ipa_call_context::estimate_size_and_time. >> * ipa-fnsummary.c (ipa_call_context::estimate_size_and_time): >> Modified to store results into member fields of the class. >> * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to the >> new interface of ipa_call_context::estimate_size_and_time. >> (do_estimate_edge_size): Likewise. >> (do_estimate_edge_hints): Likewise.
> Hi, > > On Sat, Aug 29 2020, Jan Hubicka wrote: > >> Hi, > >> > >> this patch changes ipa_call_context::estimate_size_and_time to store > >> its results into member fields of the ipa_call_context class instead > >> into pointers it receives as parameters so that it can compute ore > >> stuff without cluttering the interface even further. > >> > >> Bootstrapped and tested on x86_64-linux. OK for master on top of the > >> previous two patches? > > > > ipa_call_context is intended to be structure holding all parameters that > > are needed to produce the estimates (size/time/hints). > > even today it only "holds" the data when it resides in the RCU cache, > otherwise it points to data "owned" by the caller. Admittedly, my first > patch makes the cache data structure separate, making ipa_class_context > only a utility for calculating the estimates - but given how all the > code is structured, it does not really work as the grand encapsulator of > all context data when passing it from a function to function. > > > Adding the actual estimates there would duplicate it with cache. > > The first patch in the series makes the cache items not contain > ipa_call_context directly, so in my patch series at least, the estimates > are not duplicated. ipa_call_context defines the context estimates depends on. This puts all the info to one place and makes the cache well defined - it assigns contexts to estimates. From this point of view I do not quite like duplicating this logic (copying things into different datastructure) and making contxt to also contain the esitmates since these are, well, not context of the call. I am happy with merging the analysis results into something like class function_body_estimate holding all the values. Games with the ownerhip you mention above was not original plan. While perfing inliner I noticed that we spend measurable time in malloc and that mostly goes to alocaitng the vectors (which we did for long time). Perhaps cleaner solution is to have ipa_context_base which is derived by ipa_context and ipa_cached_context where first preallocats on stack while second allocates on heap? Honza > > > What > > about keeping them separate and inventing ipa_call_estimates structure > > to hold the reults? > > I can but unless you do not like the first patch and want me to re-write > it or just not do anything like it, I don't think it matters because the > structures will almost always lie next to each other on the user's > stack. > > Martin > > >> > >> > >> gcc/ChangeLog: > >> > >> 2020-08-28 Martin Jambor <mjambor@suse.cz> > >> > >> * ipa-fnsummary.h (class ipa_call_context): Changed declaration of > >> estimate_size_and_time to accept two booleans. Added an overload > >> of the method without any parameters. New fields m_size, > >> m_min_size, m_time, m_nonspecialized_time and m_hints. > >> * ipa-cp.c (hint_time_bonus): Changed the second parameter from > >> just hints to a const reference to ipa_call_context. > >> (perform_estimation_of_a_value): Adjusted to the new interface of > >> ipa_call_context::estimate_size_and_time. > >> * ipa-fnsummary.c (ipa_call_context::estimate_size_and_time): > >> Modified to store results into member fields of the class. > >> * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to the > >> new interface of ipa_call_context::estimate_size_and_time. > >> (do_estimate_edge_size): Likewise. > >> (do_estimate_edge_hints): Likewise.
Hi, On Sat, Aug 29 2020, Jan Hubicka wrote: >> Hi, >> >> On Sat, Aug 29 2020, Jan Hubicka wrote: >> >> Hi, >> >> >> >> this patch changes ipa_call_context::estimate_size_and_time to store >> >> its results into member fields of the ipa_call_context class instead >> >> into pointers it receives as parameters so that it can compute ore >> >> stuff without cluttering the interface even further. >> >> >> >> Bootstrapped and tested on x86_64-linux. OK for master on top of the >> >> previous two patches? >> > >> > ipa_call_context is intended to be structure holding all parameters that >> > are needed to produce the estimates (size/time/hints). >> >> even today it only "holds" the data when it resides in the RCU cache, >> otherwise it points to data "owned" by the caller. Admittedly, my first >> patch makes the cache data structure separate, making ipa_class_context >> only a utility for calculating the estimates - but given how all the >> code is structured, it does not really work as the grand encapsulator of >> all context data when passing it from a function to function. >> >> > Adding the actual estimates there would duplicate it with cache. >> >> The first patch in the series makes the cache items not contain >> ipa_call_context directly, so in my patch series at least, the estimates >> are not duplicated. > > ipa_call_context defines the context estimates depends on. This puts > all the info to one place and makes the cache well defined - it assigns > contexts to estimates. From this point of view I do not quite like > duplicating this logic (copying things into different datastructure) and > making contxt to also contain the esitmates since these are, well, not > context of the call. > > I am happy with merging the analysis results into something like > class function_body_estimate holding all the values. > > Games with the ownerhip you mention above was not original plan. > While perfing inliner I noticed that we spend measurable time in malloc > and that mostly goes to alocaitng the vectors (which we did for long > time). Perhaps cleaner solution is to have > ipa_context_base which is derived by ipa_context and ipa_cached_context > where first preallocats on stack while second allocates on heap? All right, but let's start from the basic objective of the patch. Do we want to have something like ipa_call_arg_values? I hope that the answer to this question is yes. I certainly hope that we want to get rid of passing around each vector as an individual parameter. The one alternative I can think of would be to make each function that now receives the three or four vectors either a method of ipa_call_context or receive ipa_call_context as the parameter. That however does not fit naturally to uses in 1) ipa_fn_summary_t::duplicate, 2) ipa_merge_fn_summary_after_inlining and 3) anywhere in IPA-CP where I'd like the pass to continue with the pass owning the vectors and constructing contexts for each change. If the answer to the above question is yes, then we can have ipa_call_arg_values containing pointers to the vectors - which would be part of ipa_call_context or it can be even derived from it - and which would be constructible from ipa_auto_call_arg_values, which would contain the vectors as autovecs. This would also help me unify the information for IPA-CP clones. Then ipa_cached_call_context would not need to be a different type, it would only allocate the vectors almost like before although I'd prefer it to be, just so that the release function is specific to this kind of context. Just the vec structures themselves would be on the heap. Would that work? Note that, however, I still think that the most context-consumer friendly interface would be to have static function ipa_call_context::for_inlined_edge (see my 2nd patch) which would fetch a context either from a cache or create it without the user knowing anything about the cache at all. But then in order to cache the estimates, they would have to be part of the context... but I do not insist, we can leave the caching explicit. Anyway, please let me know what you think about the above plan. Martin > > Honza >> >> > What >> > about keeping them separate and inventing ipa_call_estimates structure >> > to hold the reults? >> >> I can but unless you do not like the first patch and want me to re-write >> it or just not do anything like it, I don't think it matters because the >> structures will almost always lie next to each other on the user's >> stack. >> >> Martin >> >> >> >> >> >> >> gcc/ChangeLog: >> >> >> >> 2020-08-28 Martin Jambor <mjambor@suse.cz> >> >> >> >> * ipa-fnsummary.h (class ipa_call_context): Changed declaration of >> >> estimate_size_and_time to accept two booleans. Added an overload >> >> of the method without any parameters. New fields m_size, >> >> m_min_size, m_time, m_nonspecialized_time and m_hints. >> >> * ipa-cp.c (hint_time_bonus): Changed the second parameter from >> >> just hints to a const reference to ipa_call_context. >> >> (perform_estimation_of_a_value): Adjusted to the new interface of >> >> ipa_call_context::estimate_size_and_time. >> >> * ipa-fnsummary.c (ipa_call_context::estimate_size_and_time): >> >> Modified to store results into member fields of the class. >> >> * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to the >> >> new interface of ipa_call_context::estimate_size_and_time. >> >> (do_estimate_edge_size): Likewise. >> >> (do_estimate_edge_hints): Likewise.
> > All right, but let's start from the basic objective of the patch. Do > we want to have something like ipa_call_arg_values? OK, i suppose you patch was motivated by two things: 1) combine results of the estimates to one place (reducing number of parameters in estimate_size_and_time) which can be done by inventing ipa_estimates datastructure 2) combine the different things we track on values into one datastructure (ipa_call_agg_values). Which essentially will reduce API for estimate_ipcp_clone_size_and_time and evaluate_properties_for_edge and the construtor of ipa_call_context ipa_call_context is meant to do the following 1) hold precisely the info on which estimates depends on. "unnecesary" info is thrown away to make comparsions quick and memory use down. However the notion of unnecesariness is closely tied into what summary contains. I.e. the info still can be useful for propagation. 2) be able to produce estimates from that info 3) be able to compare itself with other context to handle the context cache. In longer term I want to keep ability to have multiple contextes per function (i.e. add hash method) and update corresponding estimates in cache incrementally/recompute if function changes. I implemented that but it did not seem to pay back in compile time tests enough to be merged. However I epxpect this to change in future as we track more useful info and units keep growing. > > I hope that the answer to this question is yes. I certainly hope that > we want to get rid of passing around each vector as an individual > parameter. The one alternative I can think of would be to make each > function that now receives the three or four vectors either a method of > ipa_call_context or receive ipa_call_context as the parameter. That > however does not fit naturally to uses in 1) ipa_fn_summary_t::duplicate, > 2) ipa_merge_fn_summary_after_inlining and 3) anywhere in IPA-CP where > I'd like the pass to continue with the pass owning the vectors and > constructing contexts for each change. Yep, because these functions compute info context is build from :) I understnad that you want to have a common datastructure to handle known information about parameters. Have way to construct it using the two ways we use (in inliner and in ipa-cp) and feed it into context. If you merge the four vectors and clauses into ipa_call_agg_values which has auto_vecs of prealocated size, so if it is homed on stack, it will mostly avoid calling malloc, then you can pass it to the constructor of context and turn evaluate_properties_for_edge and estimate_ipcp_clone_size_and_time to perhaps member function used to fill them up. We can still play the games about ownership of the vectors needed to keep malloc calls down. > > If the answer to the above question is yes, then we can have > ipa_call_arg_values containing pointers to the vectors - which would be > part of ipa_call_context or it can be even derived from it - and which > would be constructible from ipa_auto_call_arg_values, which would > contain the vectors as autovecs. This would also help me unify the > information for IPA-CP clones. > > Then ipa_cached_call_context would not need to be a different type, it > would only allocate the vectors almost like before although I'd prefer > it to be, just so that the release function is specific to this kind of > context. Just the vec structures themselves would be on the heap. > Would that work? > > Note that, however, I still think that the most context-consumer > friendly interface would be to have static function > ipa_call_context::for_inlined_edge (see my 2nd patch) which would fetch > a context either from a cache or create it without the user knowing > anything about the cache at all. But then in order to cache the > estimates, they would have to be part of the context... but I do not > insist, we can leave the caching explicit. It seems to me that keeping the context separate from info about arguments is doable and would be cleaner (since it is not meant to be holder for info about arguments). Does that seem to make sense? Honza > > Anyway, please let me know what you think about the above plan. > > Martin > > > > > > > Honza > >> > >> > What > >> > about keeping them separate and inventing ipa_call_estimates structure > >> > to hold the reults? > >> > >> I can but unless you do not like the first patch and want me to re-write > >> it or just not do anything like it, I don't think it matters because the > >> structures will almost always lie next to each other on the user's > >> stack. > >> > >> Martin > >> > >> >> > >> >> > >> >> gcc/ChangeLog: > >> >> > >> >> 2020-08-28 Martin Jambor <mjambor@suse.cz> > >> >> > >> >> * ipa-fnsummary.h (class ipa_call_context): Changed declaration of > >> >> estimate_size_and_time to accept two booleans. Added an overload > >> >> of the method without any parameters. New fields m_size, > >> >> m_min_size, m_time, m_nonspecialized_time and m_hints. > >> >> * ipa-cp.c (hint_time_bonus): Changed the second parameter from > >> >> just hints to a const reference to ipa_call_context. > >> >> (perform_estimation_of_a_value): Adjusted to the new interface of > >> >> ipa_call_context::estimate_size_and_time. > >> >> * ipa-fnsummary.c (ipa_call_context::estimate_size_and_time): > >> >> Modified to store results into member fields of the class. > >> >> * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to the > >> >> new interface of ipa_call_context::estimate_size_and_time. > >> >> (do_estimate_edge_size): Likewise. > >> >> (do_estimate_edge_hints): Likewise.
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index e37e71bd24f..010ecfc6b43 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -3182,12 +3182,13 @@ devirtualization_time_bonus (struct cgraph_node *node, return res; } -/* Return time bonus incurred because of HINTS. */ +/* Return time bonus incurred because of hints stored in CTX. */ static int -hint_time_bonus (cgraph_node *node, ipa_hints hints) +hint_time_bonus (cgraph_node *node, const ipa_call_context &ctx) { int result = 0; + ipa_hints hints = ctx.m_hints; if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride)) result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus); return result; @@ -3387,16 +3388,14 @@ perform_estimation_of_a_value (cgraph_node *node, ipa_call_arg_values *avals, int removable_params_cost, int est_move_cost, ipcp_value_base *val) { - int size, time_benefit; - sreal time, base_time; - ipa_hints hints; + int time_benefit; ipa_call_context ctx = ipa_call_context::for_cloned_node (node, avals); - ctx.estimate_size_and_time (&size, NULL, &time, &base_time, &hints); + ctx.estimate_size_and_time (); - base_time -= time; - if (base_time > 65535) - base_time = 65535; + sreal time_delta = ctx.m_nonspecialized_time - ctx.m_time; + if (time_delta > 65535) + time_delta = 65535; /* Extern inline functions have no cloning local time benefits because they will be inlined anyway. The only reason to clone them is if it enables @@ -3404,11 +3403,12 @@ perform_estimation_of_a_value (cgraph_node *node, ipa_call_arg_values *avals, if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl)) time_benefit = 0; else - time_benefit = base_time.to_int () + time_benefit = time_delta.to_int () + devirtualization_time_bonus (node, avals) - + hint_time_bonus (node, hints) + + hint_time_bonus (node, ctx) + removable_params_cost + est_move_cost; + int size = ctx.m_size; gcc_checking_assert (size >=0); /* The inliner-heuristics based estimates may think that in certain contexts some functions do not have any size at all but we want @@ -3463,24 +3463,22 @@ estimate_local_effects (struct cgraph_node *node) || (removable_params_cost && node->can_change_signature)) { struct caller_statistics stats; - ipa_hints hints; - sreal time, base_time; - int size; init_caller_stats (&stats); node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false); ipa_call_context ctx = ipa_call_context::for_cloned_node (node, &avals); - ctx.estimate_size_and_time (&size, NULL, &time, &base_time, &hints); + ctx.estimate_size_and_time (); - time -= devirt_bonus; - time -= hint_time_bonus (node, hints); - time -= removable_params_cost; - size -= stats.n_calls * removable_params_cost; + sreal time = ctx.m_nonspecialized_time - ctx.m_time; + time += devirt_bonus; + time += hint_time_bonus (node, ctx); + time += removable_params_cost; + int size = ctx.m_size - stats.n_calls * removable_params_cost; if (dump_file) fprintf (dump_file, " - context independent values, size: %i, " - "time_benefit: %f\n", size, (base_time - time).to_double ()); + "time_benefit: %f\n", size, (time).to_double ()); if (size <= 0 || node->local) { @@ -3491,8 +3489,7 @@ estimate_local_effects (struct cgraph_node *node) "known contexts, code not going to grow.\n"); } else if (good_cloning_opportunity_p (node, - MIN ((base_time - time).to_int (), - 65536), + MIN ((time).to_int (), 65536), stats.freq_sum, stats.count_sum, size)) { diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 2a0f8ad37b2..eb58b143d1c 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -3281,18 +3281,19 @@ ipa_call_context { } -/* Estimate size and time needed to execute call in the given context. - Additionally determine hints determined by the context. Finally compute - minimal size needed for the call that is independent on the call context and - can be used for fast estimates. Return the values in RET_SIZE, - RET_MIN_SIZE, RET_TIME and RET_HINTS. */ +/* Estimate size needed to execute call in the given context and store it to + m_size, compute minimal size needed for the call that is independent on the + call context and can be used for fast estimates and store it to m_min_size. + + If EST_TIMES is true, estimate time needed to execute call in the given + context, store it to m_time, and time needed when also calculating things + known to be constant in this context and store it to m_nonspecialized_time. + + If EST_HINTS is true, also determine hints for this context and store them + to m_hints. */ void -ipa_call_context::estimate_size_and_time (int *ret_size, - int *ret_min_size, - sreal *ret_time, - sreal *ret_nonspecialized_time, - ipa_hints *ret_hints) +ipa_call_context::estimate_size_and_time (bool est_times, bool est_hints) { class ipa_fn_summary *info = ipa_fn_summaries->get (m_node); size_time_entry *e; @@ -3322,8 +3323,8 @@ ipa_call_context::estimate_size_and_time (int *ret_size, if (m_node->callees || m_node->indirect_calls) estimate_calls_size_and_time (m_node, &size, &min_size, - ret_time ? &time : NULL, - ret_hints ? &hints : NULL, m_possible_truths, + est_times ? &time : NULL, + est_hints ? &hints : NULL, m_possible_truths, m_avals); sreal nonspecialized_time = time; @@ -3350,7 +3351,7 @@ ipa_call_context::estimate_size_and_time (int *ret_size, known to be constant in a specialized setting. */ if (nonconst) size += e->size; - if (!ret_time) + if (!est_times) continue; nonspecialized_time += e->time; if (!nonconst) @@ -3390,7 +3391,7 @@ ipa_call_context::estimate_size_and_time (int *ret_size, if (time > nonspecialized_time) time = nonspecialized_time; - if (ret_hints) + if (est_hints) { if (info->loop_iterations && !info->loop_iterations->evaluate (m_possible_truths)) @@ -3410,16 +3411,15 @@ ipa_call_context::estimate_size_and_time (int *ret_size, if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size, time.to_double (), nonspecialized_time.to_double ()); - if (ret_time) - *ret_time = time; - if (ret_nonspecialized_time) - *ret_nonspecialized_time = nonspecialized_time; - if (ret_size) - *ret_size = size; - if (ret_min_size) - *ret_min_size = min_size; - if (ret_hints) - *ret_hints = hints; + m_size = size; + m_min_size = min_size; + if (est_times) + { + m_time = time; + m_nonspecialized_time = nonspecialized_time; + } + if (est_hints) + m_hints = hints; return; } diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index 74a78d31391..6253e375e3b 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -312,16 +312,41 @@ public: : m_node(NULL) { } - void estimate_size_and_time (int *ret_size, int *ret_min_size, - sreal *ret_time, - sreal *ret_nonspecialized_time, - ipa_hints *ret_hints); + void estimate_size_and_time (bool est_times, bool est_hints); + + /* Estimate everything about a call in this context. */ + void estimate_size_and_time () + { + estimate_size_and_time (true, true); + } + void store_to_cache (ipa_node_context_cache_entry *cache) const; bool equivalent_to_p (const ipa_node_context_cache_entry &cache) const; bool exists_p () { return m_node != NULL; } + + + /* The following metrics fields only contain valid contents after they have + been calculated by estimate_size_and_time (provided the function was + instructed to compute them). */ + + /* Estimated size needed to execute call in the given context. */ + int m_size = INT_MIN; + /* Minimal size needed for the call that is independent on the call context + and can be used for fast estimates. */ + int m_min_size = INT_MIN; + + /* Estimated time needed to execute call in the given context. */ + sreal m_time; + /* Estimated time needed to execute the function when not ignoring + computations known to be constant in this context. */ + sreal m_nonspecialized_time; + + /* Further discovered reasons why to inline or specialize the give calls. */ + ipa_hints m_hints = -1; + private: /* Called function. */ cgraph_node *m_node; diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index 9788ee346ac..706e4fffe4a 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -428,16 +428,10 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) && !opt_for_fn (callee->decl, flag_profile_partial_training) && !callee->count.ipa_p ()) { - sreal chk_time, chk_nonspec_time; - int chk_size, chk_min_size; - - ipa_hints chk_hints; - ctx.estimate_size_and_time (&chk_size, &chk_min_size, - &chk_time, &chk_nonspec_time, - &chk_hints); - gcc_assert (chk_size == size && chk_time == time - && chk_nonspec_time == nonspec_time - && chk_hints == hints); + ctx.estimate_size_and_time (); + gcc_assert (ctx.m_size == size && ctx.m_time == time + && ctx.m_nonspecialized_time == nonspec_time + && ctx.m_hints == hints); } } else @@ -450,18 +444,26 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) else e = node_context_cache->get_create (callee); - ctx.estimate_size_and_time (&size, &min_size, - &time, &nonspec_time, &hints); + ctx.estimate_size_and_time (); ctx.store_to_cache (&e->entry); + size = ctx.m_size; e->entry.m_size = size; + time = ctx.m_time; e->entry.m_time = time; + nonspec_time = ctx.m_nonspecialized_time; e->entry.m_nonspec_time = nonspec_time; + hints = ctx.m_hints; e->entry.m_hints = hints; } } else - ctx.estimate_size_and_time (&size, &min_size, - &time, &nonspec_time, &hints); + { + ctx.estimate_size_and_time (); + size = ctx.m_size; + time = ctx.m_time; + nonspec_time = ctx.m_nonspecialized_time; + hints = ctx.m_hints; + } /* When we have profile feedback, we can quite safely identify hot edges and for those we disable size limits. Don't do that when @@ -524,7 +526,6 @@ ipa_remove_from_growth_caches (struct cgraph_edge *edge) int do_estimate_edge_size (struct cgraph_edge *edge) { - int size; ipa_call_arg_values avals; /* When we do caching, use do_estimate_edge_time to populate the entry. */ @@ -532,7 +533,7 @@ do_estimate_edge_size (struct cgraph_edge *edge) if (edge_growth_cache != NULL) { do_estimate_edge_time (edge); - size = edge_growth_cache->get (edge)->size; + int size = edge_growth_cache->get (edge)->size; gcc_checking_assert (size); return size - (size > 0); } @@ -541,8 +542,8 @@ do_estimate_edge_size (struct cgraph_edge *edge) gcc_checking_assert (edge->inline_failed); ipa_call_context ctx = ipa_call_context::for_inlined_edge (edge, vNULL, &avals); - ctx.estimate_size_and_time (&size, NULL, NULL, NULL, NULL); - return size; + ctx.estimate_size_and_time (false, false); + return ctx.m_size; } @@ -552,7 +553,6 @@ do_estimate_edge_size (struct cgraph_edge *edge) ipa_hints do_estimate_edge_hints (struct cgraph_edge *edge) { - ipa_hints hints; ipa_call_arg_values avals; /* When we do caching, use do_estimate_edge_time to populate the entry. */ @@ -560,7 +560,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge) if (edge_growth_cache != NULL) { do_estimate_edge_time (edge); - hints = edge_growth_cache->get (edge)->hints; + ipa_hints hints = edge_growth_cache->get (edge)->hints; gcc_checking_assert (hints); return hints - 1; } @@ -569,9 +569,8 @@ do_estimate_edge_hints (struct cgraph_edge *edge) gcc_checking_assert (edge->inline_failed); ipa_call_context ctx = ipa_call_context::for_inlined_edge (edge, vNULL, &avals); - ctx.estimate_size_and_time (NULL, NULL, NULL, NULL, &hints); - hints |= simple_edge_hints (edge); - return hints; + ctx.estimate_size_and_time (false, true); + return ctx.m_hints | simple_edge_hints (edge); } /* Estimate the size of NODE after inlining EDGE which should be an