diff mbox series

Incremental update of inline summaries

Message ID 20191121081235.ysbjffrssjjduqmu@kam.mff.cuni.cz
State New
Headers show
Series Incremental update of inline summaries | expand

Commit Message

Jan Hubicka Nov. 21, 2019, 8:12 a.m. UTC
Hi,
this patch implements incremental update of the calls summary after inlining.
This is done by simply subtracting the size/time of the optimized-out call
and accounting for the calls of the inlined callee.  I did not implement
incremental update across indirect call and speculation updates, but it would
be easy to do so in case this turns out to be a performance problem.

Overall this saves about 10% of WPA time for cc1 with ipa_update_overall_fn_summary
going from 12% to 1.2%.

-   41.87%     0.38%  lto1-wpa         lto1              [.] inline_small_functions
   - 41.49% inline_small_functions
      - 27.16% update_callee_keys
         - 7.85% can_inline_edge_p
            - 1.55% is_tm_pure
                 0.81% special_function_p
                 0.71% flags_from_decl_or_type
              1.33% sanitize_flags_p
              0.64% cgraph_node::get_availability
           4.74% want_inline_small_function_p
         - 4.51% update_edge_key
              3.51% edge_badness
         - 3.93% can_inline_edge_by_limits_p
              0.76% estimate_size_after_inlining
           0.96% cgraph_node::get_availability
           0.51% sanitize_flags_p
      - 5.86% update_caller_keys
         - 4.93% want_inline_small_function_p
            - 4.36% do_estimate_edge_size
               - 4.35% do_estimate_edge_time
                  + 2.04% ipa_call_context::estimate_size_and_time
                  + 1.83% evaluate_properties_for_edge
           0.72% can_inline_edge_p
      - 3.36% inline_call
         + 1.31% clone_inlined_nodes
           1.28% ipa_merge_fn_summary_after_inlining
        1.41% fibonacci_heap<sreal, cgraph_edge>::extract_minimum_node
      - 0.85% estimate_growth
         - 0.84% do_estimate_growth_1
            - 0.71% do_estimate_edge_size
                 0.52% evaluate_properties_for_edge
      - 0.83% want_inline_small_function_p
         - 0.62% do_estimate_edge_size
              0.61% do_estimate_edge_time

Bootstrapped/regtested x86_64-linux, committed.

Honza

	* ipa-fnsummary.c (ipa_fn_summary::account_size_time): Allow
	negative time in calls summary; correct roundoff errors
	leading to negative times.
	(ipa_merge_fn_summary_after_inlining): Update calls size time table
	if present.
	(ipa_update_overall_fn_summary): Add RESET parameter.
	* ipa-fnsummary.h (ipa_update_overall_fn_summary): Update prototype.
	* ipa-inline-transform.c (inline_call): Enable incremental updates.
diff mbox series

Patch

Index: ipa-fnsummary.c
===================================================================
--- ipa-fnsummary.c	(revision 278540)
+++ ipa-fnsummary.c	(working copy)
@@ -176,7 +176,8 @@  ipa_fn_summary::account_size_time (int s
   if (!size && time == 0 && table)
     return;
 
-  gcc_assert (time >= 0);
+  /* Only for calls we are unaccounting what we previously recorded.  */
+  gcc_checking_assert (time >= 0 || call);
 
   for (i = 0; vec_safe_iterate (table, i, &e); i++)
     if (e->exec_predicate == exec_pred
@@ -226,6 +227,10 @@  ipa_fn_summary::account_size_time (int s
     {
       e->size += size;
       e->time += time;
+      gcc_checking_assert (e->time >= -1);
+      /* Tolerate small roundoff issues.  */
+      if (e->time < 0)
+	e->time = 0;
     }
 }
 
@@ -3897,6 +3902,21 @@  ipa_merge_fn_summary_after_inlining (str
     info->estimated_stack_size = peak;
 
   inline_update_callee_summaries (edge->callee, es->loop_depth);
+  if (info->call_size_time_table)
+    {
+      int edge_size = 0;
+      sreal edge_time = 0;
+
+      estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, vNULL,
+		      		   vNULL, vNULL, 0);
+      /* Unaccount size and time of the optimized out call.  */
+      info->account_size_time (-edge_size, -edge_time,
+	 		       es->predicate ? *es->predicate : true,
+	 		       es->predicate ? *es->predicate : true,
+			       true);
+      /* Account new calls.  */
+      summarize_calls_size_and_time (edge->callee, info);
+    }
 
   /* Free summaries that are not maintained for inline clones/edges.  */
   ipa_call_summaries->remove (edge);
@@ -3905,10 +3925,11 @@  ipa_merge_fn_summary_after_inlining (str
 }
 
 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
-   overall size and time.  Recompute it.  */
+   overall size and time.  Recompute it.
+   If RESET is true, also recompute call_size_time_table.  */
 
 void
-ipa_update_overall_fn_summary (struct cgraph_node *node)
+ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
 {
   class ipa_fn_summary *info = ipa_fn_summaries->get (node);
   class ipa_size_summary *size_info = ipa_size_summaries->get (node);
@@ -3923,7 +3944,8 @@  ipa_update_overall_fn_summary (struct cg
       info->time += e->time;
     }
   info->min_size = (*info->size_time_table)[0].size;
-  vec_free (info->call_size_time_table);
+  if (reset)
+    vec_free (info->call_size_time_table);
   if (node->callees || node->indirect_calls)
     estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
 				  &info->time, NULL,
Index: ipa-fnsummary.h
===================================================================
--- ipa-fnsummary.h	(revision 278540)
+++ ipa-fnsummary.h	(working copy)
@@ -358,7 +358,7 @@  void estimate_ipcp_clone_size_and_time (
 					int *, sreal *, sreal *,
 				        ipa_hints *);
 void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge);
-void ipa_update_overall_fn_summary (struct cgraph_node *node);
+void ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset = true);
 void compute_fn_summary (struct cgraph_node *, bool);
 
 
Index: ipa-inline-transform.c
===================================================================
--- ipa-inline-transform.c	(revision 278540)
+++ ipa-inline-transform.c	(working copy)
@@ -489,9 +489,9 @@  inline_call (struct cgraph_edge *e, bool
     mark_all_inlined_calls_cdtor (e->callee);
   if (opt_for_fn (e->caller->decl, optimize))
     new_edges_found = ipa_propagate_indirect_call_infos (curr, new_edges);
-  check_speculations (e->callee, new_edges);
+  bool removed_p = check_speculations (e->callee, new_edges);
   if (update_overall_summary)
-    ipa_update_overall_fn_summary (to);
+    ipa_update_overall_fn_summary (to, new_edges_found || removed_p);
   else
     /* Update self size by the estimate so overall function growth limits
        work for further inlining into this function.  Before inlining
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 278540)
+++ ipa-inline.c	(working copy)
@@ -672,14 +672,29 @@  want_early_inline_function_p (struct cgr
     }
   else
     {
-      int growth = estimate_edge_growth (e);
+      /* First take care of very large functions.  */
+      int min_growth = estimate_min_edge_growth (e), growth = 0;
       int n;
       int early_inlining_insns = opt_for_fn (e->caller->decl, optimize) >= 3
 				 ? param_early_inlining_insns
 				 : param_early_inlining_insns_o2;
 
+      if (min_growth > early_inlining_insns)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
+			     "  will not early inline: %C->%C, "
+			     "call is cold and code would grow "
+			     "at least by %i\n",
+			     e->caller, callee,
+			     min_growth);
+	  want_inline = false;
+	}
+      else
+        growth = estimate_edge_growth (e);
+
 
-      if (growth <= param_max_inline_insns_size)
+      if (!want_inline || growth <= param_max_inline_insns_size)
 	;
       else if (!e->maybe_hot_p ())
 	{
Index: ipa-inline.h
===================================================================
--- ipa-inline.h	(revision 278540)
+++ ipa-inline.h	(working copy)
@@ -82,6 +82,16 @@  estimate_edge_size (struct cgraph_edge *
 /* Return estimated callee growth after inlining EDGE.  */
 
 static inline int
+estimate_min_edge_growth (struct cgraph_edge *edge)
+{
+  ipa_call_summary *s = ipa_call_summaries->get (edge);
+  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
+  return (ipa_fn_summaries->get (callee)->min_size - s->call_stmt_size);
+}
+
+/* Return estimated callee growth after inlining EDGE.  */
+
+static inline int
 estimate_edge_growth (struct cgraph_edge *edge)
 {
   ipa_call_summary *s = ipa_call_summaries->get (edge);