diff mbox

[Google] AutoFDO cleanup patch

Message ID CAO2gOZXZCyONfh0yn1MGEVRTQ-QfjQgEdpZoZUxovr4d1U20_Q@mail.gmail.com
State New
Headers show

Commit Message

Dehao Chen May 13, 2013, 1:31 a.m. UTC
I've added some more cleanup to make AutoFDO isolated as well as
consistent with 4.8.

Bootstrapped and passed regression tests and benchmark performance tests.

OK for google branches?

Thanks,
Dehao

http://codereview.appspot.com/9250047

On Sat, May 11, 2013 at 9:30 PM, Xinliang David Li <davidxl@google.com> wrote:
> Looks good.
>
> David
>
> On Sat, May 11, 2013 at 3:53 PM, Dehao Chen <dehao@google.com> wrote:
>> In AutoFDO, we early-inline callsites that were inlined in profiling
>> runs regardless of the size limit. With this change, the existing
>> ipa-inline tunings for AutoFDO are unnecessary: it's fine to just use
>> the traditional FDO based heuristic. This patch cleans up the original
>> tunings and makes it easier to port to gcc 4_8.
>>
>> Bootstrapped and passed all regression test. And passed benchmark
>> performance tests.
>>
>> Is it ok for google-4_7 branch?
>>
>> Thanks,
>> Dehao
>>
>> http://codereview.appspot.com/9250047
diff mbox

Patch

Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c	(revision 198796)
+++ gcc/ipa-inline.c	(working copy)
@@ -481,21 +481,13 @@  edge_hot_enough_p (struct cgraph_edge *edge)
 {
   if (cgraph_maybe_hot_edge_p (edge))
     return true;
-  if (flag_auto_profile)
-    {
-      gcov_type callsite_total_count;
-      /* Check if total sample counts in the callee is available.  */
-      if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true)
-	  && maybe_hot_count_p (callsite_total_count))
-	return true;
-      /* We disable hot-caller heuristic if the callee's entry count is
-	 0 because in this case we do not have enough information to
-	 calculate the scaling factor.  */
-      if (edge->callee->count == 0 && edge->callee->max_bb_count > 0)
-	return false;
-      /* In AutoFDO, if the preivous few heuristic fail, we will fall
-	 back to use hot-caller heuristic as is used by FDO.  */
-    }
+
+  /* We disable hot-caller heuristic if the callee's entry count is
+     0 because in this case we do not have enough information to
+     calculate the scaling factor.  */
+  if (flag_auto_profile && edge->callee->count == 0
+      && edge->callee->max_bb_count > 0)
+    return false;
   if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER)
       && maybe_hot_count_p (edge->caller->max_bb_count))
     return true;
@@ -874,36 +866,14 @@  edge_badness (struct cgraph_edge *edge, bool dump)
   else if (max_count)
     {
       int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
-      if (flag_auto_profile && edge->count == 0)
-	{
-	  gcov_type callsite_count;
-	  if (afdo_get_callsite_count (edge, &callsite_count, NULL, false))
-	    edge->count = callsite_count;
-	  if (edge->count > max_count)
-	    max_count = edge->count;
-	}
       badness =
 	((int)
 	 ((double) edge->count * INT_MIN / 2 / max_count / 512) *
 	 relative_time_benefit (callee_info, edge, time_growth)) / growth;
-      if (flag_auto_profile && profile_info->sum_all > 0)
-	{
-	  gcov_type callsite_total_count;
-	  if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true))
-	    {
-	      gcov_type afdo_badness =
-		((int)
-		 ((double) callsite_total_count * INT_MIN / 2 /
-		 profile_info->sum_all / 64) *
-		 relative_time_benefit (callee_info, edge, time_growth)) / growth;
-	      if (afdo_badness < badness)
-		badness = afdo_badness;
-	    }
-	}
 
       /* Be sure that insanity of the profile won't lead to increasing counts
 	 in the scalling and thus to overflow in the computation above.  */
-      gcc_assert (flag_auto_profile || max_count >= edge->count);
+      gcc_assert (max_count >= edge->count);
       if (dump)
 	{
 	  fprintf (dump_file,
@@ -1534,7 +1504,6 @@  inline_small_functions (void)
       }
 
   gcc_assert (in_lto_p
-	      || flag_auto_profile
 	      || !max_count
 	      || (profile_info && flag_branch_probabilities));
 
@@ -1568,7 +1537,7 @@  inline_small_functions (void)
 	 of date value on it, we re-insert it now.  */
       current_badness = edge_badness (edge, false);
       gcc_assert (cached_badness == current_badness);
-      gcc_assert (flag_auto_profile || current_badness >= badness);
+      gcc_assert (current_badness >= badness);
       if (current_badness != badness)
 	{
 	  edge->aux = fibheap_insert (heap, current_badness, edge);
Index: gcc/cgraph.c
===================================================================
--- gcc/cgraph.c	(revision 198796)
+++ gcc/cgraph.c	(working copy)
@@ -2326,11 +2326,6 @@  cgraph_clone_node (struct cgraph_node *n, tree dec
                               / n->count) * count;
   new_node->is_versioned_clone = n->is_versioned_clone;
   new_node->frequency = n->frequency;
-  /* In AutoFDO, a cloned callee may be hot even when the original
-     function is profiled cold.  */
-  if (flag_auto_profile && count > 0
-      && new_node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
-    new_node->frequency = NODE_FREQUENCY_NORMAL;
   new_node->clone = n->clone;
   new_node->clone.tree_map = 0;
   if (n->count)
Index: gcc/auto-profile.c
===================================================================
--- gcc/auto-profile.c	(revision 198796)
+++ gcc/auto-profile.c	(working copy)
@@ -25,6 +25,8 @@  along with GCC; see the file COPYING3.  If not see
 
 #include "config.h"
 #include "system.h"
+#include "coretypes.h"
+#include "tree.h"
 #include "flags.h"	      /* for auto_profile_file.  */
 #include "basic-block.h"      /* for gcov_type.	 */
 #include "diagnostic-core.h"  /* for inform ().  */
@@ -178,9 +180,6 @@  static htab_t function_htab;
 /* Hash table to hold stack information.  */
 static htab_t stack_htab;
 
-/* Hash table to hold inline scale information.  */
-static htab_t stack_scale_htab;
-
 /* Hash table to hold assembler name to bfd name mapping.  */
 static htab_t bfd_name_htab;
 
@@ -525,7 +524,7 @@  read_aux_modules (void)
       if ((aux_entry->lang & GCOV_MODULE_ASM_STMTS)
 	   && flag_ripa_disallow_asm_modules)
 	{
-	  if (flag_opt_info >= OPT_INFO_MIN)
+	  if (flag_opt_info)
 	    inform (0, "Not importing %s: contains "
 		    "assembler statements", aux_entry->name);
 	  continue;
@@ -533,7 +532,7 @@  read_aux_modules (void)
       afdo_add_module (&module_infos[curr_module], aux_entry, false);
       if (incompatible_cl_args (module_infos[0], module_infos[curr_module]))
 	{
-	  if (flag_opt_info >= OPT_INFO_MIN)
+	  if (flag_opt_info)
 	    inform (0, "Not importing %s: command-line"
 		    " arguments not compatible with primary module",
 		    aux_entry->name);
@@ -815,142 +814,6 @@  afdo_add_bfd_name_mapping (const char *as_name, co
     free (entry);
 }
 
-/* When EDGE is inlined, the callee is cloned recursively. This function
-   updates the copy scale recursively along the callee. STACK stores the
-   call stack info from the original inlined edge to the caller of EDGE.
-
-   E.g. foo calls bar with call count 100;
-	bar calls baz with call count 300;
-	bar has an entry count of 400, baz has an entry count of 1000;
-   Initial callgraph looks like:
-     foo --(100)--> bar(400)
-     bar --(300)--> baz(1000)
-
-   Consider baz is first inlined into bar, we will have a call graph like:
-     foo --(100)--> bar(400)
-     bar --(300)--> baz.clone(300)
-     baz(700)
-   At this point, a copyscale mapping is added:
-     (bar->baz) --> 0.3
-
-   Consider bar is then inlined into foo, we will have a call graph like:
-     foo --(100)--> bar.clone(100)
-     bar.clone --(75)-->baz.clone_2(75)
-     bar --(225)->baz.clone(225)
-     baz(700)
-   At this point, two copyscale mappings are added:
-     (foo->bar) --> 0.25
-     (foo->bar->baz)  --> 0.25 * 0.3
-*/
-
-static void
-afdo_propagate_copy_scale (struct cgraph_edge *edge, struct gcov_stack *stack)
-{
-  struct gcov_stack *new_stack, *entry, **stack_slot;
-  struct cgraph_edge *e;
-
-  if (edge->callee->global.inlined_to == NULL)
-    return;
-  if (stack->count == 0)
-    return;
-
-  new_stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
-  new_stack->func_name =
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->caller->decl));
-  new_stack->callee_name =
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
-  new_stack->size = get_inline_stack_size_by_stmt (edge->call_stmt);
-  new_stack->stack = (struct gcov_callsite_pos *) xmalloc (
-      sizeof (struct gcov_callsite_pos) * (new_stack->size + stack->size));
-  get_inline_stack_by_stmt (edge->call_stmt, edge->caller->decl,
-			    new_stack->stack, false);
-  entry = (struct gcov_stack *) htab_find (stack_scale_htab, new_stack);
-  if (entry == NULL)
-    {
-      free (new_stack->stack);
-      free (new_stack);
-      return;
-    }
-
-  new_stack->func_name = stack->func_name;
-  new_stack->count = entry->count * stack->count / REG_BR_PROB_BASE;
-  memcpy (new_stack->stack + new_stack->size,
-	  stack->stack, stack->size * sizeof (struct gcov_callsite_pos));
-  new_stack->size += stack->size;
-  stack_slot = (struct gcov_stack **)
-      htab_find_slot (stack_scale_htab, new_stack, INSERT);
-  if (!*stack_slot)
-    *stack_slot = new_stack;
-  else
-    (*stack_slot)->count = MAX ((*stack_slot)->count, new_stack->count);
-
-  for (e = edge->callee->callees; e; e = e->next_callee)
-    afdo_propagate_copy_scale (e, new_stack);
-}
-
-/* For an inlined EDGE, the scale (i.e. edge->count / edge->callee->count)
-   is recorded in a hash map.  */
-
-void
-afdo_add_copy_scale (struct cgraph_edge *edge)
-{
-  struct gcov_stack *stack;
-  struct gcov_stack **stack_slot;
-  int scale;
-  int size = get_inline_stack_size_by_edge (edge);
-  struct cgraph_node *n = edge->callee->clone_of;
-  struct cgraph_edge *e;
-  gcov_type sum_cloned_count;
-
-  if (edge->callee->clone_of)
-    {
-      n = edge->callee->clone_of->clones;
-      sum_cloned_count = edge->callee->clone_of->count;
-    }
-  else
-    {
-      n = edge->callee->clones;
-      sum_cloned_count = edge->callee->count;
-    }
-
-  for (; n; n = n->next_sibling_clone)
-    sum_cloned_count += n->count;
-  if (sum_cloned_count > 0)
-    scale = (double) edge->count * REG_BR_PROB_BASE / sum_cloned_count;
-  else if (edge->caller->count == 0 && edge->caller->max_bb_count == 0)
-    scale = 0;
-  else
-    scale = REG_BR_PROB_BASE;
-  if (scale > REG_BR_PROB_BASE)
-    scale = REG_BR_PROB_BASE;
-
-  if (size == 0)
-    return;
-  stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack));
-  stack->func_name
-      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
-	edge->caller->global.inlined_to ?
-	    edge->caller->global.inlined_to->decl : edge->caller->decl));
-  stack->callee_name
-      = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
-  stack->size = size;
-  stack->stack = (struct gcov_callsite_pos *)
-      xmalloc (sizeof (struct gcov_callsite_pos) * size);
-  stack->count = scale;
-
-  get_inline_stack_by_edge (edge, stack->stack);
-
-  stack_slot = (struct gcov_stack **)
-      htab_find_slot (stack_scale_htab, stack, INSERT);
-  if (!*stack_slot)
-    *stack_slot = stack;
-  else
-    (*stack_slot)->count = MAX ((*stack_slot)->count, stack->count);
-
-  for (e = edge->callee->callees; e; e = e->next_callee)
-    afdo_propagate_copy_scale (e, stack);
-}
-
 /* For a given POS_STACK with SIZE, get the COUNT, MAX_COUNT, NUM_INST,
    HIST_SIZE and HIST for the inline stack. If CALLEE_NAME is non-null,
    the COUNT/MAX_COUNT represents the total/max count in the inline stack.
@@ -964,56 +827,24 @@  get_stack_count (struct gcov_callsite_pos *pos_sta
 		 gcov_type *count, gcov_type *max_count, gcov_type *num_inst,
 		 gcov_unsigned_t *hist_size, struct gcov_hist **hist)
 {
-  int i;
-
-  for (i = 0; i < size; i++)
+  struct gcov_stack stack, *entry;
+  stack.func_name = pos_stack[size - 1].func;
+  stack.callee_name = callee_name;
+  stack.stack = pos_stack;
+  stack.size = size;
+  entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
+  if (entry)
     {
-      struct gcov_stack stack, *entry;
-      stack.func_name = pos_stack[size - i - 1].func;
-      stack.callee_name = callee_name;
-      stack.stack = pos_stack;
-      stack.size = size - i;
-      entry = (struct gcov_stack *) htab_find (stack_htab, &stack);
-      if (entry)
+      *count = entry->count;
+      *num_inst = entry->num_inst;
+      if (max_count)
+	*max_count = entry->max_count;
+      if (hist_size)
 	{
-	  if (i == 0)
-	    {
-	      *count = entry->count;
-	      *num_inst = entry->num_inst;
-	      if (max_count)
-		*max_count = entry->max_count;
-	      if (hist_size)
-		{
-		  *hist_size = entry->hist_size;
-		  *hist = entry->hist;
-		}
-	      return true;
-	    }
-	  else
-	    {
-	      struct gcov_stack scale_stack, *scale_entry;
-	      scale_stack.stack = pos_stack + size - i;
-	      scale_stack.size = i;
-	      scale_stack.func_name = pos_stack[size - 1].func;
-	      scale_stack.callee_name = stack.func_name;
-	      scale_entry = (struct gcov_stack *)
-		  htab_find (stack_scale_htab, &scale_stack);
-	      if (scale_entry)
-		{
-		  *count = entry->count * scale_entry->count
-			   / REG_BR_PROB_BASE;
-		  *num_inst = entry->num_inst;
-		  if (max_count)
-		    *max_count = entry->max_count;
-		  if (hist_size)
-		    {
-		      *hist_size = entry->hist_size;
-		      *hist = entry->hist;
-		    }
-		  return true;
-		}
-	    }
+	  *hist_size = entry->hist_size;
+	  *hist = entry->hist;
 	}
+      return true;
     }
   *count = 0;
   *num_inst = 0;
@@ -1057,14 +888,14 @@  get_stmt_count (gimple stmt, gcov_type *count, gco
 /* For a given EDGE, if IS_TOTAL is true, save EDGE->callee's total count
    to COUNT, otherwise save EDGE's count to COUNT.  */
 
-bool
-afdo_get_callsite_count (struct cgraph_edge *edge, gcov_type *count,
-			 gcov_type *max_count, bool is_total)
+static bool
+get_callsite_count (struct cgraph_edge *edge, gcov_type *count,
+		    gcov_type *max_count)
 {
   struct gcov_callsite_pos *pos_stack;
   gcov_type num_inst;
-  const char *callee_name = is_total ?
-      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)) : NULL;
+  const char *callee_name =
+      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
   int size = get_inline_stack_size_by_edge (edge);
 
   if (size == 0)
@@ -1074,19 +905,15 @@  get_stmt_count (gimple stmt, gcov_type *count, gco
 
   get_inline_stack_by_edge (edge, pos_stack);
 
-  if (!is_total)
-    pos_stack[0].discr =
-	get_discriminator_from_locus (gimple_location (edge->call_stmt));
-
   return get_stack_count (pos_stack, callee_name,
 			  size, count, max_count, &num_inst, NULL, NULL);
 }
 
 /* For a given BB, return its execution count, and annotate value profile
-   on statements if ANNOTATE_VPT is true.  */
+   on statements.  */
 
-gcov_type
-afdo_get_bb_count (basic_block bb, bool annotate_vpt)
+static gcov_type
+afdo_get_bb_count (basic_block bb)
 {
   gimple_stmt_iterator gsi;
   gcov_type max_count = 0;
@@ -1103,7 +930,7 @@  get_stmt_count (gimple stmt, gcov_type *count, gco
 	  if (count > max_count)
 	    max_count = count;
 	  has_annotated = true;
-	  if (annotate_vpt && hist_size > 0)
+	  if (hist_size > 0)
 	    afdo_vpt (stmt, hist, hist_size);
 	}
     }
@@ -1126,16 +953,24 @@  afdo_annotate_cfg (void)
 
   FOR_EACH_BB (bb)
     {
-      bb->count = afdo_get_bb_count (bb, true);
+      bb->count = afdo_get_bb_count (bb);
       if (bb->count > max_count)
 	max_count = bb->count;
     }
   if (ENTRY_BLOCK_PTR->count > ENTRY_BLOCK_PTR->next_bb->count)
-    ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+    {
+      ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count;
+      ENTRY_BLOCK_PTR->next_bb->flags |= BB_ANNOTATED;
+    }
+  if (ENTRY_BLOCK_PTR->count > EXIT_BLOCK_PTR->prev_bb->count)
+    {
+      EXIT_BLOCK_PTR->prev_bb->count = ENTRY_BLOCK_PTR->count;
+      EXIT_BLOCK_PTR->prev_bb->flags |= BB_ANNOTATED;
+    }
   if (max_count > 0)
     {
-      counts_to_freqs ();
       afdo_calculate_branch_prob ();
+      counts_to_freqs ();
       profile_status = PROFILE_READ;
     }
   if (flag_value_profile_transformations)
@@ -1416,13 +1251,6 @@  init_auto_profile (void)
 				  0,
 				  xcalloc,
 				  free);
-  /* Initialize the stack scale hash table.  */
-  stack_scale_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
-				  afdo_stack_hash,
-				  afdo_stack_eq,
-				  0,
-				  xcalloc,
-				  free);
   /* Initialize the bfd name mapping table.  */
   bfd_name_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE,
 				     afdo_bfd_name_hash,
@@ -1477,7 +1305,6 @@  end_auto_profile (void)
   free (file_names);
   htab_delete (function_htab);
   htab_delete (stack_htab);
-  htab_delete (stack_scale_htab);
   htab_delete (bfd_name_htab);
   htab_delete (module_htab);
   profile_info = NULL;
@@ -1845,7 +1672,7 @@  bool
 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
 {
   gcov_type count, max_count;
-  if (afdo_get_callsite_count (edge, &count, &max_count, true))
+  if (get_callsite_count (edge, &count, &max_count))
     {
       bool is_hot;
       const struct gcov_ctr_summary *saved_profile_info = profile_info;
Index: gcc/auto-profile.h
===================================================================
--- gcc/auto-profile.h	(revision 198796)
+++ gcc/auto-profile.h	(working copy)
@@ -32,19 +32,10 @@  extern void afdo_set_current_function_count (void)
 /* Add the assembly_name to bfd_name mapping.  */
 extern void afdo_add_bfd_name_mapping (const char *, const char *);
 
-/* Add copy scale for an inlined edge to stack_scale_map.  */
-extern void afdo_add_copy_scale (struct cgraph_edge *);
-
 /* Calculate branch probability in both AutoFDO pass and after inlining.  */
 extern void afdo_calculate_branch_prob (void);
 
-/* Calculate total sample count of an inlined callsite.  */
-extern bool afdo_get_callsite_count (struct cgraph_edge *, gcov_type *,
-				     gcov_type *, bool);
-
 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
 extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
 
-/* Calculate basic block count.  */
-extern gcov_type afdo_get_bb_count (basic_block, bool);
 #endif /* AUTO_PROFILE_H */
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 198796)
+++ gcc/Makefile.in	(working copy)
@@ -2309,7 +2309,7 @@  tree-inline.o : tree-inline.c $(CONFIG_H) $(SYSTEM
    intl.h $(FUNCTION_H) $(GIMPLE_H) \
    debug.h $(DIAGNOSTIC_H) $(EXCEPT_H) $(TREE_FLOW_H) tree-iterator.h tree-mudflap.h \
    $(IPA_PROP_H) value-prof.h $(TREE_PASS_H) $(TARGET_H) $(INTEGRATE_H) \
-   tree-pretty-print.h l-ipo.h $(AUTO_PROFILE_H)
+   tree-pretty-print.h l-ipo.h
 print-tree.o : print-tree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
    $(GGC_H) langhooks.h tree-iterator.h \
    $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_PASS_H) gimple-pretty-print.h
@@ -3030,7 +3030,7 @@  ipa-inline-transform.o : ipa-inline-transform.c $(
    $(DIAGNOSTIC_H) $(PARAMS_H) $(TIMEVAR_H) $(TREE_PASS_H) \
    $(HASHTAB_H) $(COVERAGE_H) $(GGC_H) $(TREE_FLOW_H) $(IPA_PROP_H) \
    gimple-pretty-print.h ipa-inline.h $(LTO_STREAMER_H) tree-pass.h \
-   l-ipo.h $(AUTO_PROFILE_H)
+   l-ipo.h
 ipa-utils.o : ipa-utils.c $(IPA_UTILS_H) $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TM_H) $(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) langhooks.h \
    pointer-set.h $(GGC_H) $(GIMPLE_H) $(SPLAY_TREE_H) \
@@ -3054,9 +3054,9 @@  coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
    opts.h $(TREE_FLOW_H) $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h l-ipo.h dwarf2asm.h \
    $(DIAGNOSTIC_CORE_H) intl.h gt-coverage.h $(TARGET_H) $(AUTO_PROFILE_H)
 auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
-   $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) $(PROFILE_H) \
+   $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
    $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H) value-prof.h \
-   $(COVERAGE_H) $(AUTO_PROFILE_H)
+   $(COVERAGE_H) coretypes.h $(TREE_H) $(AUTO_PROFILE_H)
 cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
    $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) output.h $(FUNCTION_H) $(TREE_PASS_H) \
Index: gcc/tree-inline.c
===================================================================
--- gcc/tree-inline.c	(revision 198796)
+++ gcc/tree-inline.c	(working copy)
@@ -51,7 +51,6 @@  along with GCC; see the file COPYING3.  If not see
 #include "integrate.h"
 #include "langhooks.h"
 #include "l-ipo.h"
-#include "auto-profile.h"
 
 #include "rtl.h"	/* FIXME: For asm_str_count.  */
 
@@ -1826,19 +1825,6 @@  copy_bb (copy_body_data *id, basic_block bb, int f
       copy_gsi = gsi_last_bb (copy_basic_block);
     }
 
-  if (flag_auto_profile && profile_info)
-    {
-      /* If the same inline happens in the profile-collection binary, use
-	 that instance's profile count. Otherwise use the scaled count.
-	 Do *not* annotate value histogram on it because no value profile
-	 transformations will happen after ipa-inline.  */
-      gcov_type count = afdo_get_bb_count (copy_basic_block, false);
-      if (copy_basic_block->flags & BB_ANNOTATED)
-	copy_basic_block->count = count;
-      else if (bb->flags & BB_ANNOTATED)
-	copy_basic_block->flags |= BB_ANNOTATED;
-    }
-
   return copy_basic_block;
 }
 
@@ -2269,8 +2255,6 @@  copy_cfg_body (copy_body_data * id, gcov_type coun
   if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
     count_scale = (REG_BR_PROB_BASE * (double)count
 		   / ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count);
-  else if (flag_auto_profile && count == 0)
-    count_scale = 0;
   else
     count_scale = REG_BR_PROB_BASE;
   if (count_scale > REG_BR_PROB_BASE)
Index: gcc/predict.c
===================================================================
--- gcc/predict.c	(revision 198796)
+++ gcc/predict.c	(working copy)
@@ -2875,14 +2875,7 @@  rebuild_frequencies (void)
 	}
     }
   else if (profile_status == PROFILE_READ)
-    {
-      if (flag_auto_profile)
-	{
-	  afdo_calculate_branch_prob ();
-	  compute_function_frequency ();
-	}
-      counts_to_freqs ();
-    }
+    counts_to_freqs ();
   else
     gcc_unreachable ();
   timevar_pop (TV_REBUILD_FREQUENCIES);
Index: gcc/ipa-inline-transform.c
===================================================================
--- gcc/ipa-inline-transform.c	(revision 198796)
+++ gcc/ipa-inline-transform.c	(working copy)
@@ -47,7 +47,6 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "tree-pass.h"
 #include "l-ipo.h"
-#include "auto-profile.h"
 #include "diagnostic-core.h"
 #include "params.h"
 
@@ -139,23 +138,6 @@  void
 clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
 		     bool update_original, int *overall_size)
 {
-  bool has_callsite_profile = false;
-  gcov_type callsite_total_count, callsite_max_count; 
-
-  if (flag_auto_profile)
-    {
-      has_callsite_profile =
-	  afdo_get_callsite_count (e, &callsite_total_count,
-				   &callsite_max_count, true);
-      /* If the callsite is inlined in the profile-collection build,
-	 i.e. the cloned callee has its separate profile, we will use
-	 this separate profile to annotate the callee, and the real
-	 callee body will not be affected. Thus here we need to disable
-	 update_original.  */
-      if (has_callsite_profile)
-	update_original = false;
-    }
-
   if (duplicate)
     {
       /* We may eliminate the need for out-of-line copy to be output.
@@ -196,23 +178,6 @@  clone_inlined_nodes (struct cgraph_edge *e, bool d
 	}
     }
 
-  if (flag_auto_profile && has_callsite_profile)
-    {
-      /* The callee's total count will be non-zero if the callsite
-         was inlined in the profile-collection build, In this case,
-         the original callee may be label unlikely_executed, which
-         may prevent its callees being inlined. Thus we need to reset
-         its frequency to normal.  */
-      if (e->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
-	e->callee->frequency = NODE_FREQUENCY_NORMAL;
-      /* we do not have enough information to calculate the node count
-	 and max_bb_count. Thus we set them to the same value to make
-	 other optimizations aware that they are from cloned inline
-	 instances.  */
-      e->callee->count = callsite_total_count;
-      e->callee->max_bb_count = callsite_max_count;
-    }
-
   if (e->caller->global.inlined_to)
     e->callee->global.inlined_to = e->caller->global.inlined_to;
   else
@@ -417,8 +382,6 @@  inline_call (struct cgraph_edge *e, bool update_or
 
   clone_inlined_nodes (e, true, update_original, overall_size);
 
-  if (flag_auto_profile)
-    afdo_add_copy_scale (e);
 
   gcc_assert (curr->callee->global.inlined_to == to);