===================================================================
@@ -605,7 +605,7 @@ remove_unused_scope_block_p (tree scope)
;
/* When not generating debug info we can eliminate info on unused
variables. */
- else if (debug_info_level == DINFO_LEVEL_NONE)
+ else if (!flag_auto_profile && debug_info_level == DINFO_LEVEL_NONE)
{
/* Even for -g0 don't prune outer scopes from artificial
functions, otherwise diagnostics using tree_nonartificial_location
===================================================================
@@ -1802,7 +1802,7 @@ record_niter_bound (struct loop *loop, const wides
}
if (realistic
&& (!loop->any_estimate
- || wi::ltu_p (i_bound, loop->nb_iterations_estimate)))
+ || (!flag_auto_profile && wi::ltu_p (i_bound, loop->nb_iterations_estimate))))
{
loop->any_estimate = true;
loop->nb_iterations_estimate = i_bound;
===================================================================
@@ -449,6 +449,7 @@ extern simple_ipa_opt_pass *make_pass_ipa_lower_em
extern simple_ipa_opt_pass
*make_pass_ipa_function_and_variable_visibility (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tree_profile (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_auto_profile (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_early_local_passes (gcc::context *ctxt);
===================================================================
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic-color.h"
#include "context.h"
#include "pass_manager.h"
+#include "auto-profile.h"
#include "optabs.h"
#if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO)
@@ -663,6 +664,10 @@ compile_file (void)
targetm.asm_out.output_ident (ident_str);
}
+ /* Auto profile finalization. */
+ if (flag_auto_profile)
+ end_auto_profile ();
+
/* Invoke registered plugin callbacks. */
invoke_plugin_callbacks (PLUGIN_FINISH_UNIT, NULL);
===================================================================
@@ -365,7 +365,8 @@ Objective-C and Objective-C++ Dialects}.
@gccoptlist{-faggressive-loop-optimizations -falign-functions[=@var{n}] @gol
-falign-jumps[=@var{n}] @gol
-falign-labels[=@var{n}] -falign-loops[=@var{n}] @gol
--fassociative-math -fauto-inc-dec -fbranch-probabilities @gol
+-fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol
+-fauto-inc-dec -fbranch-probabilities @gol
-fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol
-fbtr-bb-exclusive -fcaller-saves @gol
-fcheck-data-deps -fcombine-stack-adjustments -fconserve-stack @gol
@@ -9164,6 +9165,19 @@ code.
If @var{path} is specified, GCC looks at the @var{path} to find
the profile feedback data files. See @option{-fprofile-dir}.
+
+@item -fauto-profile
+@itemx -fauto-profile=@var{path}
+@opindex fauto-profile
+Enable sampling based feedback directed optimizations, and optimizations
+generally profitable only with profile feedback available.
+
+The following options are enabled: @code{-fbranch-probabilities}, @code{-fvpt},
+@code{-funroll-loops}, @code{-fpeel-loops}, @code{-ftracer}, @code{-ftree-vectorize},
+@code{ftree-loop-distribute-patterns}
+
+If @var{path} is specified, GCC looks at the @var{path} to find
+the profile feedback data files.
@end table
The following options control compiler behavior regarding floating-point
===================================================================
@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see
#include "filenames.h"
#include "target.h"
#include "params.h"
+#include "auto-profile.h"
#include "gcov-io.h"
#include "gcov-io.c"
@@ -1208,7 +1209,9 @@ coverage_init (const char *filename)
bbg_file_stamp = local_tick;
- if (flag_branch_probabilities)
+ if (flag_auto_profile)
+ read_autofdo_file ();
+ else if (flag_branch_probabilities)
read_counts_file ();
/* Name of bbg file. */
===================================================================
@@ -895,6 +895,16 @@ fauto-inc-dec
Common Report Var(flag_auto_inc_dec) Init(1)
Generate auto-inc/dec instructions
+fauto-profile
+Common Report Var(flag_auto_profile) Optimization
+Use sample profile information for call graph node weights. The default
+profile file is fbdata.afdo in 'pwd'.
+
+fauto-profile=
+Common Joined RejectNegative Var(auto_profile_file)
+Use sample profile information for call graph node weights. The profile
+file is specified in the argument.
+
; -fcheck-bounds causes gcc to generate array bounds checks.
; For C, C++ and ObjC: defaults off.
; For Java: defaults to on.
===================================================================
@@ -89,6 +89,7 @@ DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr part
DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans")
DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference")
DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile")
+DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile")
DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const")
DEFTIMEVAR (TV_IPA_PTA , "ipa points-to")
DEFTIMEVAR (TV_IPA_SRA , "ipa SRA")
===================================================================
@@ -134,11 +134,10 @@ static bool gimple_divmod_fixed_value_transform (g
static bool gimple_mod_pow2_value_transform (gimple_stmt_iterator *);
static bool gimple_mod_subtract_transform (gimple_stmt_iterator *);
static bool gimple_stringops_transform (gimple_stmt_iterator *);
-static bool gimple_ic_transform (gimple_stmt_iterator *);
/* Allocate histogram value. */
-static histogram_value
+histogram_value
gimple_alloc_histogram_value (struct function *fun ATTRIBUTE_UNUSED,
enum hist_type type, gimple stmt, tree value)
{
@@ -1307,6 +1306,9 @@ del_node_map (void)
struct cgraph_node*
find_func_by_profile_id (int profile_id)
{
+ if (flag_auto_profile)
+ return cgraph_node::get_for_asmname (
+ get_identifier ((const char *)(size_t)profile_id));
cgraph_node **val = cgraph_node_map->get (profile_id);
if (val)
return *val;
@@ -1320,7 +1322,7 @@ find_func_by_profile_id (int profile_id)
may ICE. Here we only do very minimal sanity check just to make compiler happy.
Returns true if TARGET is considered ok for call CALL_STMT. */
-static bool
+bool
check_ic_target (gimple call_stmt, struct cgraph_node *target)
{
location_t locus;
@@ -1483,7 +1485,7 @@ gimple_ic (gimple icall_stmt, struct cgraph_node *
this call to:
*/
-static bool
+bool
gimple_ic_transform (gimple_stmt_iterator *gsi)
{
gimple stmt = gsi_stmt (*gsi);
===================================================================
@@ -75,6 +75,8 @@ typedef vec<histogram_value> histogram_values;
extern void gimple_find_values_to_profile (histogram_values *);
extern bool gimple_value_profile_transformations (void);
+histogram_value gimple_alloc_histogram_value (struct function *, enum hist_type,
+ gimple stmt, tree);
histogram_value gimple_histogram_value (struct function *, gimple);
histogram_value gimple_histogram_value_of_type (struct function *, gimple,
enum hist_type);
@@ -89,6 +91,8 @@ void verify_histograms (void);
void free_histograms (void);
void stringop_block_profile (gimple, unsigned int *, HOST_WIDE_INT *);
gimple gimple_ic (gimple, struct cgraph_node *, int, gcov_type, gcov_type);
+bool check_ic_target (gimple, struct cgraph_node *);
+bool gimple_ic_transform (gimple_stmt_iterator *);
/* In tree-profile.c. */
===================================================================
@@ -0,0 +1,1679 @@
+/* Read and annotate call graph profile from the auto profile data file.
+ Copyright (C) 2014. Free Software Foundation, Inc.
+ Contributed by Dehao Chen (dehao@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include <map>
+#include <vector>
+#include <set>
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "flags.h"
+#include "basic-block.h"
+#include "diagnostic-core.h"
+#include "gcov-io.h"
+#include "input.h"
+#include "profile.h"
+#include "langhooks.h"
+#include "opts.h"
+#include "tree-pass.h"
+#include "cfgloop.h"
+#include "tree-ssa-alias.h"
+#include "tree-cfg.h"
+#include "tree-cfgcleanup.h"
+#include "tree-ssa-operands.h"
+#include "tree-into-ssa.h"
+#include "internal-fn.h"
+#include "is-a.h"
+#include "gimple-expr.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimple-ssa.h"
+#include "cgraph.h"
+#include "value-prof.h"
+#include "coverage.h"
+#include "params.h"
+#include "ipa-inline.h"
+#include "tree-inline.h"
+#include "auto-profile.h"
+
+/* The following routines implements AutoFDO optimization.
+
+ This optimization uses sampling profiles to annotate basic block counts
+ and uses heuristics to estimate branch probabilities.
+
+ There are three phases in AutoFDO:
+
+ Phase 1: Read profile from the profile data file.
+ The following info is read from the profile datafile:
+ * string_table: a map between function name and its index.
+ * autofdo_source_profile: a map from function_instance name to
+ function_instance. This is represented as a forest of
+ function_instances.
+ * WorkingSet: a histogram of how many instructions are covered for a
+ given percentage of total cycles.
+
+ Phase 2: Early inline + VPT.
+ Early inline uses autofdo_source_profile to find if a callsite is:
+ * inlined in the profiled binary.
+ * callee body is hot in the profiling run.
+ If both condition satisfies, early inline will inline the callsite
+ regardless of the code growth.
+ Phase 2 is an iterative process. During each iteration, we also check
+ if an indirect callsite is promoted and inlined in the profiling run.
+ If yes, vpt will happen to force promote it and in the next iteration,
+ einline will inline the promoted callsite in the next iteration.
+
+ Phase 3: Annotate control flow graph.
+ AutoFDO uses a separate pass to:
+ * Annotate basic block count
+ * Estimate branch probability
+
+ After the above 3 phases, all profile is readily annotated on the GCC IR.
+ AutoFDO tries to reuse all FDO infrastructure as much as possible to make
+ use of the profile. E.g. it uses existing mechanism to calculate the basic
+ block/edge frequency, as well as the cgraph node/edge count.
+*/
+
+#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
+
+namespace autofdo
+{
+
+/* Represent a source location: (function_decl, lineno). */
+typedef std::pair<tree, unsigned> decl_lineno;
+
+/* Represent an inline stack. vector[0] is the leaf node. */
+typedef std::vector<decl_lineno> inline_stack;
+
+/* String array that stores function names. */
+typedef std::vector<const char *> string_vector;
+
+/* Map from function name's index in string_table to target's
+ execution count. */
+typedef std::map<unsigned, gcov_type> icall_target_map;
+
+/* Set of gimple stmts. Used to track if the stmt has already been promoted
+ to direct call. */
+typedef std::set<gimple> stmt_set;
+
+/* Represent count info of an inline stack. */
+struct count_info
+{
+ /* Sampled count of the inline stack. */
+ gcov_type count;
+
+ /* Map from indirect call target to its sample count. */
+ icall_target_map targets;
+
+ /* Whether this inline stack is already used in annotation.
+
+ Each inline stack should only be used to annotate IR once.
+ This will be enforced when instruction-level discriminator
+ is supported. */
+ bool annotated;
+};
+
+/* operator< for "const char *". */
+struct string_compare
+{
+ bool operator()(const char *a, const char *b) const
+ {
+ return strcmp (a, b) < 0;
+ }
+};
+
+/* Store a string array, indexed by string position in the array. */
+class string_table
+{
+public:
+ static string_table *create ();
+
+ /* For a given string, returns its index. */
+ int get_index (const char *name) const;
+
+ /* For a given decl, returns the index of the decl name. */
+ int get_index_by_decl (tree decl) const;
+
+ /* For a given index, returns the string. */
+ const char *get_name (int index) const;
+
+private:
+ string_table () {}
+ bool read ();
+
+ typedef std::map<const char *, unsigned, string_compare> string_index_map;
+ string_vector vector_;
+ string_index_map map_;
+};
+
+/* Profile of a function instance:
+ 1. total_count of the function.
+ 2. head_count of the function (only valid when function is a top-level
+ function_instance, i.e. it is the original copy instead of the
+ inlined copy).
+ 3. map from source location (decl_lineno) of the inlined callsite to
+ profile (count_info).
+ 4. map from callsite to callee function_instance. */
+class function_instance
+{
+public:
+ typedef std::vector<function_instance *> function_instance_stack;
+
+ /* Read the profile and return a function_instance with head count as
+ HEAD_COUNT. Recursively read callsites to create nested function_instances
+ too. STACK is used to track the recursive creation process. */
+ static function_instance *
+ read_function_instance (function_instance_stack *stack,
+ gcov_type head_count);
+
+ /* Recursively deallocate all callsites (nested function_instances). */
+ ~function_instance ();
+
+ /* Accessors. */
+ int
+ name () const
+ {
+ return name_;
+ }
+ gcov_type
+ total_count () const
+ {
+ return total_count_;
+ }
+ gcov_type
+ head_count () const
+ {
+ return head_count_;
+ }
+
+ /* Recursively traverse STACK starting from LEVEL to find the corresponding
+ function_instance. */
+ function_instance *get_function_instance (const inline_stack &stack,
+ unsigned level);
+
+ /* Store the profile info for LOC in INFO. Return TRUE if profile info
+ is found. */
+ bool get_count_info (location_t loc, count_info *info) const;
+
+ /* Read the inlined indirect call target profile for STMT and store it in
+ MAP, return the total count for all inlined indirect calls. */
+ gcov_type find_icall_target_map (gimple stmt, icall_target_map *map) const;
+
+ /* Sum of counts that is used during annotation. */
+ gcov_type total_annotated_count () const;
+
+ /* Mark LOC as annotated. */
+ void mark_annotated (location_t loc);
+
+private:
+ /* Callsite, represented as (decl_lineno, callee_function_name_index). */
+ typedef std::pair<unsigned, unsigned> callsite;
+
+ /* Map from callsite to callee function_instance. */
+ typedef std::map<callsite, function_instance *> callsite_map;
+
+ function_instance (unsigned name, gcov_type head_count)
+ : name_ (name), total_count_ (0), head_count_ (head_count)
+ {
+ }
+
+ /* Traverse callsites of the current function_instance to find one at the
+ location of LINENO and callee name represented in DECL. */
+ function_instance *get_function_instance_by_decl (unsigned lineno,
+ tree decl) const;
+
+ /* Map from source location (decl_lineno) to profile (count_info). */
+ typedef std::map<unsigned, count_info> position_count_map;
+
+ /* function_instance name index in the string_table. */
+ unsigned name_;
+
+ /* Total sample count. */
+ gcov_type total_count_;
+
+ /* Entry BB's sample count. */
+ gcov_type head_count_;
+
+ /* Map from callsite location to callee function_instance. */
+ callsite_map callsites;
+
+ /* Map from source location to count_info. */
+ position_count_map pos_counts;
+};
+
+/* Profile for all functions. */
+class autofdo_source_profile
+{
+public:
+ static autofdo_source_profile *
+ create ()
+ {
+ autofdo_source_profile *map = new autofdo_source_profile ();
+
+ if (map->read ())
+ return map;
+ delete map;
+ return NULL;
+ }
+
+ ~autofdo_source_profile ();
+
+ /* For a given DECL, returns the top-level function_instance. */
+ function_instance *get_function_instance_by_decl (tree decl) const;
+
+ /* Find count_info for a given gimple STMT. If found, store the count_info
+ in INFO and return true; otherwise return false. */
+ bool get_count_info (gimple stmt, count_info *info) const;
+
+ /* Find total count of the callee of EDGE. */
+ gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
+
+ /* Update value profile INFO for STMT from the inlined indirect callsite.
+ Return true if INFO is updated. */
+ bool update_inlined_ind_target (gimple stmt, count_info *info);
+
+ /* Mark LOC as annotated. */
+ void mark_annotated (location_t loc);
+
+private:
+ /* Map from function_instance name index (in string_table) to
+ function_instance. */
+ typedef std::map<unsigned, function_instance *> name_function_instance_map;
+
+ autofdo_source_profile () {}
+
+ /* Read AutoFDO profile and returns TRUE on success. */
+ bool read ();
+
+ /* Return the function_instance in the profile that correspond to the
+ inline STACK. */
+ function_instance *
+ get_function_instance_by_inline_stack (const inline_stack &stack) const;
+
+ name_function_instance_map map_;
+};
+
+/* Store the strings read from the profile data file. */
+static string_table *afdo_string_table;
+
+/* Store the AutoFDO source profile. */
+static autofdo_source_profile *afdo_source_profile;
+
+/* gcov_ctr_summary structure to store the profile_info. */
+static struct gcov_ctr_summary *afdo_profile_info;
+
+/* Helper functions. */
+
+/* Return the original name of NAME: strip the suffix that starts
+ with '.' */
+
+static const char *
+get_original_name (const char *name)
+{
+ char *ret = xstrdup (name);
+ char *find = strchr (ret, '.');
+ if (find != NULL)
+ *find = 0;
+ return ret;
+}
+
+/* Return the combined location, which is a 32bit integer in which
+ higher 16 bits stores the line offset of LOC to the start lineno
+ of DECL, The lower 16 bits stores the discrimnator. */
+
+static unsigned
+get_combined_location (location_t loc, tree decl)
+{
+ /* TODO: allow more bits for line and less bits for discriminator. */
+ return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
+}
+
+/* Return the function decl of a given lexical BLOCK. */
+
+static tree
+get_function_decl_from_block (tree block)
+{
+ tree decl;
+
+ if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
+ return NULL_TREE;
+
+ for (decl = BLOCK_ABSTRACT_ORIGIN (block);
+ decl && (TREE_CODE (decl) == BLOCK);
+ decl = BLOCK_ABSTRACT_ORIGIN (decl))
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ break;
+ return decl;
+}
+
+/* Store inline stack for STMT in STACK. */
+
+static void
+get_inline_stack (location_t locus, inline_stack *stack)
+{
+ if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
+ return;
+
+ tree block = LOCATION_BLOCK (locus);
+ if (block && TREE_CODE (block) == BLOCK)
+ {
+ int level = 0;
+ for (block = BLOCK_SUPERCONTEXT (block);
+ block && (TREE_CODE (block) == BLOCK);
+ block = BLOCK_SUPERCONTEXT (block))
+ {
+ location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
+ if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
+ continue;
+
+ tree decl = get_function_decl_from_block (block);
+ stack->push_back (
+ std::make_pair (decl, get_combined_location (locus, decl)));
+ locus = tmp_locus;
+ level++;
+ }
+ }
+ stack->push_back (
+ std::make_pair (current_function_decl,
+ get_combined_location (locus, current_function_decl)));
+}
+
+/* Return STMT's combined location, which is a 32bit integer in which
+ higher 16 bits stores the line offset of LOC to the start lineno
+ of DECL, The lower 16 bits stores the discrimnator. */
+
+static unsigned
+get_relative_location_for_stmt (gimple stmt)
+{
+ location_t locus = gimple_location (stmt);
+ if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
+ return UNKNOWN_LOCATION;
+
+ for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
+ block = BLOCK_SUPERCONTEXT (block))
+ if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
+ return get_combined_location (locus,
+ get_function_decl_from_block (block));
+ return get_combined_location (locus, current_function_decl);
+}
+
+/* Return true if BB contains indirect call. */
+
+static bool
+has_indirect_call (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
+ && (gimple_call_fn (stmt) == NULL
+ || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
+ return true;
+ }
+ return false;
+}
+
+/* Member functions for string_table. */
+
+string_table *
+string_table::create ()
+{
+ string_table *map = new string_table ();
+ if (map->read ())
+ return map;
+ delete map;
+ return NULL;
+}
+
+/* Return the index of a given function NAME. */
+
+int
+string_table::get_index (const char *name) const
+{
+ if (name == NULL)
+ return -1;
+ string_index_map::const_iterator iter = map_.find (name);
+ if (iter == map_.end ())
+ return -1;
+ else
+ return iter->second;
+}
+
+/* Return the index of a given function DECL. */
+
+int
+string_table::get_index_by_decl (tree decl) const
+{
+ const char *name
+ = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
+ int ret = get_index (name);
+ if (ret != -1)
+ return ret;
+ ret = get_index (lang_hooks.dwarf_name (decl, 0));
+ if (ret != -1)
+ return ret;
+ if (DECL_ABSTRACT_ORIGIN (decl))
+ return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
+ else
+ return -1;
+}
+
+/* Return the function name of a given INDEX. */
+
+const char *
+string_table::get_name (int index) const
+{
+ gcc_assert (index > 0 && index < (int)vector_.size ());
+ return vector_[index];
+}
+
+/* Read the string table. Return TRUE if reading is successful. */
+
+bool
+string_table::read ()
+{
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
+ return false;
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+ /* Read in the file name table. */
+ unsigned string_num = gcov_read_unsigned ();
+ for (unsigned i = 0; i < string_num; i++)
+ {
+ vector_.push_back (get_original_name (gcov_read_string ()));
+ map_[vector_.back ()] = i;
+ }
+ return true;
+}
+
+/* Member functions for function_instance. */
+
+function_instance::~function_instance ()
+{
+ for (callsite_map::iterator iter = callsites.begin ();
+ iter != callsites.end (); ++iter)
+ delete iter->second;
+}
+
+/* Traverse callsites of the current function_instance to find one at the
+ location of LINENO and callee name represented in DECL. */
+
+function_instance *
+function_instance::get_function_instance_by_decl (unsigned lineno,
+ tree decl) const
+{
+ int func_name_idx = afdo_string_table->get_index_by_decl (decl);
+ if (func_name_idx != -1)
+ {
+ callsite_map::const_iterator ret
+ = callsites.find (std::make_pair (lineno, func_name_idx));
+ if (ret != callsites.end ())
+ return ret->second;
+ }
+ func_name_idx
+ = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
+ if (func_name_idx != -1)
+ {
+ callsite_map::const_iterator ret
+ = callsites.find (std::make_pair (lineno, func_name_idx));
+ if (ret != callsites.end ())
+ return ret->second;
+ }
+ if (DECL_ABSTRACT_ORIGIN (decl))
+ return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
+ else
+ return NULL;
+}
+
+/* Recursively traverse STACK starting from LEVEL to find the corresponding
+ function_instance. */
+
+function_instance *
+function_instance::get_function_instance (const inline_stack &stack,
+ unsigned level)
+{
+ if (level == 0)
+ return this;
+ function_instance *s = get_function_instance_by_decl (
+ stack[level].second, stack[level - 1].first);
+ if (s)
+ return s->get_function_instance (stack, level - 1);
+ else
+ return NULL;
+}
+
+/* Store the profile info for LOC in INFO. Return TRUE if profile info
+ is found. */
+
+bool
+function_instance::get_count_info (location_t loc, count_info *info) const
+{
+ position_count_map::const_iterator iter = pos_counts.find (loc);
+ if (iter == pos_counts.end ())
+ return false;
+ *info = iter->second;
+ return true;
+}
+
+/* Mark LOC as annotated. */
+
+void
+function_instance::mark_annotated (location_t loc)
+{
+ position_count_map::iterator iter = pos_counts.find (loc);
+ if (iter == pos_counts.end ())
+ return;
+ iter->second.annotated = true;
+}
+
+/* Read the inlinied indirect call target profile for STMT and store it in
+ MAP, return the total count for all inlined indirect calls. */
+
+gcov_type
+function_instance::find_icall_target_map (gimple stmt,
+ icall_target_map *map) const
+{
+ gcov_type ret = 0;
+ unsigned stmt_offset = get_relative_location_for_stmt (stmt);
+
+ for (callsite_map::const_iterator iter = callsites.begin ();
+ iter != callsites.end (); ++iter)
+ {
+ unsigned callee = iter->second->name ();
+ /* Check if callsite location match the stmt. */
+ if (iter->first.first != stmt_offset)
+ continue;
+ struct cgraph_node *node = find_func_by_profile_id (
+ (unsigned long long)afdo_string_table->get_name (callee));
+ if (node == NULL)
+ continue;
+ if (!check_ic_target (stmt, node))
+ continue;
+ (*map)[callee] = iter->second->total_count ();
+ ret += iter->second->total_count ();
+ }
+ return ret;
+}
+
+/* Read the profile and create a function_instance with head count as
+ HEAD_COUNT. Recursively read callsites to create nested function_instances
+ too. STACK is used to track the recursive creation process. */
+
+/* function instance profile format:
+
+ ENTRY_COUNT: 8 bytes
+ NAME_INDEX: 4 bytes
+ NUM_POS_COUNTS: 4 bytes
+ NUM_CALLSITES: 4 byte
+ POS_COUNT_1:
+ POS_1_OFFSET: 4 bytes
+ NUM_TARGETS: 4 bytes
+ COUNT: 8 bytes
+ TARGET_1:
+ VALUE_PROFILE_TYPE: 4 bytes
+ TARGET_IDX: 8 bytes
+ COUNT: 8 bytes
+ TARGET_2
+ ...
+ TARGET_n
+ POS_COUNT_2
+ ...
+ POS_COUNT_N
+ CALLSITE_1:
+ CALLSITE_1_OFFSET: 4 bytes
+ FUNCTION_INSTANCE_PROFILE (nested)
+ CALLSITE_2
+ ...
+ CALLSITE_n. */
+
+function_instance *
+function_instance::read_function_instance (function_instance_stack *stack,
+ gcov_type head_count)
+{
+ unsigned name = gcov_read_unsigned ();
+ unsigned num_pos_counts = gcov_read_unsigned ();
+ unsigned num_callsites = gcov_read_unsigned ();
+ function_instance *s = new function_instance (name, head_count);
+ stack->push_back (s);
+
+ for (unsigned i = 0; i < num_pos_counts; i++)
+ {
+ unsigned offset = gcov_read_unsigned () & 0xffff0000;
+ unsigned num_targets = gcov_read_unsigned ();
+ gcov_type count = gcov_read_counter ();
+ s->pos_counts[offset].count = count;
+ for (unsigned j = 0; j < stack->size (); j++)
+ (*stack)[j]->total_count_ += count;
+ for (unsigned j = 0; j < num_targets; j++)
+ {
+ /* Only indirect call target histogram is supported now. */
+ gcov_read_unsigned ();
+ gcov_type target_idx = gcov_read_counter ();
+ s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
+ }
+ }
+ for (unsigned i = 0; i < num_callsites; i++)
+ {
+ unsigned offset = gcov_read_unsigned ();
+ function_instance *callee_function_instance
+ = read_function_instance (stack, 0);
+ s->callsites[std::make_pair (offset, callee_function_instance->name ())]
+ = callee_function_instance;
+ }
+ stack->pop_back ();
+ return s;
+}
+
+/* Sum of counts that is used during annotation. */
+
+gcov_type
+function_instance::total_annotated_count () const
+{
+ gcov_type ret = 0;
+ for (callsite_map::const_iterator iter = callsites.begin ();
+ iter != callsites.end (); ++iter)
+ ret += iter->second->total_annotated_count ();
+ for (position_count_map::const_iterator iter = pos_counts.begin ();
+ iter != pos_counts.end (); ++iter)
+ if (iter->second.annotated)
+ ret += iter->second.count;
+ return ret;
+}
+
+/* Member functions for autofdo_source_profile. */
+
+autofdo_source_profile::~autofdo_source_profile ()
+{
+ for (name_function_instance_map::const_iterator iter = map_.begin ();
+ iter != map_.end (); ++iter)
+ delete iter->second;
+}
+
+/* For a given DECL, returns the top-level function_instance. */
+
+function_instance *
+autofdo_source_profile::get_function_instance_by_decl (tree decl) const
+{
+ int index = afdo_string_table->get_index_by_decl (decl);
+ if (index == -1)
+ return NULL;
+ name_function_instance_map::const_iterator ret = map_.find (index);
+ return ret == map_.end () ? NULL : ret->second;
+}
+
+/* Find count_info for a given gimple STMT. If found, store the count_info
+ in INFO and return true; otherwise return false. */
+
+bool
+autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
+{
+ if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
+ return false;
+
+ inline_stack stack;
+ get_inline_stack (gimple_location (stmt), &stack);
+ if (stack.size () == 0)
+ return false;
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL)
+ return false;
+ return s->get_count_info (stack[0].second, info);
+}
+
+/* Mark LOC as annotated. */
+
+void
+autofdo_source_profile::mark_annotated (location_t loc)
+{
+ inline_stack stack;
+ get_inline_stack (loc, &stack);
+ if (stack.size () == 0)
+ return;
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL)
+ return;
+ s->mark_annotated (stack[0].second);
+}
+
+/* Update value profile INFO for STMT from the inlined indirect callsite.
+ Return true if INFO is updated. */
+
+bool
+autofdo_source_profile::update_inlined_ind_target (gimple stmt,
+ count_info *info)
+{
+ if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
+ return false;
+
+ count_info old_info;
+ get_count_info (stmt, &old_info);
+ gcov_type total = 0;
+ for (icall_target_map::const_iterator iter = old_info.targets.begin ();
+ iter != old_info.targets.end (); ++iter)
+ total += iter->second;
+
+ /* Program behavior changed, original promoted (and inlined) target is not
+ hot any more. Will avoid promote the original target.
+
+ To check if original promoted target is still hot, we check the total
+ count of the unpromoted targets (stored in old_info). If it is no less
+ than half of the callsite count (stored in INFO), the original promoted
+ target is considered not hot any more. */
+ if (total >= info->count / 2)
+ return false;
+
+ inline_stack stack;
+ get_inline_stack (gimple_location (stmt), &stack);
+ if (stack.size () == 0)
+ return false;
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL)
+ return false;
+ icall_target_map map;
+ if (s->find_icall_target_map (stmt, &map) == 0)
+ return false;
+ for (icall_target_map::const_iterator iter = map.begin ();
+ iter != map.end (); ++iter)
+ info->targets[iter->first] = iter->second;
+ return true;
+}
+
+/* Find total count of the callee of EDGE. */
+
+gcov_type
+autofdo_source_profile::get_callsite_total_count (
+ struct cgraph_edge *edge) const
+{
+ inline_stack stack;
+ stack.push_back (std::make_pair (edge->callee->decl, 0));
+ get_inline_stack (gimple_location (edge->call_stmt), &stack);
+
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL
+ || afdo_string_table->get_index (IDENTIFIER_POINTER (
+ DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
+ return 0;
+ else
+ return s->total_count ();
+}
+
+/* Read AutoFDO profile and returns TRUE on success. */
+
+/* source profile format:
+
+ GCOV_TAG_AFDO_FUNCTION: 4 bytes
+ LENGTH: 4 bytes
+ NUM_FUNCTIONS: 4 bytes
+ FUNCTION_INSTANCE_1
+ FUNCTION_INSTANCE_2
+ ...
+ FUNCTION_INSTANCE_N. */
+
+bool
+autofdo_source_profile::read ()
+{
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
+ {
+ inform (0, "Not expected TAG.");
+ return false;
+ }
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+
+ /* Read in the function/callsite profile, and store it in local
+ data structure. */
+ unsigned function_num = gcov_read_unsigned ();
+ for (unsigned i = 0; i < function_num; i++)
+ {
+ function_instance::function_instance_stack stack;
+ function_instance *s = function_instance::read_function_instance (
+ &stack, gcov_read_counter ());
+ afdo_profile_info->sum_all += s->total_count ();
+ map_[s->name ()] = s;
+ }
+ return true;
+}
+
+/* Return the function_instance in the profile that correspond to the
+ inline STACK. */
+
+function_instance *
+autofdo_source_profile::get_function_instance_by_inline_stack (
+ const inline_stack &stack) const
+{
+ name_function_instance_map::const_iterator iter = map_.find (
+ afdo_string_table->get_index_by_decl (stack[stack.size () - 1].first));
+ return iter == map_.end ()
+ ? NULL
+ : iter->second->get_function_instance (stack, stack.size () - 1);
+}
+
+/* Module profile is only used by LIPO. Here we simply ignore it. */
+
+static void
+fake_read_autofdo_module_profile ()
+{
+ /* Read in the module info. */
+ gcov_read_unsigned ();
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+
+ /* Read in the file name table. */
+ unsigned total_module_num = gcov_read_unsigned ();
+ gcc_assert (total_module_num == 0);
+}
+
+static void
+read_profile (void)
+{
+ if (gcov_open (auto_profile_file, 1) == 0)
+ error ("Cannot open profile file %s.", auto_profile_file);
+
+ if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
+ error ("AutoFDO profile magic number does not mathch.");
+
+ /* Skip the version number. */
+ gcov_read_unsigned ();
+
+ /* Skip the empty integer. */
+ gcov_read_unsigned ();
+
+ /* string_table. */
+ afdo_string_table = string_table::create ();
+ if (afdo_string_table == NULL)
+ error ("Cannot read string table from %s.", auto_profile_file);
+
+ /* autofdo_source_profile. */
+ afdo_source_profile = autofdo_source_profile::create ();
+ if (afdo_source_profile == NULL)
+ error ("Cannot read function profile from %s.", auto_profile_file);
+
+ /* autofdo_module_profile. */
+ fake_read_autofdo_module_profile ();
+
+ /* Read in the working set. */
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
+ error ("Cannot read working set from %s.", auto_profile_file);
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+ gcov_working_set_t set[128];
+ for (unsigned i = 0; i < 128; i++)
+ {
+ set[i].num_counters = gcov_read_unsigned ();
+ set[i].min_counter = gcov_read_counter ();
+ }
+ add_working_set (set);
+}
+
+/* From AutoFDO profiles, find values inside STMT for that we want to measure
+ histograms for indirect-call optimization. */
+
+static void
+afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
+ bool transform)
+{
+ gimple stmt = gsi_stmt (*gsi);
+ tree callee;
+
+ if (map.size () == 0 || gimple_code (stmt) != GIMPLE_CALL
+ || gimple_call_fndecl (stmt) != NULL_TREE)
+ return;
+
+ callee = gimple_call_fn (stmt);
+
+ histogram_value hist = gimple_alloc_histogram_value (
+ cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
+ hist->n_counters = 3;
+ hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
+ gimple_add_histogram_value (cfun, stmt, hist);
+
+ gcov_type total = 0;
+ icall_target_map::const_iterator max_iter = map.end ();
+
+ for (icall_target_map::const_iterator iter = map.begin ();
+ iter != map.end (); ++iter)
+ {
+ total += iter->second;
+ if (max_iter == map.end () || max_iter->second < iter->second)
+ max_iter = iter;
+ }
+
+ hist->hvalue.counters[0]
+ = (unsigned long long)afdo_string_table->get_name (max_iter->first);
+ hist->hvalue.counters[1] = max_iter->second;
+ hist->hvalue.counters[2] = total;
+
+ if (!transform)
+ return;
+
+ if (gimple_ic_transform (gsi))
+ {
+ struct cgraph_edge *indirect_edge
+ = cgraph_node::get (current_function_decl)->get_edge (stmt);
+ struct cgraph_node *direct_call
+ = find_func_by_profile_id ((int)hist->hvalue.counters[0]);
+ if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
+ return;
+ struct cgraph_edge *new_edge
+ = indirect_edge->make_speculative (direct_call, 0, 0);
+ new_edge->redirect_call_stmt_to_callee ();
+ gimple_remove_histogram_value (cfun, stmt, hist);
+ inline_call (new_edge, true, NULL, NULL, false);
+ return;
+ }
+ return;
+}
+
+/* From AutoFDO profiles, find values inside STMT for that we want to measure
+ histograms and adds them to list VALUES. */
+
+static void
+afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
+ bool transform)
+{
+ afdo_indirect_call (gsi, map, transform);
+}
+
+typedef std::set<basic_block> bb_set;
+typedef std::set<edge> edge_set;
+
+static bool
+is_bb_annotated (const basic_block bb, const bb_set &annotated)
+{
+ return annotated.find (bb) != annotated.end ();
+}
+
+static void
+set_bb_annotated (basic_block bb, bb_set *annotated)
+{
+ annotated->insert (bb);
+}
+
+static bool
+is_edge_annotated (const edge e, const edge_set &annotated)
+{
+ return annotated.find (e) != annotated.end ();
+}
+
+static void
+set_edge_annotated (edge e, edge_set *annotated)
+{
+ annotated->insert (e);
+}
+
+/* For a given BB, set its execution count. Attach value profile if a stmt
+ is not in PROMOTED, because we only want to promot an indirect call once.
+ Return TRUE if BB is annotated. */
+
+static bool
+afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
+{
+ gimple_stmt_iterator gsi;
+ edge e;
+ edge_iterator ei;
+ gcov_type max_count = 0;
+ bool has_annotated = false;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ count_info info;
+ gimple stmt = gsi_stmt (gsi);
+ if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
+ continue;
+ if (afdo_source_profile->get_count_info (stmt, &info))
+ {
+ if (info.count > max_count)
+ max_count = info.count;
+ has_annotated = true;
+ if (info.targets.size () > 0
+ && promoted.find (stmt) == promoted.end ())
+ afdo_vpt (&gsi, info.targets, false);
+ }
+ }
+
+ if (!has_annotated)
+ return false;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple phi = gsi_stmt (gsi);
+ size_t i;
+ for (i = 0; i < gimple_phi_num_args (phi); i++)
+ afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
+ }
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ afdo_source_profile->mark_annotated (e->goto_locus);
+
+ bb->count = max_count;
+ return true;
+}
+
+/* BB1 and BB2 are in an equivalent class iff:
+ 1. BB1 dominates BB2.
+ 2. BB2 post-dominates BB1.
+ 3. BB1 and BB2 are in the same loop nest.
+ This function finds the equivalent class for each basic block, and
+ stores a pointer to the first BB in its equivalent class. Meanwhile,
+ set bb counts for the same equivalent class to be idenical. Update
+ ANNOTATED_BB for the first BB in its equivalent class. */
+
+static void
+afdo_find_equiv_class (bb_set *annotated_bb)
+{
+ basic_block bb;
+
+ FOR_ALL_BB_FN (bb, cfun)
+ bb->aux = NULL;
+
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ vec<basic_block> dom_bbs;
+ basic_block bb1;
+ int i;
+
+ if (bb->aux != NULL)
+ continue;
+ bb->aux = bb;
+ dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
+ FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
+ if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
+ && bb1->loop_father == bb->loop_father)
+ {
+ bb1->aux = bb;
+ if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
+ {
+ bb->count = MAX (bb->count, bb1->count);
+ set_bb_annotated (bb, annotated_bb);
+ }
+ }
+ dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
+ FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
+ if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
+ && bb1->loop_father == bb->loop_father)
+ {
+ bb1->aux = bb;
+ if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
+ {
+ bb->count = MAX (bb->count, bb1->count);
+ set_bb_annotated (bb, annotated_bb);
+ }
+ }
+ }
+}
+
+/* If a basic block's count is known, and only one of its in/out edges' count
+ is unknown, its count can be calculated. Meanwhile, if all of the in/out
+ edges' counts are known, then the basic block's unknown count can also be
+ calculated.
+ IS_SUCC is true if out edges of a basic blocks are examined.
+ Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
+ Return TRUE if any basic block/edge count is changed. */
+
+static bool
+afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
+ edge_set *annotated_edge)
+{
+ basic_block bb;
+ bool changed = false;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ edge e, unknown_edge = NULL;
+ edge_iterator ei;
+ int num_unknown_edge = 0;
+ gcov_type total_known_count = 0;
+
+ FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
+ if (!is_edge_annotated (e, *annotated_edge))
+ num_unknown_edge++, unknown_edge = e;
+ else
+ total_known_count += e->count;
+
+ if (num_unknown_edge == 0)
+ {
+ if (total_known_count > bb->count)
+ {
+ bb->count = total_known_count;
+ changed = true;
+ }
+ if (!is_bb_annotated (bb, *annotated_bb))
+ {
+ set_bb_annotated (bb, annotated_bb);
+ changed = true;
+ }
+ }
+ else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
+ {
+ if (bb->count >= total_known_count)
+ unknown_edge->count = bb->count - total_known_count;
+ else
+ unknown_edge->count = 0;
+ set_edge_annotated (unknown_edge, annotated_edge);
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+/* Special propagation for circuit expressions. Because GCC translates
+ control flow into data flow for circuit expressions. E.g.
+ BB1:
+ if (a && b)
+ BB2
+ else
+ BB3
+
+ will be translated into:
+
+ BB1:
+ if (a)
+ goto BB.t1
+ else
+ goto BB.t3
+ BB.t1:
+ if (b)
+ goto BB.t2
+ else
+ goto BB.t3
+ BB.t2:
+ goto BB.t3
+ BB.t3:
+ tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
+ if (tmp)
+ goto BB2
+ else
+ goto BB3
+
+ In this case, we need to propagate through PHI to determine the edge
+ count of BB1->BB.t1, BB.t1->BB.t2.
+ Update ANNOTATED_EDGE accordingly. */
+
+static void
+afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
+{
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ gimple phi_stmt;
+ tree cmp_rhs, cmp_lhs;
+ gimple cmp_stmt = last_stmt (bb);
+ edge e;
+ edge_iterator ei;
+
+ if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
+ continue;
+ cmp_rhs = gimple_cond_rhs (cmp_stmt);
+ cmp_lhs = gimple_cond_lhs (cmp_stmt);
+ if (!TREE_CONSTANT (cmp_rhs)
+ || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
+ continue;
+ if (TREE_CODE (cmp_lhs) != SSA_NAME)
+ continue;
+ if (!is_bb_annotated (bb, annotated_bb))
+ continue;
+ phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
+ while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN
+ && gimple_assign_single_p (phi_stmt)
+ && TREE_CODE (gimple_assign_rhs1 (phi_stmt)) == SSA_NAME)
+ phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt));
+ if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
+ continue;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ unsigned i, total = 0;
+ edge only_one;
+ bool check_value_one = (((integer_onep (cmp_rhs))
+ ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
+ ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
+ if (!is_edge_annotated (e, *annotated_edge))
+ continue;
+ for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
+ {
+ tree val = gimple_phi_arg_def (phi_stmt, i);
+ edge ep = gimple_phi_arg_edge (phi_stmt, i);
+
+ if (!TREE_CONSTANT (val)
+ || !(integer_zerop (val) || integer_onep (val)))
+ continue;
+ if (check_value_one ^ integer_onep (val))
+ continue;
+ total++;
+ only_one = ep;
+ if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
+ {
+ ep->probability = 0;
+ ep->count = 0;
+ set_edge_annotated (ep, annotated_edge);
+ }
+ }
+ if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
+ {
+ only_one->probability = e->probability;
+ only_one->count = e->count;
+ set_edge_annotated (only_one, annotated_edge);
+ }
+ }
+ }
+}
+
+/* Propagate the basic block count and edge count on the control flow
+ graph. We do the propagation iteratively until stablize. */
+
+static void
+afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
+{
+ basic_block bb;
+ bool changed = true;
+ int i = 0;
+
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ bb->count = ((basic_block)bb->aux)->count;
+ if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
+ set_bb_annotated (bb, annotated_bb);
+ }
+
+ while (changed && i++ < 10)
+ {
+ changed = false;
+
+ if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
+ changed = true;
+ if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
+ changed = true;
+ afdo_propagate_circuit (*annotated_bb, annotated_edge);
+ }
+}
+
+/* Propagate counts on control flow graph and calculate branch
+ probabilities. */
+
+static void
+afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
+{
+ basic_block bb;
+ bool has_sample = false;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ if (bb->count > 0)
+ has_sample = true;
+
+ if (!has_sample)
+ return;
+
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ calculate_dominance_info (CDI_DOMINATORS);
+ loop_optimizer_init (0);
+
+ afdo_find_equiv_class (annotated_bb);
+ afdo_propagate (annotated_bb, annotated_edge);
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ edge e;
+ edge_iterator ei;
+ int num_unknown_succ = 0;
+ gcov_type total_count = 0;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (!is_edge_annotated (e, *annotated_edge))
+ num_unknown_succ++;
+ else
+ total_count += e->count;
+ }
+ if (num_unknown_succ == 0 && total_count > 0)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
+ }
+ }
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
+ bb->aux = NULL;
+ }
+
+ loop_optimizer_finalize ();
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+}
+
+/* Perform value profile transformation using AutoFDO profile. Add the
+ promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
+ indirect call promoted. */
+
+static bool
+afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
+{
+ basic_block bb;
+ if (afdo_source_profile->get_function_instance_by_decl (
+ current_function_decl) == NULL)
+ return false;
+
+ compute_inline_parameters (cgraph_node::get (current_function_decl), true);
+
+ bool has_vpt = false;
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (!has_indirect_call (bb))
+ continue;
+ gimple_stmt_iterator gsi;
+
+ gcov_type bb_count = 0;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ count_info info;
+ gimple stmt = gsi_stmt (gsi);
+ if (afdo_source_profile->get_count_info (stmt, &info))
+ bb_count = MAX (bb_count, info.count);
+ }
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ /* IC_promotion and early_inline_2 is done in multiple iterations.
+ No need to promoted the stmt if its in promoted_stmts (means
+ it is already been promoted in the previous iterations). */
+ if (gimple_code (stmt) != GIMPLE_CALL || gimple_call_fn (stmt) == NULL
+ || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
+ || promoted_stmts->find (stmt) != promoted_stmts->end ())
+ continue;
+
+ count_info info;
+ afdo_source_profile->get_count_info (stmt, &info);
+ info.count = bb_count;
+ if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
+ {
+ /* Promote the indirect call and update the promoted_stmts. */
+ promoted_stmts->insert (stmt);
+ afdo_vpt (&gsi, info.targets, true);
+ has_vpt = true;
+ }
+ }
+ }
+ if (has_vpt)
+ {
+ optimize_inline_calls (current_function_decl);
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Annotate auto profile to the control flow graph. Do not annotate value
+ profile for stmts in PROMOTED_STMTS. */
+
+static void
+afdo_annotate_cfg (const stmt_set &promoted_stmts)
+{
+ basic_block bb;
+ bb_set annotated_bb;
+ edge_set annotated_edge;
+ const function_instance *s
+ = afdo_source_profile->get_function_instance_by_decl (
+ current_function_decl);
+
+ if (s == NULL)
+ return;
+ cgraph_node::get (current_function_decl)->count = s->head_count ();
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
+ gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ edge e;
+ edge_iterator ei;
+
+ bb->count = 0;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->count = 0;
+
+ if (afdo_set_bb_count (bb, promoted_stmts))
+ set_bb_annotated (bb, &annotated_bb);
+ if (bb->count > max_count)
+ max_count = bb->count;
+ }
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
+ > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
+ {
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
+ = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
+ }
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
+ > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
+ {
+ EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
+ = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
+ }
+ afdo_source_profile->mark_annotated (
+ DECL_SOURCE_LOCATION (current_function_decl));
+ afdo_source_profile->mark_annotated (cfun->function_start_locus);
+ afdo_source_profile->mark_annotated (cfun->function_end_locus);
+ if (max_count > 0)
+ {
+ afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
+ counts_to_freqs ();
+ profile_status_for_fn (cfun) = PROFILE_READ;
+ }
+ if (flag_value_profile_transformations)
+ gimple_value_profile_transformations ();
+}
+
+/* Wrapper function to invoke early inliner. */
+
+static void
+early_inline ()
+{
+ compute_inline_parameters (cgraph_node::get (current_function_decl), true);
+ unsigned todo = early_inliner (cfun);
+ if (todo & TODO_update_ssa_any)
+ update_ssa (TODO_update_ssa);
+}
+
+/* Use AutoFDO profile to annoate the control flow graph.
+ Return the todo flag. */
+
+static unsigned int
+auto_profile (void)
+{
+ struct cgraph_node *node;
+
+ if (symtab->state == FINISHED)
+ return 0;
+
+ init_node_map (true);
+ profile_info = autofdo::afdo_profile_info;
+
+ FOR_EACH_FUNCTION (node)
+ {
+ if (!gimple_has_body_p (node->decl))
+ continue;
+
+ /* Don't profile functions produced for builtin stuff. */
+ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
+ continue;
+
+ push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+
+ /* First do indirect call promotion and early inline to make the
+ IR match the profiled binary before actual annotation.
+
+ This is needed because an indirect call might have been promoted
+ and inlined in the profiled binary. If we do not promote and
+ inline these indirect calls before annotation, the profile for
+ these promoted functions will be lost.
+
+ e.g. foo() --indirect_call--> bar()
+ In profiled binary, the callsite is promoted and inlined, making
+ the profile look like:
+
+ foo: {
+ loc_foo_1: count_1
+ bar@loc_foo_2: {
+ loc_bar_1: count_2
+ loc_bar_2: count_3
+ }
+ }
+
+ Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
+ If we perform annotation on it, the profile inside bar@loc_foo2
+ will be wasted.
+
+ To avoid this, we promote loc_foo_2 and inline the promoted bar
+ function before annotation, so the profile inside bar@loc_foo2
+ will be useful. */
+ autofdo::stmt_set promoted_stmts;
+ for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
+ {
+ if (!flag_value_profile_transformations
+ || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
+ break;
+ early_inline ();
+ }
+
+ early_inline ();
+ autofdo::afdo_annotate_cfg (promoted_stmts);
+ compute_function_frequency ();
+ update_ssa (TODO_update_ssa);
+
+ /* Local pure-const may imply need to fixup the cfg. */
+ if (execute_fixup_cfg () & TODO_cleanup_cfg)
+ cleanup_tree_cfg ();
+
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+ cgraph_edge::rebuild_edges ();
+ pop_cfun ();
+ }
+
+ return TODO_rebuild_cgraph_edges;
+}
+} /* namespace autofdo. */
+
+/* Read the profile from the profile data file. */
+
+void
+read_autofdo_file (void)
+{
+ if (auto_profile_file == NULL)
+ auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
+
+ autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
+ 1, sizeof (struct gcov_ctr_summary));
+ autofdo::afdo_profile_info->runs = 1;
+ autofdo::afdo_profile_info->sum_max = 0;
+ autofdo::afdo_profile_info->sum_all = 0;
+
+ /* Read the profile from the profile file. */
+ autofdo::read_profile ();
+}
+
+/* Free the resources. */
+
+void
+end_auto_profile (void)
+{
+ delete autofdo::afdo_source_profile;
+ delete autofdo::afdo_string_table;
+ profile_info = NULL;
+}
+
+/* Returns TRUE if EDGE is hot enough to be inlined early. */
+
+bool
+afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
+{
+ gcov_type count
+ = autofdo::afdo_source_profile->get_callsite_total_count (edge);
+ if (count > 0)
+ {
+ bool is_hot;
+ const struct gcov_ctr_summary *saved_profile_info = profile_info;
+ /* At earling inline stage, profile_info is not set yet. We need to
+ temporarily set it to afdo_profile_info to calculate hotness. */
+ profile_info = autofdo::afdo_profile_info;
+ is_hot = maybe_hot_count_p (NULL, count);
+ profile_info = saved_profile_info;
+ return is_hot;
+ }
+ else
+ return false;
+}
+
+namespace
+{
+
+const pass_data pass_data_ipa_auto_profile = {
+ SIMPLE_IPA_PASS, "afdo", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_IPA_AUTOFDO, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_ipa_auto_profile : public simple_ipa_opt_pass
+{
+public:
+ pass_ipa_auto_profile (gcc::context *ctxt)
+ : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
+ {
+ }
+
+ /* opt_pass methods: */
+ virtual bool
+ gate (function *)
+ {
+ return flag_auto_profile;
+ }
+ virtual unsigned int
+ execute (function *)
+ {
+ return autofdo::auto_profile ();
+ }
+}; // class pass_ipa_auto_profile
+
+} // anon namespace
+
+simple_ipa_opt_pass *
+make_pass_ipa_auto_profile (gcc::context *ctxt)
+{
+ return new pass_ipa_auto_profile (ctxt);
+}
===================================================================
@@ -0,0 +1,31 @@
+/* auto-profile.h - Defines data exported from auto-profile.c
+ Copyright (C) 2014. Free Software Foundation, Inc.
+ Contributed by Dehao Chen (dehao@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef AUTO_PROFILE_H
+#define AUTO_PROFILE_H
+
+/* Read, process, finalize AutoFDO data structures. */
+extern void read_autofdo_file (void);
+extern void end_auto_profile (void);
+
+/* Returns TRUE if EDGE is hot enough to be inlined early. */
+extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
+
+#endif /* AUTO_PROFILE_H */
===================================================================
@@ -724,6 +724,7 @@ extern struct edge_list *pre_edge_rev_lcm (int, sb
extern void compute_available (sbitmap *, sbitmap *, sbitmap *, sbitmap *);
/* In predict.c */
+extern bool maybe_hot_count_p (struct function *, gcov_type);
extern bool maybe_hot_bb_p (struct function *, const_basic_block);
extern bool maybe_hot_edge_p (edge);
extern bool probably_never_executed_bb_p (struct function *, const_basic_block);
===================================================================
@@ -244,6 +244,9 @@ typedef uint64_t gcov_type_unsigned;
#define GCOV_TAG_PROGRAM_SUMMARY ((gcov_unsigned_t)0xa3000000)
#define GCOV_TAG_SUMMARY_LENGTH(NUM) \
(1 + GCOV_COUNTERS_SUMMABLE * (10 + 3 * 2) + (NUM) * 5)
+#define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000)
+#define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000)
+#define GCOV_TAG_AFDO_WORKING_SET ((gcov_unsigned_t)0xaf000000)
/* Counters that are collected. */
===================================================================
@@ -1279,6 +1279,55 @@ print_specific_help (unsigned int include_flags,
opts->x_help_columns, opts, lang_mask);
}
+static void
+enable_fdo_optimizations (struct gcc_options *opts,
+ struct gcc_options *opts_set,
+ int value)
+{
+ if (!opts_set->x_flag_branch_probabilities)
+ opts->x_flag_branch_probabilities = value;
+ if (!opts_set->x_flag_profile_values)
+ opts->x_flag_profile_values = value;
+ if (!opts_set->x_flag_unroll_loops)
+ opts->x_flag_unroll_loops = value;
+ if (!opts_set->x_flag_peel_loops)
+ opts->x_flag_peel_loops = value;
+ if (!opts_set->x_flag_tracer)
+ opts->x_flag_tracer = value;
+ if (!opts_set->x_flag_value_profile_transformations)
+ opts->x_flag_value_profile_transformations = value;
+ if (!opts_set->x_flag_inline_functions)
+ opts->x_flag_inline_functions = value;
+ if (!opts_set->x_flag_ipa_cp)
+ opts->x_flag_ipa_cp = value;
+ if (!opts_set->x_flag_ipa_cp_clone
+ && value && opts->x_flag_ipa_cp)
+ opts->x_flag_ipa_cp_clone = value;
+ if (!opts_set->x_flag_predictive_commoning)
+ opts->x_flag_predictive_commoning = value;
+ if (!opts_set->x_flag_unswitch_loops)
+ opts->x_flag_unswitch_loops = value;
+ if (!opts_set->x_flag_gcse_after_reload)
+ opts->x_flag_gcse_after_reload = value;
+ if (!opts_set->x_flag_tree_loop_vectorize
+ && !opts_set->x_flag_tree_vectorize)
+ opts->x_flag_tree_loop_vectorize = value;
+ if (!opts_set->x_flag_tree_slp_vectorize
+ && !opts_set->x_flag_tree_vectorize)
+ opts->x_flag_tree_slp_vectorize = value;
+ if (!opts_set->x_flag_vect_cost_model)
+ opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
+ if (!opts_set->x_flag_tree_loop_distribute_patterns)
+ opts->x_flag_tree_loop_distribute_patterns = value;
+ if (!opts_set->x_flag_profile_reorder_functions)
+ opts->x_flag_profile_reorder_functions = value;
+ /* Indirect call profiling should do all useful transformations
+ speculative devirtualization does. */
+ if (!opts_set->x_flag_devirtualize_speculatively
+ && opts->x_flag_value_profile_transformations)
+ opts->x_flag_devirtualize_speculatively = false;
+}
+
/* Handle target- and language-independent options. Return zero to
generate an "unknown option" message. Only options that need
extra handling need to be listed here; if you simply want
@@ -1746,50 +1795,23 @@ common_handle_option (struct gcc_options *opts,
value = true;
/* No break here - do -fprofile-use processing. */
case OPT_fprofile_use:
- if (!opts_set->x_flag_branch_probabilities)
- opts->x_flag_branch_probabilities = value;
- if (!opts_set->x_flag_profile_values)
- opts->x_flag_profile_values = value;
- if (!opts_set->x_flag_unroll_loops)
- opts->x_flag_unroll_loops = value;
- if (!opts_set->x_flag_peel_loops)
- opts->x_flag_peel_loops = value;
- if (!opts_set->x_flag_tracer)
- opts->x_flag_tracer = value;
- if (!opts_set->x_flag_value_profile_transformations)
- opts->x_flag_value_profile_transformations = value;
- if (!opts_set->x_flag_inline_functions)
- opts->x_flag_inline_functions = value;
- if (!opts_set->x_flag_ipa_cp)
- opts->x_flag_ipa_cp = value;
- if (!opts_set->x_flag_ipa_cp_clone
- && value && opts->x_flag_ipa_cp)
- opts->x_flag_ipa_cp_clone = value;
- if (!opts_set->x_flag_predictive_commoning)
- opts->x_flag_predictive_commoning = value;
- if (!opts_set->x_flag_unswitch_loops)
- opts->x_flag_unswitch_loops = value;
- if (!opts_set->x_flag_gcse_after_reload)
- opts->x_flag_gcse_after_reload = value;
- if (!opts_set->x_flag_tree_loop_vectorize
- && !opts_set->x_flag_tree_vectorize)
- opts->x_flag_tree_loop_vectorize = value;
- if (!opts_set->x_flag_tree_slp_vectorize
- && !opts_set->x_flag_tree_vectorize)
- opts->x_flag_tree_slp_vectorize = value;
- if (!opts_set->x_flag_vect_cost_model)
- opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
- if (!opts_set->x_flag_tree_loop_distribute_patterns)
- opts->x_flag_tree_loop_distribute_patterns = value;
- if (!opts_set->x_flag_profile_reorder_functions)
- opts->x_flag_profile_reorder_functions = value;
- /* Indirect call profiling should do all useful transformations
- speculative devirtualization does. */
- if (!opts_set->x_flag_devirtualize_speculatively
- && opts->x_flag_value_profile_transformations)
- opts->x_flag_devirtualize_speculatively = false;
+ enable_fdo_optimizations (opts, opts_set, value);
break;
+ case OPT_fauto_profile_:
+ opts->x_auto_profile_file = xstrdup (arg);
+ opts->x_flag_auto_profile = true;
+ value = true;
+ /* No break here - do -fauto-profile processing. */
+ case OPT_fauto_profile:
+ enable_fdo_optimizations (opts, opts_set, value);
+ if (!opts_set->x_flag_profile_correction)
+ opts->x_flag_profile_correction = value;
+ maybe_set_param_value (
+ PARAM_EARLY_INLINER_MAX_ITERATIONS, 10,
+ opts->x_param_values, opts_set->x_param_values);
+ break;
+
case OPT_fprofile_generate_:
opts->x_profile_data_prefix = xstrdup (arg);
value = true;
===================================================================
@@ -121,6 +121,7 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
+#include "auto-profile.h"
#include "cilk.h"
#include "builtins.h"
@@ -449,6 +450,14 @@ want_early_inline_function_p (struct cgraph_edge *
if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
;
+ /* For AutoFDO, we need to make sure that before profile annotation, all
+ hot paths' IR look exactly the same as profiled binary. As a result,
+ in einliner, we will disregard size limit and inline those callsites
+ that are:
+ * inlined in the profiled binary, and
+ * the cloned callee has enough samples to be considered "hot". */
+ else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
+ ;
else if (!DECL_DECLARED_INLINE_P (callee->decl)
&& !flag_inline_small_functions)
{
@@ -2366,39 +2375,8 @@ early_inline_small_functions (struct cgraph_node *
return inlined;
}
-/* Do inlining of small functions. Doing so early helps profiling and other
- passes to be somewhat more effective and avoids some code duplication in
- later real inlining pass for testcases with very many function calls. */
-
-namespace {
-
-const pass_data pass_data_early_inline =
-{
- GIMPLE_PASS, /* type */
- "einline", /* name */
- OPTGROUP_INLINE, /* optinfo_flags */
- TV_EARLY_INLINING, /* tv_id */
- PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
-
-class pass_early_inline : public gimple_opt_pass
-{
-public:
- pass_early_inline (gcc::context *ctxt)
- : gimple_opt_pass (pass_data_early_inline, ctxt)
- {}
-
- /* opt_pass methods: */
- virtual unsigned int execute (function *);
-
-}; // class pass_early_inline
-
unsigned int
-pass_early_inline::execute (function *fun)
+early_inliner (function *fun)
{
struct cgraph_node *node = cgraph_node::get (current_function_decl);
struct cgraph_edge *edge;
@@ -2499,6 +2477,43 @@ unsigned int
return todo;
}
+/* Do inlining of small functions. Doing so early helps profiling and other
+ passes to be somewhat more effective and avoids some code duplication in
+ later real inlining pass for testcases with very many function calls. */
+
+namespace {
+
+const pass_data pass_data_early_inline =
+{
+ GIMPLE_PASS, /* type */
+ "einline", /* name */
+ OPTGROUP_INLINE, /* optinfo_flags */
+ TV_EARLY_INLINING, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_early_inline : public gimple_opt_pass
+{
+public:
+ pass_early_inline (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_early_inline, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual unsigned int execute (function *);
+
+}; // class pass_early_inline
+
+unsigned int
+pass_early_inline::execute (function *fun)
+{
+ return early_inliner (fun);
+}
+
} // anon namespace
gimple_opt_pass *
===================================================================
@@ -238,6 +238,7 @@ void initialize_growth_caches (void);
void free_growth_caches (void);
void compute_inline_parameters (struct cgraph_node *, bool);
bool speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining);
+unsigned int early_inliner (function *fun);
/* In ipa-inline-transform.c */
bool inline_call (struct cgraph_edge *, bool, vec<cgraph_edge *> *, int *, bool,
===================================================================
@@ -161,7 +161,7 @@ set_hot_bb_threshold (gcov_type min)
/* Return TRUE if frequency FREQ is considered to be hot. */
-static inline bool
+bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
if (fun && profile_status_for_fn (fun) != PROFILE_READ)
@@ -2852,7 +2852,7 @@ counts_to_freqs (void)
/* Don't overwrite the estimated frequencies when the profile for
the function is missing. We may drop this function PROFILE_GUESSED
later in drop_profile (). */
- if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
+ if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
return 0;
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
@@ -3223,7 +3223,8 @@ rebuild_frequencies (void)
count_max = MAX (bb->count, count_max);
if (profile_status_for_fn (cfun) == PROFILE_GUESSED
- || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
+ || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
+ && count_max < REG_BR_PROB_BASE/10))
{
loop_optimizer_init (0);
add_noreturn_fake_exit_edges ();
===================================================================
@@ -699,8 +699,10 @@ class pass_ipa_tree_profile : public simple_ipa_op
bool
pass_ipa_tree_profile::gate (function *)
{
- /* When profile instrumentation, use or test coverage shall be performed. */
- return (!in_lto_p
+ /* When profile instrumentation, use or test coverage shall be performed.
+ But for AutoFDO, this there is no instrumentation, thus this pass is
+ diabled. */
+ return (!in_lto_p && !flag_auto_profile
&& (flag_branch_probabilities || flag_test_coverage
|| profile_arc_flag));
}
===================================================================
@@ -106,6 +106,14 @@ static int total_num_times_called;
static int total_hist_br_prob[20];
static int total_num_branches;
+/* Helper function to update gcov_working_sets. */
+
+void add_working_set (gcov_working_set_t *set) {
+ int i = 0;
+ for (; i < NUM_GCOV_WORKING_SETS; i++)
+ gcov_working_sets[i] = set[i];
+}
+
/* Forward declarations. */
static void find_spanning_tree (struct edge_list *);
===================================================================
@@ -1153,6 +1153,7 @@ OBJS = \
alias.o \
alloc-pool.o \
auto-inc-dec.o \
+ auto-profile.o \
bb-reorder.o \
bitmap.o \
bt-load.o \
===================================================================
@@ -47,6 +47,7 @@ extern void init_node_map (bool);
extern void del_node_map (void);
extern void get_working_sets (void);
+extern void add_working_set (gcov_working_set_t *);
/* In predict.c. */
extern gcov_type get_hot_bb_threshold (void);
===================================================================
@@ -169,6 +169,7 @@ extern const struct gcc_debug_hooks dbx_debug_hook
extern const struct gcc_debug_hooks sdb_debug_hooks;
extern const struct gcc_debug_hooks xcoff_debug_hooks;
extern const struct gcc_debug_hooks dwarf2_debug_hooks;
+extern const struct gcc_debug_hooks auto_profile_debug_hooks;
extern const struct gcc_debug_hooks vmsdbg_debug_hooks;
/* Dwarf2 frame information. */
===================================================================
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_rebuild_cgraph_edges);
NEXT_PASS (pass_inline_parameters);
POP_INSERT_PASSES ()
+ NEXT_PASS (pass_ipa_auto_profile);
NEXT_PASS (pass_ipa_free_inline_summary);
NEXT_PASS (pass_ipa_tree_profile);
PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)