===================================================================
@@ -144,14 +144,18 @@ const int NUM_FUNCTIONS = 100;
/* Reads off the next string from the char stream CONTENTS and updates
READ_LENGTH to the length of the string read. The value of CONTENTS
- is updated to start at the next string. */
+ is updated to start at the next string. UPDATE_CONTENTS tells if
+ CONTENTS must be moved past the read string to the next string. To
+ peek at the string, UPDATE_CONTENTS can be set to false. */
static char *
-get_next_string (char **contents, unsigned int *read_length)
+get_next_string (char **contents, unsigned int *read_length,
+ int update_contents)
{
char *s = *contents;
*read_length = strlen (*contents) + 1;
- *contents += *read_length;
+ if (update_contents)
+ *contents += *read_length;
return s;
}
@@ -192,7 +196,7 @@ remove_edge_from_list (Edge * curr_edge)
/* Adds the WEIGHT value to the edge count of CALLER and CALLEE. */
static void
-update_edge (Node *n1, Node *n2, unsigned int weight)
+update_edge (Node *n1, Node *n2, unsigned long long weight)
{
void **slot;
Raw_edge re, *r;
@@ -227,6 +231,9 @@ static void
e = *slot;
e->weight += weight;
}
+ /* Update the computed_weight, the computed node weight, of n2 which is the
+ sum of weights of all incoming edges to n2. */
+ n2->computed_weight += weight;
}
/* Create a unique node for a function. */
@@ -288,10 +295,14 @@ void dump_edges (FILE *fp)
it != NULL;
it = it->next)
{
- fprintf (fp,"# %s ---- (%u)---- %s\n",
+ fprintf (fp,"# %s (%llu, %llu) ---- (%llu)---- %s (%llu, %llu)\n",
it->first_function->name,
+ it->first_function->weight,
+ it->first_function->computed_weight,
it->weight,
- it->second_function->name);
+ it->second_function->name,
+ it->second_function->weight,
+ it->second_function->computed_weight);
}
}
@@ -320,6 +331,8 @@ canonicalize_function_name (void *file_handle, cha
call graph edges with appropriate weights. The section contents have the
following format :
Function <caller_name>
+ Weight <entry_count> <max_count> (optional line)
+ ColdWeight <max_count> (optional line)
<callee_1>
<edge count between caller and callee_1>
<callee_2>
@@ -332,30 +345,85 @@ parse_callgraph_section_contents (void *file_handl
{
char *contents;
char *caller;
+ char *node_weight_s = NULL;
unsigned int read_length = 0, curr_length = 0;
Node *caller_node;
/* HEADER_LEN is the length of string 'Function '. */
const int HEADER_LEN = 9;
- /* First string in contents is 'Function <function-name>'. */
+ /* Prefix of line containing node weights. */
+ const char *NODE_WEIGHT_PREFIX = "Weight ";
+ /* Prefix of line containing max bb count of cold split part. */
+ const char *SPLIT_FUNCTION_PREFIX = "ColdWeight ";
+
+ /* First string in contents is 'Function <function-name>'. */
assert (length > 0);
contents = (char*) (section_contents);
- caller = get_next_string (&contents, &read_length);
+ caller = get_next_string (&contents, &read_length, 1);
assert (read_length > HEADER_LEN);
caller = canonicalize_function_name (file_handle, caller + HEADER_LEN);
curr_length = read_length;
caller_node = get_function_node (caller);
+ /* Check if next string is a node weight, it has the format
+ "Weight <entr_count> <max_count>". We could have callgraph
+ sections with or without node weights. */
+
+ /* Peek at the next string. */
+ if (curr_length < length)
+ node_weight_s = get_next_string (&contents, &read_length, 0);
+ if (node_weight_s != NULL
+ && is_prefix_of (NODE_WEIGHT_PREFIX, node_weight_s))
+ {
+ char *max_count_s;
+ unsigned long long max_count;
+ unsigned long long node_weight
+ = atoll (node_weight_s + strlen (NODE_WEIGHT_PREFIX));
+ /* Functions like comdats only have one caller_node and can
+ have multiple node weights from multiple modules. */
+ caller_node->weight += node_weight;
+
+ /* Find the space and get the max_count. */
+ max_count_s = strstr (node_weight_s + strlen (NODE_WEIGHT_PREFIX), " ");
+ if (max_count_s != NULL)
+ {
+ max_count = atoll (max_count_s + 1);
+ /* Functions like comdats only have one caller_node and can
+ have multiple node weights from multiple modules. */
+ caller_node->max_count += max_count;
+ }
+ /* Actually read the node weight here. */
+ get_next_string (&contents, &read_length, 1);
+ curr_length += read_length;
+ }
+
+ /* If the function is split it could have the weight of the split cold
+ section here as "SplitWeight <max_count>". */
+
+ /* Peek at the next string. */
+ if (curr_length < length)
+ node_weight_s = get_next_string (&contents, &read_length, 0);
+ if (node_weight_s != NULL
+ && is_prefix_of (SPLIT_FUNCTION_PREFIX, node_weight_s))
+ {
+ unsigned long long split_weight
+ = atoll (node_weight_s + strlen (SPLIT_FUNCTION_PREFIX));
+ caller_node->split_weight = split_weight;
+ /* Actually read the node weight here. */
+ get_next_string (&contents, &read_length, 1);
+ curr_length += read_length;
+ }
+
while (curr_length < length)
{
/* Read callee, weight tuples. */
char *callee;
char *weight_str;
- unsigned int weight;
+ unsigned long long weight;
Node *callee_node;
- callee = get_next_string (&contents, &read_length);
+ callee = get_next_string (&contents, &read_length, 1);
curr_length += read_length;
/* We can have multiple header lines; such a situation arises when
@@ -369,8 +437,8 @@ parse_callgraph_section_contents (void *file_handl
callee_node = get_function_node (callee);
assert (curr_length < length);
- weight_str = get_next_string (&contents, &read_length);
- weight = atoi (weight_str);
+ weight_str = get_next_string (&contents, &read_length, 1);
+ weight = atoll (weight_str);
curr_length += read_length;
update_edge (caller_node, callee_node, weight);
}
@@ -473,16 +541,56 @@ static void set_node_type (Node *n)
slot = htab_find_with_hash (section_map, name, htab_hash_string (name));
if (slot != NULL)
{
+ /* Update the section instance corresponding to the node instance.
+ Assign the weights from the node instance to the section instance. */
+ Section_id *s = (Section_id *)(slot);
+ Section_id *s_comdat;
+ assert (s->weight == 0 && s->computed_weight == 0 && s->max_count == 0);
+ s->weight = n->weight;
+ s->computed_weight = n->computed_weight;
+ s->max_count = n->max_count;
+
+ /* If s is comdat, update all the comdat candidates for weight. */
+ s_comdat = s->comdat_group;
+ while (s_comdat != NULL)
+ {
+ s_comdat->weight = s->weight;
+ s_comdat->computed_weight = s->computed_weight;
+ s_comdat->max_count = s->max_count;
+ s_comdat = s_comdat->comdat_group;
+ }
set_as_real_node (n);
num_real_nodes++;
}
}
+/* Return true if WEIGHT is more than the cutoff, specified either as
+ as percent, CUTOFF_P, of MAX or as an absolute value, CUTOFF_A. */
+int
+edge_over_cutoff (unsigned long long weight, unsigned long long max,
+ unsigned int cutoff_p, unsigned long long cutoff_a)
+{
+ /* First check if weight if more than cutoff_p% of max. */
+ if (((double)(max) * (cutoff_p/100.0)) >= (double) weight)
+ return 0;
+ if (cutoff_a >= weight)
+ return 0;
+ return 1;
+}
+
+/* Edge cutoff is used to discard callgraph edges that are not above a
+ certain threshold. cutoff_p is to express this as a percent of the
+ maximum value and cutoff_a is used to express this as an absolute
+ value. */
+extern unsigned int edge_cutoff_p;
+extern unsigned long long edge_cutoff_a;
+
void
find_pettis_hansen_function_layout (FILE *fp)
{
Node *n_it;
Edge *it;
+ unsigned int max_edge_value = 0;
assert (node_chain != NULL);
assert (active_edges != NULL);
@@ -500,8 +608,18 @@ find_pettis_hansen_function_layout (FILE *fp)
set_edge_type (it);
it = find_max_edge ();
+ if (it != NULL)
+ max_edge_value = it->weight;
while (it != NULL)
{
+ if (!edge_over_cutoff (it->weight, max_edge_value, edge_cutoff_p,
+ edge_cutoff_a))
+ {
+ if (fp !=NULL)
+ fprintf (fp, "Not considering edge with weight %llu and below\n",
+ it->weight);
+ break;
+ }
collapse_edge (it);
it = find_max_edge ();
}
@@ -520,12 +638,17 @@ const char *section_types[] = {".text.hot.",
".text." };
/* For sections that are not in the callgraph, the priority gives the
- order in which the sections should be laid out. Sections are grouped
- according to priority, higher priority (lower number), and then laid
- out in priority order. */
-const int section_priority[] = {0, 3, 4, 2, 1};
-const int UNLIKELY_SECTION_INDEX = 2;
+ importance of each section type. Sections are grouped according to
+ priority, higher priority (lower number). */
+const int section_priority[] = {0, 3, 4, 1, 2};
+/* Order in which the sections must be laid out is given by
+ section_position[section_type]. The order in which the section
+ types are laid out from address low to high are: .text.unlikely,
+ .text.cold, .text.startup, .text., .text.hot followed by the
+ sections grouped by the callgraph. */
+const int section_position[] = {4, 1, 0, 2, 3};
+
/* Maps the function name corresponding to section SECTION_NAME to the
object handle and the section index. */
@@ -628,9 +751,118 @@ write_out_node (Section_id *s, Section_id **sectio
}
}
-int unlikely_segment_start = 0;
-int unlikely_segment_end = 0;
+/* Find the max of a, b and c. */
+static unsigned long long
+get_max (unsigned long long a, unsigned long long b, unsigned long long c)
+{
+ unsigned long long max = a;
+ if (b > max)
+ max = b;
+ if (c > max)
+ max = c;
+ return max;
+}
+/* This is true if the max count of any bb in a function should be used as
+ the node weight rather than the count of the entry bb. */
+extern int use_max_count;
+
+/* Comparison function for sorting two sections a and b by their
+ weight. */
+static
+int section_weight_compare (const void *a, const void *b)
+{
+ Section_id *s_a = *(Section_id **)a;
+ Section_id *s_b = *(Section_id **)b;
+ assert (use_max_count <= 1);
+ unsigned long long max_sa_weight = get_max (s_a->weight, s_a->computed_weight,
+ s_a->max_count * use_max_count);
+ unsigned long long max_sb_weight = get_max (s_b->weight, s_b->computed_weight,
+ s_b->max_count * use_max_count);
+
+ if (max_sa_weight < max_sb_weight)
+ return -1;
+ else if (max_sa_weight == max_sb_weight)
+ return 0;
+
+ return 1;
+}
+
+/* s is a pointer to a section and the group of sections is linked
+ via s->group. The output is the list of sections sorted by their
+ node weights (which is the maximum of their profile count, computed
+ weights or the max bb count if use_max_count is true). */
+static Section_id *
+sort_section_group (Section_id *s)
+{
+ Section_id **sort_array;
+ Section_id *s_tmp;
+ int num_elements = 0;
+ int i;
+
+ if (s == NULL)
+ return s;
+
+ s_tmp = s;
+ while (s_tmp != NULL)
+ {
+ num_elements++;
+ s_tmp = s_tmp->group;
+ }
+
+ if (num_elements == 1)
+ return s;
+
+ XNEWVEC_ALLOC (sort_array, Section_id *, num_elements);
+ s_tmp = s;
+ for (i = 0; i < num_elements; ++i)
+ {
+ sort_array[i] = s_tmp;
+ s_tmp = s_tmp->group;
+ }
+
+ for (i = 0; i < num_elements; ++i)
+ {
+ sort_array[i]->group = NULL;
+ }
+
+ qsort (sort_array, num_elements, sizeof (Section_id *),
+ section_weight_compare);
+
+ s_tmp = sort_array[0];
+ for (i = 1; i < num_elements; ++i)
+ {
+ s_tmp->group = sort_array[i];
+ s_tmp = s_tmp->group;
+ }
+ s_tmp->group = NULL;
+ return sort_array[0];
+}
+
+/* If sort_name_prefix is true then the sections not touched by the callgraph
+ are grouped according to their name prefix. When sort_name_prefix is zero,
+ all the sections are put together and sorted according to their node
+ weights. The default value of sort_name_prefix is 0. Even when sections
+ are grouped by their prefix, each group is sorted by the node weights. */
+extern int sort_name_prefix;
+static int section_position_index (int section_type)
+{
+ assert (section_type >= 0 && section_type < NUM_SECTION_TYPES);
+ if (!sort_name_prefix)
+ return 0;
+ else
+ return section_position[section_type];
+}
+
+/* Track where the unlikely sections start and end. This will be needed if
+ the unlikely sections need to be split into a separate segment. */
+int unlikely_segment_start = -1;
+int unlikely_segment_end = -1;
+
+/* This value is used to determine the profile threshold below which the
+ section is considered unlikely. The default is zero. */
+extern unsigned long long unlikely_segment_profile_cutoff;
+
/* Visit each node and print the chain of merged nodes to the file. Update
HANDLES and SHNDX to contain the ordered list of sections. */
@@ -642,12 +874,15 @@ get_layout (FILE *fp, void*** handles,
int i = 0;
int position;
void *slot;
+ int unlikely_section_index;
+ const int CALLGRAPH_POSITION = 5;
- /* Form NUM_SECTION_TYPES + 1 groups of sections. Index 0 corresponds
+ /* Form NUM_SECTION_TYPES + 1 groups of sections. Index 5 corresponds
to the list of sections that correspond to functions in the callgraph.
For other sections, they are grouped by section_type and stored in
- index: (section_type + 1). SECTION_START points to the first
- section in each section group and SECTION_END points to the last. */
+ index: section_position[section_type]).
+ SECTION_START points to the first section in each section group and
+ SECTION_END points to the last. */
Section_id *section_start[NUM_SECTION_TYPES + 1];
Section_id *section_end[NUM_SECTION_TYPES + 1];
Section_id *s_it;
@@ -675,7 +910,8 @@ get_layout (FILE *fp, void*** handles,
htab_hash_string (n_it->name));
assert (slot != NULL);
s = (Section_id *)slot;
- write_out_node (s, §ion_start[0], §ion_end[0]);
+ write_out_node (s, §ion_start[CALLGRAPH_POSITION],
+ §ion_end[CALLGRAPH_POSITION]);
if (fp)
fprintf (fp, "# Callgraph group : %s", n_it->name);
@@ -689,8 +925,8 @@ get_layout (FILE *fp, void*** handles,
htab_hash_string (node->name));
assert (slot != NULL);
s = (Section_id *)slot;
- write_out_node (s, §ion_start[0],
- §ion_end[0]);
+ write_out_node (s, §ion_start[CALLGRAPH_POSITION],
+ §ion_end[CALLGRAPH_POSITION]);
if (fp)
fprintf (fp, " %s", node->name);
}
@@ -707,30 +943,87 @@ get_layout (FILE *fp, void*** handles,
while (s_it)
{
if (!s_it->processed)
- write_out_node (s_it, §ion_start[s_it->section_type + 1],
- §ion_end[s_it->section_type + 1]);
+ {
+ int index = section_position_index(s_it->section_type);
+ write_out_node (s_it, §ion_start[index], §ion_end[index]);
+ }
s_it = s_it->next;
}
+ /* Determine the unlikely section index */
+ unlikely_section_index = -1;
+ for (i = 0; i < ARRAY_SIZE (section_types); ++i)
+ if (strcmp (".text.unlikely.", section_types[i]) == 0)
+ break;
+ assert (i < ARRAY_SIZE (section_types));
+ unlikely_section_index = section_position_index(i);
+
position = 0;
for (i = 0; i < NUM_SECTION_TYPES + 1; ++i)
{
s_it = section_start[i];
- if (i == UNLIKELY_SECTION_INDEX + 1)
- unlikely_segment_start = position;
- while (s_it)
+
+ if (s_it == NULL)
+ continue;
+
+ /* Sort all section groups by weight except the callgraph group. */
+ if (i < NUM_SECTION_TYPES)
+ s_it = sort_section_group (s_it);
+
+ /* Start the unlikely segment if necessary. */
+ assert (use_max_count <= 1);
+ if (i == unlikely_section_index
+ && (get_max (s_it->weight, s_it->computed_weight,
+ s_it->max_count * use_max_count)
+ <= unlikely_segment_profile_cutoff))
+ {
+ assert (unlikely_segment_start == -1);
+ unlikely_segment_start = position;
+ if (fp != NULL)
+ fprintf (fp, "=== Unlikely sections start ===\n");
+ }
+
+ do
{
assert (position < num_sections);
(*handles)[position] = s_it->handle;
(*shndx)[position] = s_it->shndx;
+
+ /* Check if this section will end the unlikely segment. */
+ if (i == unlikely_section_index
+ && unlikely_segment_start >= 0
+ && unlikely_segment_start != position
+ && unlikely_segment_end == -1
+ && (get_max (s_it->weight, s_it->computed_weight,
+ s_it->max_count * use_max_count)
+ > unlikely_segment_profile_cutoff))
+ {
+ unlikely_segment_end = position - 1;
+ if (fp != NULL)
+ fprintf (fp, "=== Unlikely sections end ===\n");
+ }
+
position++;
if (fp != NULL)
- fprintf (fp, "%s\n", s_it->full_name);
+ {
+ fprintf (fp, "%s entry count = %llu computed = %llu "
+ "max count = %llu\n", s_it->full_name, s_it->weight,
+ s_it->computed_weight, s_it->max_count);
+ }
s_it = s_it->group;
}
- if (i == UNLIKELY_SECTION_INDEX + 1)
- unlikely_segment_end = position;
+ while (s_it);
+
+ /* End the unlikely segment if it has not been done already. */
+ if (i == unlikely_section_index
+ && unlikely_segment_start != -1
+ && unlikely_segment_end == -1)
+ {
+ unlikely_segment_end = position - 1;
+ if (fp != NULL)
+ fprintf (fp, "=== Unlikely sections end ===\n");
+ }
}
return position;
}
===================================================================
@@ -61,6 +61,15 @@ typedef struct node_d
{
unsigned int id;
char *name;
+ /* Node weight, execution count of entry bb. */
+ unsigned long long weight;
+ /* Weight computed by adding weights of incoming edges to
+ this node. */
+ unsigned long long computed_weight;
+ /* Max count of any bb executed. */
+ unsigned long long max_count;
+ /* Stores the max count of any bb in the split cold section. */
+ unsigned long long split_weight;
/* Chain all the Nodes created. */
struct node_d *next;
/* Pointer to the next node in the chain of merged nodes. */
@@ -81,6 +90,10 @@ make_node (unsigned int id, char *name)
XNEW_ALLOC (node, Node);
node->id = id;
node->name = name;
+ node->weight = 0;
+ node->computed_weight = 0;
+ node->max_count = 0;
+ node->split_weight = 0;
node->is_real_node = 0;
node->next = NULL;
node->edge_list = NULL;
@@ -133,7 +146,7 @@ struct edge_d
{
Node *first_function;
Node *second_function;
- unsigned int weight;
+ unsigned long long weight;
Edge_type type;
/* 1 if the nodes corresponding to this edge have been merged. */
unsigned int is_merged;
@@ -143,7 +156,7 @@ struct edge_d
};
inline static Edge *
-make_edge (Node *first, Node *second, unsigned int weight)
+make_edge (Node *first, Node *second, unsigned long long weight)
{
Edge *edge;
XNEW_ALLOC (edge, Edge);
@@ -205,6 +218,12 @@ typedef struct section_id_
char *full_name;
void *handle;
int shndx;
+ /* Corresponds to node weight. */
+ unsigned long long weight;
+ /* Corresponds to node's computed weight. */
+ unsigned long long computed_weight;
+ /* Max count of bb executed in this function. */
+ unsigned long long max_count;
/* Type of prefix in section name. */
int section_type;
/* Pointer to the next section in the same comdat_group. */
@@ -213,6 +232,10 @@ typedef struct section_id_
struct section_id_ *next;
/* Used for grouping sections. */
struct section_id_ *group;
+ /* Pointer to the cold split section if any. If this function
+ is comdat hot and kept, pointer to the kept cold split
+ section. */
+ struct section_id_ *split_section;
/* Check if this section has been considered for output. */
char processed;
} Section_id;
@@ -233,6 +256,10 @@ make_section_id (char *name, char *full_name,
s->next = NULL;
s->group = NULL;
s->processed = 0;
+ s->weight = 0;
+ s->computed_weight = 0;
+ s->max_count = 0;
+ s->split_section = NULL;
return s;
}
===================================================================
@@ -97,6 +97,29 @@ static int no_op = 0;
"--plugin-opt,split_segment=yes". */
static int split_segment = 0;
+/* If SORT_NAME_PREFIX is true then the sections not touched by the callgraph
+ are grouped according to their name prefix. When SORT_NAME_PREFIX is zero,
+ all the sections are put together and sorted according to their node
+ weights. The default value of SORT_NAME_PREFIX is 0. Even when sections
+ are grouped by their prefix, each group is sorted by the node weights. */
+int sort_name_prefix = 0;
+
+/* Edge cutoff is used to discard callgraph edges that are not above a
+ certain threshold. cutoff_p is to express this as a percent of the
+ maximum value and cutoff_a is used to express this as an absolute
+ value. The default is to consider all edges. */
+unsigned int edge_cutoff_p = 0;
+unsigned long long edge_cutoff_a = 0;
+
+/* This is true if the max count of any bb in a function should be used as
+ the node weight rather than the count of the entry bb. */
+int use_max_count = 1;
+
+/* This is used to decide which sections are considered unlikely. If the
+ section profile is greater than this value then it is not unlikely
+ executed. */
+unsigned long long unlikely_segment_profile_cutoff = 0;
+
/* Copies new output file name out_file */
void get_filename (const char *name)
{
@@ -133,6 +156,10 @@ process_option (const char *name)
const char *option_group = "group=";
const char *option_file = "file=";
const char *option_segment = "split_segment=";
+ const char *option_edge_cutoff = "edge_cutoff=";
+ const char *option_sort_name_prefix = "sort_name_prefix=";
+ const char *option_max_count = "use_maxcount=";
+ const char *option_unlikely_cutoff = "unlikely_cutoff=";
/* Check if option is "group=" */
if (strncmp (name, option_group, strlen (option_group)) == 0)
@@ -164,6 +191,65 @@ process_option (const char *name)
return 0;
}
}
+ else if (strncmp (name, option_edge_cutoff,
+ strlen (option_edge_cutoff)) == 0)
+ {
+ const char *a_or_p = name + strlen (option_edge_cutoff);
+ if (a_or_p[0] == 'p')
+ {
+ edge_cutoff_p = atoi (a_or_p + 1);
+ return 0;
+ }
+ else if (a_or_p[0] == 'a')
+ {
+ edge_cutoff_a = atoll (a_or_p + 1);
+ return 0;
+ }
+ else
+ {
+ fprintf (stderr, "Wrong format for edge_cutoff, "
+ "use edge_cutoff=[p|a]<value>\n");
+ }
+ }
+ else if (strncmp (name, option_sort_name_prefix,
+ strlen (option_sort_name_prefix)) == 0)
+ {
+ const char *option_val = name + strlen (option_sort_name_prefix);
+ if (strcmp (option_val, "no") == 0)
+ {
+ sort_name_prefix = 0;
+ return 0;
+ }
+ else if (strcmp (option_val, "yes") == 0)
+ {
+ sort_name_prefix = 1;
+ return 0;
+ }
+ }
+ else if (strncmp (name, option_max_count,
+ strlen (option_max_count)) == 0)
+ {
+ const char *option_val = name + strlen (option_max_count);
+ if (strcmp (option_val, "no") == 0)
+ {
+ use_max_count = 0;
+ return 0;
+ }
+ else if (strcmp (option_val, "yes") == 0)
+ {
+ use_max_count = 1;
+ return 0;
+ }
+ }
+ /* Check if option is unlikely_cutoff. This decides what sections are
+ considered unlikely for segment splitting. The default cutoff is 0. */
+ else if (strncmp (name, option_unlikely_cutoff,
+ strlen (option_unlikely_cutoff)) == 0)
+ {
+ const char *option_val = name + strlen (option_unlikely_cutoff);
+ unlikely_segment_profile_cutoff = atoll (option_val);
+ return 0;
+ }
/* Flag error on unknown plugin option. */
MSG_ERROR ("Unknown option to function reordering plugin :%s\n", name);
@@ -365,24 +451,27 @@ all_symbols_read_hook (void)
section_list[i].shndx = shndx[i];
}
- if (split_segment == 1)
+ if (split_segment == 1
+ && unlikely_segment_start >= 0
+ && (unlikely_segment_end >= unlikely_segment_start))
{
/* Pass the new order of functions to the linker. */
/* Fix the order of all sections upto the beginning of the
unlikely section. */
update_section_order (section_list, unlikely_segment_start);
- assert (num_entries >= unlikely_segment_end);
+ assert (num_entries > unlikely_segment_end);
/* Fix the order of all sections after the end of the unlikely
section. */
- update_section_order (section_list, num_entries - unlikely_segment_end);
+ update_section_order (section_list + unlikely_segment_end + 1,
+ num_entries - unlikely_segment_end - 1);
/* Map all unlikely code into a new segment. */
unique_segment_for_sections (
".text.unlikely_executed", 0, 0x1000,
section_list + unlikely_segment_start,
- unlikely_segment_end - unlikely_segment_start);
+ unlikely_segment_end - unlikely_segment_start + 1);
if (fp != NULL)
fprintf (fp, "Moving %u section(s) to new segment\n",
- unlikely_segment_end - unlikely_segment_start);
+ unlikely_segment_end - unlikely_segment_start + 1);
}
else
{
===================================================================
@@ -204,6 +204,9 @@ rtx current_insn_predicate;
/* True if printing into -fdump-final-insns= dump. */
bool final_insns_dump_p;
+/* True if the function has a split cold section. */
+static bool has_cold_section_p;
+
#ifdef HAVE_ATTR_length
static int asm_insn_count (rtx);
#endif
@@ -1934,6 +1937,7 @@ final_scan_insn (rtx insn, FILE *file, int optimiz
targetm.asm_out.function_switched_text_sections (asm_out_file,
current_function_decl,
in_cold_section_p);
+ has_cold_section_p = true;
break;
case NOTE_INSN_BASIC_BLOCK:
@@ -4330,6 +4334,24 @@ dump_cgraph_profiles (void)
}
}
+/* Iterate through the basic blocks in DECL and get the max count.
+ If COLD is true, find the max count of the cold part of the split. */
+static gcov_type
+get_max_count (tree decl, bool cold)
+{
+ basic_block bb;
+ gcov_type max_count = (cgraph_get_node (decl))->count;
+
+ FOR_EACH_BB (bb)
+ {
+ if (cold && BB_PARTITION (bb) != BB_COLD_PARTITION)
+ continue;
+ if (bb->count > max_count)
+ max_count = bb->count;
+ }
+ return max_count;
+}
+
/* Turn the RTL into assembly. */
static unsigned int
rest_of_handle_final (void)
@@ -4348,6 +4370,8 @@ rest_of_handle_final (void)
gcc_assert (GET_CODE (x) == SYMBOL_REF);
fnname = XSTR (x, 0);
+ has_cold_section_p = false;
+
assemble_start_function (current_function_decl, fnname);
final_start_function (get_insns (), asm_out_file, optimize);
final (get_insns (), asm_out_file, optimize);
@@ -4403,12 +4427,27 @@ rest_of_handle_final (void)
if ((flag_reorder_functions > 1)
&& flag_profile_use
&& cgraph_get_node (current_function_decl) != NULL
- && (cgraph_get_node (current_function_decl))->callees != NULL)
+ && ((cgraph_get_node (current_function_decl))->callees != NULL
+ || (cgraph_get_node (current_function_decl))->count > 0))
{
flags = SECTION_DEBUG | SECTION_EXCLUDE;
asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname);
switch_to_section (get_section (profile_fnname, flags, NULL));
fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname);
+ fprintf (asm_out_file, "\t.string \"Weight "
+ HOST_WIDEST_INT_PRINT_DEC
+ " "
+ HOST_WIDEST_INT_PRINT_DEC
+ "\"\n",
+ (cgraph_get_node (current_function_decl))->count,
+ get_max_count (current_function_decl, false));
+ /* If this function is split into a cold section, record that weight
+ here. */
+ if (has_cold_section_p)
+ fprintf (asm_out_file, "\t.string \"ColdWeight "
+ HOST_WIDEST_INT_PRINT_DEC
+ "\"\n",
+ get_max_count (current_function_decl, true));
dump_cgraph_profiles ();
free (profile_fnname);
}
===================================================================
@@ -1,41 +0,0 @@
-/* Verify if call-graph profile sections are created
- with -freorder-functions=. */
-/* { dg-require-section-exclude "" } */
-/* { dg-require-linker-function-reordering-plugin "" } */
-/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections --save-temps -Wl,-plugin-opt,file=callgraph-profiles.C.dump -Wl,-plugin-opt,split_segment=yes" } */
-
-int
-notcalled ()
-{
- return 0;
-}
-
-int __attribute__ ((noinline))
-foo ()
-{
- return 1;
-}
-
-int __attribute__ ((noinline))
-bar ()
-{
- return 0;
-}
-
-int main ()
-{
- int sum;
- for (int i = 0; i< 1000; i++)
- {
- sum = foo () + bar();
- }
- return sum * bar ();
-}
-
-/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */
-/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */
-/* { dg-final-use { scan-file callgraph-profiles.C.dump "Callgraph group : main _Z3barv _Z3foov\n" } } */
-/* { dg-final-use { scan-file callgraph-profiles.C.dump "\.text\.*\.main\n.text\.*\._Z3barv\n\.text\.*\._Z3foov\n\.text\.*\._Z9notcalledv" } } */
-/* { dg-final-use { scan-file callgraph-profiles.C.dump "Moving 1 section\\(s\\) to new segment" } } */
-/* { dg-final-use { cleanup-saved-temps } } */
-/* { dg-final-use { remove-build-file "callgraph-profiles.C.dump" } } */
===================================================================
@@ -0,0 +1,45 @@
+/* Verify if call-graph profile sections are created with -freorder-functions=.
+ Check of edge profiles and node profiles are present in the profile
+ sections. Check if the segment splitting API is invoked. */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections --save-temps -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,split_segment=yes" } */
+
+int
+notcalled ()
+{
+ return 0;
+}
+
+int __attribute__ ((noinline))
+foo ()
+{
+ return 1;
+}
+
+int __attribute__ ((noinline))
+bar ()
+{
+ return 0;
+}
+
+int main ()
+{
+ int sum;
+ for (int i = 0; i< 1000; i++)
+ {
+ sum = foo () + bar();
+ }
+ return sum * bar ();
+}
+
+/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */
+/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */
+/* { dg-final-use { scan-assembler "\.string \"Weight 1000 1000\"" } } */
+/* { dg-final-use { scan-assembler "\.string \"Weight 1001 1001\"" } } */
+/* { dg-final-use { scan-file linker.dump "Callgraph group : _Z3foov _Z3barv main\n" } } */
+/* { dg-final-use { scan-file linker.dump ".text\.*\._Z9notcalledv entry count = 0 computed = 0 max count = 0" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z3foov.*\n\.text\.*\._Z3barv.*\n\.text\.*\.main.*\n" } } */
+/* { dg-final-use { scan-file linker.dump "Moving . section\\(s\\) to new segment" } } */
+/* { dg-final-use { cleanup-saved-temps } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */
===================================================================
@@ -0,0 +1,25 @@
+/* Check if the edge_cutoffa option to the function reordering plugin works as
+ expected. */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=a1000" } */
+
+int __attribute__ ((noinline))
+foo ()
+{
+ return 1;
+}
+
+int main ()
+{
+ int sum = 0;
+ for (int i = 0; i< 1000; i++)
+ {
+ sum += foo ();
+ }
+ return sum - 1000;
+}
+
+/* { dg-final-use { scan-file linker.dump "Not considering edge with weight 1000 and below" } } */
+/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */
===================================================================
@@ -0,0 +1,25 @@
+/* Check if the edge_cutoffp option to the function reordering plugin works as
+ expected. */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100" } */
+
+int __attribute__ ((noinline))
+foo ()
+{
+ return 1;
+}
+
+int main ()
+{
+ int sum = 0;
+ for (int i = 0; i< 1000; i++)
+ {
+ sum += foo ();
+ }
+ return sum - 1000;
+}
+
+/* { dg-final-use { scan-file linker.dump "Not considering edge with weight 1000 and below" } } */
+/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */
===================================================================
@@ -0,0 +1,41 @@
+/* Check if cutting off callgraph gets all functions laid out only according to
+ function profiles and not prefixes. foo_200 is as hot as the other foo's but
+ has a unlikely section prefix. This should not matter as sort_name_prefix
+ is turned off. */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump,-plugin-opt,edge_cutoff=p100,-plugin-opt,sort_name_prefix=no" } */
+
+int __attribute__ ((noinline, section(".text.unlikely._Z7foo_200v")))
+foo_200 ()
+{
+ return 1;
+}
+
+int __attribute__ ((noinline))
+foo_100 ()
+{
+ return 1;
+}
+
+int __attribute__ ((noinline))
+foo_300 ()
+{
+ return 1;
+}
+int main ()
+{
+ int sum = 0;
+ for (int i = 0; i< 200; i++)
+ sum += foo_200 ();
+ for (int i = 0; i< 100; i++)
+ sum += foo_100 ();
+ for (int i = 0; i< 300; i++)
+ sum += foo_300 ();
+ return sum - 600;
+}
+
+/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */
+/* { dg-final-use { scan-file linker.dump ".text.unlikely._Z7foo_200v entry count = 200 computed = 200 max count = 200" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100v.*\n\.text\.unlikely\._Z7foo_200v.*\n\.text\.*\._Z7foo_300v.*\n" } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */
===================================================================
@@ -0,0 +1,41 @@
+/* Check if cutting off callgraph and using sort_name_prefix gets all functions laid out
+ according to prefixes. foo_200 is almost as hot as the other foo's but should
+ not be grouped with them as it has a different section prefix and sort_name_prefix is
+ turned on. */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump,-plugin-opt,edge_cutoff=p100,-plugin-opt,sort_name_prefix=yes" } */
+
+int __attribute__ ((noinline, section(".text.unlikely._Z7foo_200v")))
+foo_200 ()
+{
+ return 1;
+}
+
+int __attribute__ ((noinline))
+foo_100 ()
+{
+ return 1;
+}
+
+int __attribute__ ((noinline))
+foo_300 ()
+{
+ return 1;
+}
+int main ()
+{
+ int sum = 0;
+ for (int i = 0; i< 200; i++)
+ sum += foo_200 ();
+ for (int i = 0; i< 100; i++)
+ sum += foo_100 ();
+ for (int i = 0; i< 300; i++)
+ sum += foo_300 ();
+ return sum - 600;
+}
+
+/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */
+/* { dg-final-use { scan-file linker.dump ".text.unlikely._Z7foo_200v entry count = 200 computed = 200 max count = 200" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.unlikely\._Z7foo_200v.*\n\.text\.*\._Z7foo_100v.*\n\.text\.*\._Z7foo_300v.*\n" } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */
===================================================================
@@ -0,0 +1,53 @@
+/* Check if use_maxcount works as expected. This makes the node profile weight to
+ be equal to the maximum count of any basic block in a function rather than the
+ entry count. foo_100's maxcount > foo_200's max count */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,use_maxcount=yes" } */
+
+
+int __attribute__ ((noinline))
+bar (int *i)
+{
+ (*i)--;
+ if (*i >= 0)
+ return 1;
+ return 0;
+}
+
+int __attribute__ ((noinline))
+foo_100 (int count)
+{
+ int sum = 0;
+ while (count > 0)
+ {
+ sum += bar(&count);
+ }
+ return sum;
+}
+
+int __attribute__ ((noinline))
+foo_200 (int count)
+{
+ int sum = 0;
+ while (count > 0)
+ {
+ sum += bar(&count);
+ }
+ return sum;
+}
+
+int main ()
+{
+ int sum = 0;
+ for (int i = 0; i< 200; i++)
+ sum += foo_200 (100);
+ for (int i = 0; i< 100; i++)
+ sum += foo_100 (400);
+ return sum - 60000;
+}
+/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i entry count = 100 computed = 100 max count = 40000" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i entry count = 200 computed = 200 max count = 20000" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i.*\n\.text\.*\._Z7foo_100i.*\n\.text\.*\._Z3barPi.*\n" } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */
===================================================================
@@ -0,0 +1,55 @@
+/* Check if turning off use_maxcount works as expected. This makes the node
+ profile weight to be equal to the entry count of any basic block in a
+ function rather than the max count.
+ foo_100's maxcount > foo_200's max count but
+ foo_100's entry count < foo_200's entry count. */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,use_maxcount=no" } */
+
+
+int __attribute__ ((noinline))
+bar (int *i)
+{
+ (*i)--;
+ if (*i >= 0)
+ return 1;
+ return 0;
+}
+
+int __attribute__ ((noinline))
+foo_100 (int count)
+{
+ int sum = 0;
+ while (count > 0)
+ {
+ sum += bar(&count);
+ }
+ return sum;
+}
+
+int __attribute__ ((noinline))
+foo_200 (int count)
+{
+ int sum = 0;
+ while (count > 0)
+ {
+ sum += bar(&count);
+ }
+ return sum;
+}
+
+int main ()
+{
+ int sum = 0;
+ for (int i = 0; i< 200; i++)
+ sum += foo_200 (100);
+ for (int i = 0; i< 100; i++)
+ sum += foo_100 (400);
+ return sum - 60000;
+}
+/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i entry count = 100 computed = 100 max count = 40000" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i entry count = 200 computed = 200 max count = 20000" } } */
+/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i.*\n\.text\.*\._Z7foo_200i.*\n\.text\.*\._Z3barPi.*\n" } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */
===================================================================
@@ -0,0 +1,19 @@
+/* Check if unlikely_cutoff works as expected. Function foo is unlikely because of the cutoff. */
+/* { dg-require-section-exclude "" } */
+/* { dg-require-linker-function-reordering-plugin "" } */
+/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,unlikely_cutoff=1" } */
+
+int __attribute__ ((noinline,section(".text.hot._Z3foov")))
+foo ()
+{
+ return 0;
+}
+
+int main()
+{
+ return foo ();
+}
+
+/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */
+/* { dg-final-use { scan-file linker.dump "=== Unlikely sections start ===\n.*\.text\.hot\._Z3foov.* entry count = 1 computed = 1 max count = 1\n.*=== Unlikely sections end ===" } } */
+/* { dg-final-use { remove-build-file "linker.dump" } } */