[google,4_7] Function reordering plugin enhancements

Message ID	CAAs8HmwEc=0qD27g5FBzGAVmJAxeOiAz2huy8yBUjmf8UhvEGQ@mail.gmail.com
State	New
Headers	show Return-Path: <gcc-patches-return-340364-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:date:message-id:subject:from:to:content-type; q= dns; s=default; b=F1kb+0BOfXnSM0sFWoNQa7rGoEyYstLdZ5CzGkRqAzd9gC uwT/BF69LF2SSGYhRQyHNV8r15Du9HAgwA5BA/7hER/XlyOIcg3i0SeXivtLv/11 r9xBJCCIt4K4hhno/V5Or7IDlFRRSpQYKijVBB9kra1bfeFKJjKApYeiYzzEE= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org MIME-Version: 1.0 Date: Tue, 23 Apr 2013 13:51:04 -0700 Message-ID: <CAAs8HmwEc=0qD27g5FBzGAVmJAxeOiAz2huy8yBUjmf8UhvEGQ@mail.gmail.com> Subject: [google][4_7] Function reordering plugin enhancements From: Sriraman Tallam <tmsriram@google.com> To: GCC Patches <gcc-patches@gcc.gnu.org>, Teresa Johnson <tejohnson@google.com>, David Li <davidxl@google.com> Content-Type: multipart/mixed; boundary=047d7b3442e408a11304db0d5857

Index: function_reordering_plugin/callgraph.c =================================================================== --- function_reordering_plugin/callgraph.c (revision 198081) +++ function_reordering_plugin/callgraph.c (working copy) @@ -144,14 +144,18 @@ const int NUM_FUNCTIONS = 100; /* Reads off the next string from the char stream CONTENTS and updates READ_LENGTH to the length of the string read. The value of CONTENTS - is updated to start at the next string. */ + is updated to start at the next string. UPDATE_CONTENTS tells if + CONTENTS must be moved past the read string to the next string. To + peek at the string, UPDATE_CONTENTS can be set to false. */ static char * -get_next_string (char **contents, unsigned int *read_length) +get_next_string (char **contents, unsigned int *read_length, + int update_contents) { char *s = *contents; *read_length = strlen (*contents) + 1; - *contents += *read_length; + if (update_contents) + *contents += *read_length; return s; } @@ -192,7 +196,7 @@ remove_edge_from_list (Edge * curr_edge) /* Adds the WEIGHT value to the edge count of CALLER and CALLEE. */ static void -update_edge (Node *n1, Node *n2, unsigned int weight) +update_edge (Node *n1, Node *n2, unsigned long long weight) { void **slot; Raw_edge re, *r; @@ -227,6 +231,9 @@ static void e = *slot; e->weight += weight; } + /* Update the computed_weight, the computed node weight, of n2 which is the + sum of weights of all incoming edges to n2. */ + n2->computed_weight += weight; } /* Create a unique node for a function. */ @@ -288,10 +295,14 @@ void dump_edges (FILE *fp) it != NULL; it = it->next) { - fprintf (fp,"# %s ---- (%u)---- %s\n", + fprintf (fp,"# %s (%llu, %llu) ---- (%llu)---- %s (%llu, %llu)\n", it->first_function->name, + it->first_function->weight, + it->first_function->computed_weight, it->weight, - it->second_function->name); + it->second_function->name, + it->second_function->weight, + it->second_function->computed_weight); } } @@ -320,6 +331,8 @@ canonicalize_function_name (void *file_handle, cha call graph edges with appropriate weights. The section contents have the following format : Function <caller_name> + Weight <entry_count> <max_count> (optional line) + ColdWeight <max_count> (optional line) <callee_1> <edge count between caller and callee_1> <callee_2> @@ -332,30 +345,85 @@ parse_callgraph_section_contents (void *file_handl { char *contents; char *caller; + char *node_weight_s = NULL; unsigned int read_length = 0, curr_length = 0; Node *caller_node; /* HEADER_LEN is the length of string 'Function '. */ const int HEADER_LEN = 9; - /* First string in contents is 'Function <function-name>'. */ + /* Prefix of line containing node weights. */ + const char *NODE_WEIGHT_PREFIX = "Weight "; + /* Prefix of line containing max bb count of cold split part. */ + const char *SPLIT_FUNCTION_PREFIX = "ColdWeight "; + + /* First string in contents is 'Function <function-name>'. */ assert (length > 0); contents = (char*) (section_contents); - caller = get_next_string (&contents, &read_length); + caller = get_next_string (&contents, &read_length, 1); assert (read_length > HEADER_LEN); caller = canonicalize_function_name (file_handle, caller + HEADER_LEN); curr_length = read_length; caller_node = get_function_node (caller); + /* Check if next string is a node weight, it has the format + "Weight <entr_count> <max_count>". We could have callgraph + sections with or without node weights. */ + + /* Peek at the next string. */ + if (curr_length < length) + node_weight_s = get_next_string (&contents, &read_length, 0); + if (node_weight_s != NULL + && is_prefix_of (NODE_WEIGHT_PREFIX, node_weight_s)) + { + char *max_count_s; + unsigned long long max_count; + unsigned long long node_weight + = atoll (node_weight_s + strlen (NODE_WEIGHT_PREFIX)); + /* Functions like comdats only have one caller_node and can + have multiple node weights from multiple modules. */ + caller_node->weight += node_weight; + + /* Find the space and get the max_count. */ + max_count_s = strstr (node_weight_s + strlen (NODE_WEIGHT_PREFIX), " "); + if (max_count_s != NULL) + { + max_count = atoll (max_count_s + 1); + /* Functions like comdats only have one caller_node and can + have multiple node weights from multiple modules. */ + caller_node->max_count += max_count; + } + /* Actually read the node weight here. */ + get_next_string (&contents, &read_length, 1); + curr_length += read_length; + } + + /* If the function is split it could have the weight of the split cold + section here as "SplitWeight <max_count>". */ + + /* Peek at the next string. */ + if (curr_length < length) + node_weight_s = get_next_string (&contents, &read_length, 0); + if (node_weight_s != NULL + && is_prefix_of (SPLIT_FUNCTION_PREFIX, node_weight_s)) + { + unsigned long long split_weight + = atoll (node_weight_s + strlen (SPLIT_FUNCTION_PREFIX)); + caller_node->split_weight = split_weight; + /* Actually read the node weight here. */ + get_next_string (&contents, &read_length, 1); + curr_length += read_length; + } + while (curr_length < length) { /* Read callee, weight tuples. */ char *callee; char *weight_str; - unsigned int weight; + unsigned long long weight; Node *callee_node; - callee = get_next_string (&contents, &read_length); + callee = get_next_string (&contents, &read_length, 1); curr_length += read_length; /* We can have multiple header lines; such a situation arises when @@ -369,8 +437,8 @@ parse_callgraph_section_contents (void *file_handl callee_node = get_function_node (callee); assert (curr_length < length); - weight_str = get_next_string (&contents, &read_length); - weight = atoi (weight_str); + weight_str = get_next_string (&contents, &read_length, 1); + weight = atoll (weight_str); curr_length += read_length; update_edge (caller_node, callee_node, weight); } @@ -473,16 +541,56 @@ static void set_node_type (Node *n) slot = htab_find_with_hash (section_map, name, htab_hash_string (name)); if (slot != NULL) { + /* Update the section instance corresponding to the node instance. + Assign the weights from the node instance to the section instance. */ + Section_id *s = (Section_id *)(slot); + Section_id *s_comdat; + assert (s->weight == 0 && s->computed_weight == 0 && s->max_count == 0); + s->weight = n->weight; + s->computed_weight = n->computed_weight; + s->max_count = n->max_count; + + /* If s is comdat, update all the comdat candidates for weight. */ + s_comdat = s->comdat_group; + while (s_comdat != NULL) + { + s_comdat->weight = s->weight; + s_comdat->computed_weight = s->computed_weight; + s_comdat->max_count = s->max_count; + s_comdat = s_comdat->comdat_group; + } set_as_real_node (n); num_real_nodes++; } } +/* Return true if WEIGHT is more than the cutoff, specified either as + as percent, CUTOFF_P, of MAX or as an absolute value, CUTOFF_A. */ +int +edge_over_cutoff (unsigned long long weight, unsigned long long max, + unsigned int cutoff_p, unsigned long long cutoff_a) +{ + /* First check if weight if more than cutoff_p% of max. */ + if (((double)(max) * (cutoff_p/100.0)) >= (double) weight) + return 0; + if (cutoff_a >= weight) + return 0; + return 1; +} + +/* Edge cutoff is used to discard callgraph edges that are not above a + certain threshold. cutoff_p is to express this as a percent of the + maximum value and cutoff_a is used to express this as an absolute + value. */ +extern unsigned int edge_cutoff_p; +extern unsigned long long edge_cutoff_a; + void find_pettis_hansen_function_layout (FILE *fp) { Node *n_it; Edge *it; + unsigned int max_edge_value = 0; assert (node_chain != NULL); assert (active_edges != NULL); @@ -500,8 +608,18 @@ find_pettis_hansen_function_layout (FILE *fp) set_edge_type (it); it = find_max_edge (); + if (it != NULL) + max_edge_value = it->weight; while (it != NULL) { + if (!edge_over_cutoff (it->weight, max_edge_value, edge_cutoff_p, + edge_cutoff_a)) + { + if (fp !=NULL) + fprintf (fp, "Not considering edge with weight %llu and below\n", + it->weight); + break; + } collapse_edge (it); it = find_max_edge (); } @@ -520,12 +638,17 @@ const char *section_types[] = {".text.hot.", ".text." }; /* For sections that are not in the callgraph, the priority gives the - order in which the sections should be laid out. Sections are grouped - according to priority, higher priority (lower number), and then laid - out in priority order. */ -const int section_priority[] = {0, 3, 4, 2, 1}; -const int UNLIKELY_SECTION_INDEX = 2; + importance of each section type. Sections are grouped according to + priority, higher priority (lower number). */ +const int section_priority[] = {0, 3, 4, 1, 2}; +/* Order in which the sections must be laid out is given by + section_position[section_type]. The order in which the section + types are laid out from address low to high are: .text.unlikely, + .text.cold, .text.startup, .text., .text.hot followed by the + sections grouped by the callgraph. */ +const int section_position[] = {4, 1, 0, 2, 3}; + /* Maps the function name corresponding to section SECTION_NAME to the object handle and the section index. */ @@ -628,9 +751,118 @@ write_out_node (Section_id *s, Section_id **sectio } } -int unlikely_segment_start = 0; -int unlikely_segment_end = 0; +/* Find the max of a, b and c. */ +static unsigned long long +get_max (unsigned long long a, unsigned long long b, unsigned long long c) +{ + unsigned long long max = a; + if (b > max) + max = b; + if (c > max) + max = c; + return max; +} +/* This is true if the max count of any bb in a function should be used as + the node weight rather than the count of the entry bb. */ +extern int use_max_count; + +/* Comparison function for sorting two sections a and b by their + weight. */ +static +int section_weight_compare (const void *a, const void *b) +{ + Section_id *s_a = *(Section_id **)a; + Section_id *s_b = *(Section_id **)b; + assert (use_max_count <= 1); + unsigned long long max_sa_weight = get_max (s_a->weight, s_a->computed_weight, + s_a->max_count * use_max_count); + unsigned long long max_sb_weight = get_max (s_b->weight, s_b->computed_weight, + s_b->max_count * use_max_count); + + if (max_sa_weight < max_sb_weight) + return -1; + else if (max_sa_weight == max_sb_weight) + return 0; + + return 1; +} + +/* s is a pointer to a section and the group of sections is linked + via s->group. The output is the list of sections sorted by their + node weights (which is the maximum of their profile count, computed + weights or the max bb count if use_max_count is true). */ +static Section_id * +sort_section_group (Section_id *s) +{ + Section_id **sort_array; + Section_id *s_tmp; + int num_elements = 0; + int i; + + if (s == NULL) + return s; + + s_tmp = s; + while (s_tmp != NULL) + { + num_elements++; + s_tmp = s_tmp->group; + } + + if (num_elements == 1) + return s; + + XNEWVEC_ALLOC (sort_array, Section_id *, num_elements); + s_tmp = s; + for (i = 0; i < num_elements; ++i) + { + sort_array[i] = s_tmp; + s_tmp = s_tmp->group; + } + + for (i = 0; i < num_elements; ++i) + { + sort_array[i]->group = NULL; + } + + qsort (sort_array, num_elements, sizeof (Section_id *), + section_weight_compare); + + s_tmp = sort_array[0]; + for (i = 1; i < num_elements; ++i) + { + s_tmp->group = sort_array[i]; + s_tmp = s_tmp->group; + } + s_tmp->group = NULL; + return sort_array[0]; +} + +/* If sort_name_prefix is true then the sections not touched by the callgraph + are grouped according to their name prefix. When sort_name_prefix is zero, + all the sections are put together and sorted according to their node + weights. The default value of sort_name_prefix is 0. Even when sections + are grouped by their prefix, each group is sorted by the node weights. */ +extern int sort_name_prefix; +static int section_position_index (int section_type) +{ + assert (section_type >= 0 && section_type < NUM_SECTION_TYPES); + if (!sort_name_prefix) + return 0; + else + return section_position[section_type]; +} + +/* Track where the unlikely sections start and end. This will be needed if + the unlikely sections need to be split into a separate segment. */ +int unlikely_segment_start = -1; +int unlikely_segment_end = -1; + +/* This value is used to determine the profile threshold below which the + section is considered unlikely. The default is zero. */ +extern unsigned long long unlikely_segment_profile_cutoff; + /* Visit each node and print the chain of merged nodes to the file. Update HANDLES and SHNDX to contain the ordered list of sections. */ @@ -642,12 +874,15 @@ get_layout (FILE *fp, void*** handles, int i = 0; int position; void *slot; + int unlikely_section_index; + const int CALLGRAPH_POSITION = 5; - /* Form NUM_SECTION_TYPES + 1 groups of sections. Index 0 corresponds + /* Form NUM_SECTION_TYPES + 1 groups of sections. Index 5 corresponds to the list of sections that correspond to functions in the callgraph. For other sections, they are grouped by section_type and stored in - index: (section_type + 1). SECTION_START points to the first - section in each section group and SECTION_END points to the last. */ + index: section_position[section_type]). + SECTION_START points to the first section in each section group and + SECTION_END points to the last. */ Section_id *section_start[NUM_SECTION_TYPES + 1]; Section_id *section_end[NUM_SECTION_TYPES + 1]; Section_id *s_it; @@ -675,7 +910,8 @@ get_layout (FILE *fp, void*** handles, htab_hash_string (n_it->name)); assert (slot != NULL); s = (Section_id *)slot; - write_out_node (s, &section_start[0], &section_end[0]); + write_out_node (s, &section_start[CALLGRAPH_POSITION], + &section_end[CALLGRAPH_POSITION]); if (fp) fprintf (fp, "# Callgraph group : %s", n_it->name); @@ -689,8 +925,8 @@ get_layout (FILE *fp, void*** handles, htab_hash_string (node->name)); assert (slot != NULL); s = (Section_id *)slot; - write_out_node (s, &section_start[0], - &section_end[0]); + write_out_node (s, &section_start[CALLGRAPH_POSITION], + &section_end[CALLGRAPH_POSITION]); if (fp) fprintf (fp, " %s", node->name); } @@ -707,30 +943,87 @@ get_layout (FILE *fp, void*** handles, while (s_it) { if (!s_it->processed) - write_out_node (s_it, &section_start[s_it->section_type + 1], - &section_end[s_it->section_type + 1]); + { + int index = section_position_index(s_it->section_type); + write_out_node (s_it, &section_start[index], &section_end[index]); + } s_it = s_it->next; } + /* Determine the unlikely section index */ + unlikely_section_index = -1; + for (i = 0; i < ARRAY_SIZE (section_types); ++i) + if (strcmp (".text.unlikely.", section_types[i]) == 0) + break; + assert (i < ARRAY_SIZE (section_types)); + unlikely_section_index = section_position_index(i); + position = 0; for (i = 0; i < NUM_SECTION_TYPES + 1; ++i) { s_it = section_start[i]; - if (i == UNLIKELY_SECTION_INDEX + 1) - unlikely_segment_start = position; - while (s_it) + + if (s_it == NULL) + continue; + + /* Sort all section groups by weight except the callgraph group. */ + if (i < NUM_SECTION_TYPES) + s_it = sort_section_group (s_it); + + /* Start the unlikely segment if necessary. */ + assert (use_max_count <= 1); + if (i == unlikely_section_index + && (get_max (s_it->weight, s_it->computed_weight, + s_it->max_count * use_max_count) + <= unlikely_segment_profile_cutoff)) + { + assert (unlikely_segment_start == -1); + unlikely_segment_start = position; + if (fp != NULL) + fprintf (fp, "=== Unlikely sections start ===\n"); + } + + do { assert (position < num_sections); (*handles)[position] = s_it->handle; (*shndx)[position] = s_it->shndx; + + /* Check if this section will end the unlikely segment. */ + if (i == unlikely_section_index + && unlikely_segment_start >= 0 + && unlikely_segment_start != position + && unlikely_segment_end == -1 + && (get_max (s_it->weight, s_it->computed_weight, + s_it->max_count * use_max_count) + > unlikely_segment_profile_cutoff)) + { + unlikely_segment_end = position - 1; + if (fp != NULL) + fprintf (fp, "=== Unlikely sections end ===\n"); + } + position++; if (fp != NULL) - fprintf (fp, "%s\n", s_it->full_name); + { + fprintf (fp, "%s entry count = %llu computed = %llu " + "max count = %llu\n", s_it->full_name, s_it->weight, + s_it->computed_weight, s_it->max_count); + } s_it = s_it->group; } - if (i == UNLIKELY_SECTION_INDEX + 1) - unlikely_segment_end = position; + while (s_it); + + /* End the unlikely segment if it has not been done already. */ + if (i == unlikely_section_index + && unlikely_segment_start != -1 + && unlikely_segment_end == -1) + { + unlikely_segment_end = position - 1; + if (fp != NULL) + fprintf (fp, "=== Unlikely sections end ===\n"); + } } return position; } Index: function_reordering_plugin/callgraph.h =================================================================== --- function_reordering_plugin/callgraph.h (revision 198081) +++ function_reordering_plugin/callgraph.h (working copy) @@ -61,6 +61,15 @@ typedef struct node_d { unsigned int id; char *name; + /* Node weight, execution count of entry bb. */ + unsigned long long weight; + /* Weight computed by adding weights of incoming edges to + this node. */ + unsigned long long computed_weight; + /* Max count of any bb executed. */ + unsigned long long max_count; + /* Stores the max count of any bb in the split cold section. */ + unsigned long long split_weight; /* Chain all the Nodes created. */ struct node_d *next; /* Pointer to the next node in the chain of merged nodes. */ @@ -81,6 +90,10 @@ make_node (unsigned int id, char *name) XNEW_ALLOC (node, Node); node->id = id; node->name = name; + node->weight = 0; + node->computed_weight = 0; + node->max_count = 0; + node->split_weight = 0; node->is_real_node = 0; node->next = NULL; node->edge_list = NULL; @@ -133,7 +146,7 @@ struct edge_d { Node *first_function; Node *second_function; - unsigned int weight; + unsigned long long weight; Edge_type type; /* 1 if the nodes corresponding to this edge have been merged. */ unsigned int is_merged; @@ -143,7 +156,7 @@ struct edge_d }; inline static Edge * -make_edge (Node *first, Node *second, unsigned int weight) +make_edge (Node *first, Node *second, unsigned long long weight) { Edge *edge; XNEW_ALLOC (edge, Edge); @@ -205,6 +218,12 @@ typedef struct section_id_ char *full_name; void *handle; int shndx; + /* Corresponds to node weight. */ + unsigned long long weight; + /* Corresponds to node's computed weight. */ + unsigned long long computed_weight; + /* Max count of bb executed in this function. */ + unsigned long long max_count; /* Type of prefix in section name. */ int section_type; /* Pointer to the next section in the same comdat_group. */ @@ -213,6 +232,10 @@ typedef struct section_id_ struct section_id_ *next; /* Used for grouping sections. */ struct section_id_ *group; + /* Pointer to the cold split section if any. If this function + is comdat hot and kept, pointer to the kept cold split + section. */ + struct section_id_ *split_section; /* Check if this section has been considered for output. */ char processed; } Section_id; @@ -233,6 +256,10 @@ make_section_id (char *name, char *full_name, s->next = NULL; s->group = NULL; s->processed = 0; + s->weight = 0; + s->computed_weight = 0; + s->max_count = 0; + s->split_section = NULL; return s; } Index: function_reordering_plugin/function_reordering_plugin.c =================================================================== --- function_reordering_plugin/function_reordering_plugin.c (revision 198081) +++ function_reordering_plugin/function_reordering_plugin.c (working copy) @@ -97,6 +97,29 @@ static int no_op = 0; "--plugin-opt,split_segment=yes". */ static int split_segment = 0; +/* If SORT_NAME_PREFIX is true then the sections not touched by the callgraph + are grouped according to their name prefix. When SORT_NAME_PREFIX is zero, + all the sections are put together and sorted according to their node + weights. The default value of SORT_NAME_PREFIX is 0. Even when sections + are grouped by their prefix, each group is sorted by the node weights. */ +int sort_name_prefix = 0; + +/* Edge cutoff is used to discard callgraph edges that are not above a + certain threshold. cutoff_p is to express this as a percent of the + maximum value and cutoff_a is used to express this as an absolute + value. The default is to consider all edges. */ +unsigned int edge_cutoff_p = 0; +unsigned long long edge_cutoff_a = 0; + +/* This is true if the max count of any bb in a function should be used as + the node weight rather than the count of the entry bb. */ +int use_max_count = 1; + +/* This is used to decide which sections are considered unlikely. If the + section profile is greater than this value then it is not unlikely + executed. */ +unsigned long long unlikely_segment_profile_cutoff = 0; + /* Copies new output file name out_file */ void get_filename (const char *name) { @@ -133,6 +156,10 @@ process_option (const char *name) const char *option_group = "group="; const char *option_file = "file="; const char *option_segment = "split_segment="; + const char *option_edge_cutoff = "edge_cutoff="; + const char *option_sort_name_prefix = "sort_name_prefix="; + const char *option_max_count = "use_maxcount="; + const char *option_unlikely_cutoff = "unlikely_cutoff="; /* Check if option is "group=" */ if (strncmp (name, option_group, strlen (option_group)) == 0) @@ -164,6 +191,65 @@ process_option (const char *name) return 0; } } + else if (strncmp (name, option_edge_cutoff, + strlen (option_edge_cutoff)) == 0) + { + const char *a_or_p = name + strlen (option_edge_cutoff); + if (a_or_p[0] == 'p') + { + edge_cutoff_p = atoi (a_or_p + 1); + return 0; + } + else if (a_or_p[0] == 'a') + { + edge_cutoff_a = atoll (a_or_p + 1); + return 0; + } + else + { + fprintf (stderr, "Wrong format for edge_cutoff, " + "use edge_cutoff=[p|a]<value>\n"); + } + } + else if (strncmp (name, option_sort_name_prefix, + strlen (option_sort_name_prefix)) == 0) + { + const char *option_val = name + strlen (option_sort_name_prefix); + if (strcmp (option_val, "no") == 0) + { + sort_name_prefix = 0; + return 0; + } + else if (strcmp (option_val, "yes") == 0) + { + sort_name_prefix = 1; + return 0; + } + } + else if (strncmp (name, option_max_count, + strlen (option_max_count)) == 0) + { + const char *option_val = name + strlen (option_max_count); + if (strcmp (option_val, "no") == 0) + { + use_max_count = 0; + return 0; + } + else if (strcmp (option_val, "yes") == 0) + { + use_max_count = 1; + return 0; + } + } + /* Check if option is unlikely_cutoff. This decides what sections are + considered unlikely for segment splitting. The default cutoff is 0. */ + else if (strncmp (name, option_unlikely_cutoff, + strlen (option_unlikely_cutoff)) == 0) + { + const char *option_val = name + strlen (option_unlikely_cutoff); + unlikely_segment_profile_cutoff = atoll (option_val); + return 0; + } /* Flag error on unknown plugin option. */ MSG_ERROR ("Unknown option to function reordering plugin :%s\n", name); @@ -365,24 +451,27 @@ all_symbols_read_hook (void) section_list[i].shndx = shndx[i]; } - if (split_segment == 1) + if (split_segment == 1 + && unlikely_segment_start >= 0 + && (unlikely_segment_end >= unlikely_segment_start)) { /* Pass the new order of functions to the linker. */ /* Fix the order of all sections upto the beginning of the unlikely section. */ update_section_order (section_list, unlikely_segment_start); - assert (num_entries >= unlikely_segment_end); + assert (num_entries > unlikely_segment_end); /* Fix the order of all sections after the end of the unlikely section. */ - update_section_order (section_list, num_entries - unlikely_segment_end); + update_section_order (section_list + unlikely_segment_end + 1, + num_entries - unlikely_segment_end - 1); /* Map all unlikely code into a new segment. */ unique_segment_for_sections ( ".text.unlikely_executed", 0, 0x1000, section_list + unlikely_segment_start, - unlikely_segment_end - unlikely_segment_start); + unlikely_segment_end - unlikely_segment_start + 1); if (fp != NULL) fprintf (fp, "Moving %u section(s) to new segment\n", - unlikely_segment_end - unlikely_segment_start); + unlikely_segment_end - unlikely_segment_start + 1); } else { Index: gcc/final.c =================================================================== --- gcc/final.c (revision 198081) +++ gcc/final.c (working copy) @@ -204,6 +204,9 @@ rtx current_insn_predicate; /* True if printing into -fdump-final-insns= dump. */ bool final_insns_dump_p; +/* True if the function has a split cold section. */ +static bool has_cold_section_p; + #ifdef HAVE_ATTR_length static int asm_insn_count (rtx); #endif @@ -1934,6 +1937,7 @@ final_scan_insn (rtx insn, FILE *file, int optimiz targetm.asm_out.function_switched_text_sections (asm_out_file, current_function_decl, in_cold_section_p); + has_cold_section_p = true; break; case NOTE_INSN_BASIC_BLOCK: @@ -4330,6 +4334,24 @@ dump_cgraph_profiles (void) } } +/* Iterate through the basic blocks in DECL and get the max count. + If COLD is true, find the max count of the cold part of the split. */ +static gcov_type +get_max_count (tree decl, bool cold) +{ + basic_block bb; + gcov_type max_count = (cgraph_get_node (decl))->count; + + FOR_EACH_BB (bb) + { + if (cold && BB_PARTITION (bb) != BB_COLD_PARTITION) + continue; + if (bb->count > max_count) + max_count = bb->count; + } + return max_count; +} + /* Turn the RTL into assembly. */ static unsigned int rest_of_handle_final (void) @@ -4348,6 +4370,8 @@ rest_of_handle_final (void) gcc_assert (GET_CODE (x) == SYMBOL_REF); fnname = XSTR (x, 0); + has_cold_section_p = false; + assemble_start_function (current_function_decl, fnname); final_start_function (get_insns (), asm_out_file, optimize); final (get_insns (), asm_out_file, optimize); @@ -4403,12 +4427,27 @@ rest_of_handle_final (void) if ((flag_reorder_functions > 1) && flag_profile_use && cgraph_get_node (current_function_decl) != NULL - && (cgraph_get_node (current_function_decl))->callees != NULL) + && ((cgraph_get_node (current_function_decl))->callees != NULL + || (cgraph_get_node (current_function_decl))->count > 0)) { flags = SECTION_DEBUG | SECTION_EXCLUDE; asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname); switch_to_section (get_section (profile_fnname, flags, NULL)); fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname); + fprintf (asm_out_file, "\t.string \"Weight " + HOST_WIDEST_INT_PRINT_DEC + " " + HOST_WIDEST_INT_PRINT_DEC + "\"\n", + (cgraph_get_node (current_function_decl))->count, + get_max_count (current_function_decl, false)); + /* If this function is split into a cold section, record that weight + here. */ + if (has_cold_section_p) + fprintf (asm_out_file, "\t.string \"ColdWeight " + HOST_WIDEST_INT_PRINT_DEC + "\"\n", + get_max_count (current_function_decl, true)); dump_cgraph_profiles (); free (profile_fnname); } Index: gcc/testsuite/g++.dg/tree-prof/callgraph-profiles.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/callgraph-profiles.C (revision 198081) +++ gcc/testsuite/g++.dg/tree-prof/callgraph-profiles.C (working copy) @@ -1,41 +0,0 @@ -/* Verify if call-graph profile sections are created - with -freorder-functions=. */ -/* { dg-require-section-exclude "" } */ -/* { dg-require-linker-function-reordering-plugin "" } */ -/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections --save-temps -Wl,-plugin-opt,file=callgraph-profiles.C.dump -Wl,-plugin-opt,split_segment=yes" } */ - -int -notcalled () -{ - return 0; -} - -int __attribute__ ((noinline)) -foo () -{ - return 1; -} - -int __attribute__ ((noinline)) -bar () -{ - return 0; -} - -int main () -{ - int sum; - for (int i = 0; i< 1000; i++) - { - sum = foo () + bar(); - } - return sum * bar (); -} - -/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */ -/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */ -/* { dg-final-use { scan-file callgraph-profiles.C.dump "Callgraph group : main _Z3barv _Z3foov\n" } } */ -/* { dg-final-use { scan-file callgraph-profiles.C.dump "\.text\.*\.main\n.text\.*\._Z3barv\n\.text\.*\._Z3foov\n\.text\.*\._Z9notcalledv" } } */ -/* { dg-final-use { scan-file callgraph-profiles.C.dump "Moving 1 section\\(s\\) to new segment" } } */ -/* { dg-final-use { cleanup-saved-temps } } */ -/* { dg-final-use { remove-build-file "callgraph-profiles.C.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_1.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_1.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_1.C (revision 0) @@ -0,0 +1,45 @@ +/* Verify if call-graph profile sections are created with -freorder-functions=. + Check of edge profiles and node profiles are present in the profile + sections. Check if the segment splitting API is invoked. */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections --save-temps -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,split_segment=yes" } */ + +int +notcalled () +{ + return 0; +} + +int __attribute__ ((noinline)) +foo () +{ + return 1; +} + +int __attribute__ ((noinline)) +bar () +{ + return 0; +} + +int main () +{ + int sum; + for (int i = 0; i< 1000; i++) + { + sum = foo () + bar(); + } + return sum * bar (); +} + +/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */ +/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */ +/* { dg-final-use { scan-assembler "\.string \"Weight 1000 1000\"" } } */ +/* { dg-final-use { scan-assembler "\.string \"Weight 1001 1001\"" } } */ +/* { dg-final-use { scan-file linker.dump "Callgraph group : _Z3foov _Z3barv main\n" } } */ +/* { dg-final-use { scan-file linker.dump ".text\.*\._Z9notcalledv entry count = 0 computed = 0 max count = 0" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z3foov.*\n\.text\.*\._Z3barv.*\n\.text\.*\.main.*\n" } } */ +/* { dg-final-use { scan-file linker.dump "Moving . section\\(s\\) to new segment" } } */ +/* { dg-final-use { cleanup-saved-temps } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_2.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_2.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_2.C (revision 0) @@ -0,0 +1,25 @@ +/* Check if the edge_cutoffa option to the function reordering plugin works as + expected. */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=a1000" } */ + +int __attribute__ ((noinline)) +foo () +{ + return 1; +} + +int main () +{ + int sum = 0; + for (int i = 0; i< 1000; i++) + { + sum += foo (); + } + return sum - 1000; +} + +/* { dg-final-use { scan-file linker.dump "Not considering edge with weight 1000 and below" } } */ +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_3.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_3.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_3.C (revision 0) @@ -0,0 +1,25 @@ +/* Check if the edge_cutoffp option to the function reordering plugin works as + expected. */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100" } */ + +int __attribute__ ((noinline)) +foo () +{ + return 1; +} + +int main () +{ + int sum = 0; + for (int i = 0; i< 1000; i++) + { + sum += foo (); + } + return sum - 1000; +} + +/* { dg-final-use { scan-file linker.dump "Not considering edge with weight 1000 and below" } } */ +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_4.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_4.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_4.C (revision 0) @@ -0,0 +1,41 @@ +/* Check if cutting off callgraph gets all functions laid out only according to + function profiles and not prefixes. foo_200 is as hot as the other foo's but + has a unlikely section prefix. This should not matter as sort_name_prefix + is turned off. */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump,-plugin-opt,edge_cutoff=p100,-plugin-opt,sort_name_prefix=no" } */ + +int __attribute__ ((noinline, section(".text.unlikely._Z7foo_200v"))) +foo_200 () +{ + return 1; +} + +int __attribute__ ((noinline)) +foo_100 () +{ + return 1; +} + +int __attribute__ ((noinline)) +foo_300 () +{ + return 1; +} +int main () +{ + int sum = 0; + for (int i = 0; i< 200; i++) + sum += foo_200 (); + for (int i = 0; i< 100; i++) + sum += foo_100 (); + for (int i = 0; i< 300; i++) + sum += foo_300 (); + return sum - 600; +} + +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ +/* { dg-final-use { scan-file linker.dump ".text.unlikely._Z7foo_200v entry count = 200 computed = 200 max count = 200" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100v.*\n\.text\.unlikely\._Z7foo_200v.*\n\.text\.*\._Z7foo_300v.*\n" } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_5.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_5.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_5.C (revision 0) @@ -0,0 +1,41 @@ +/* Check if cutting off callgraph and using sort_name_prefix gets all functions laid out + according to prefixes. foo_200 is almost as hot as the other foo's but should + not be grouped with them as it has a different section prefix and sort_name_prefix is + turned on. */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump,-plugin-opt,edge_cutoff=p100,-plugin-opt,sort_name_prefix=yes" } */ + +int __attribute__ ((noinline, section(".text.unlikely._Z7foo_200v"))) +foo_200 () +{ + return 1; +} + +int __attribute__ ((noinline)) +foo_100 () +{ + return 1; +} + +int __attribute__ ((noinline)) +foo_300 () +{ + return 1; +} +int main () +{ + int sum = 0; + for (int i = 0; i< 200; i++) + sum += foo_200 (); + for (int i = 0; i< 100; i++) + sum += foo_100 (); + for (int i = 0; i< 300; i++) + sum += foo_300 (); + return sum - 600; +} + +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ +/* { dg-final-use { scan-file linker.dump ".text.unlikely._Z7foo_200v entry count = 200 computed = 200 max count = 200" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.unlikely\._Z7foo_200v.*\n\.text\.*\._Z7foo_100v.*\n\.text\.*\._Z7foo_300v.*\n" } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_6.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_6.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_6.C (revision 0) @@ -0,0 +1,53 @@ +/* Check if use_maxcount works as expected. This makes the node profile weight to + be equal to the maximum count of any basic block in a function rather than the + entry count. foo_100's maxcount > foo_200's max count */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,use_maxcount=yes" } */ + + +int __attribute__ ((noinline)) +bar (int *i) +{ + (*i)--; + if (*i >= 0) + return 1; + return 0; +} + +int __attribute__ ((noinline)) +foo_100 (int count) +{ + int sum = 0; + while (count > 0) + { + sum += bar(&count); + } + return sum; +} + +int __attribute__ ((noinline)) +foo_200 (int count) +{ + int sum = 0; + while (count > 0) + { + sum += bar(&count); + } + return sum; +} + +int main () +{ + int sum = 0; + for (int i = 0; i< 200; i++) + sum += foo_200 (100); + for (int i = 0; i< 100; i++) + sum += foo_100 (400); + return sum - 60000; +} +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i entry count = 100 computed = 100 max count = 40000" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i entry count = 200 computed = 200 max count = 20000" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i.*\n\.text\.*\._Z7foo_100i.*\n\.text\.*\._Z3barPi.*\n" } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_7.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_7.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_7.C (revision 0) @@ -0,0 +1,55 @@ +/* Check if turning off use_maxcount works as expected. This makes the node + profile weight to be equal to the entry count of any basic block in a + function rather than the max count. + foo_100's maxcount > foo_200's max count but + foo_100's entry count < foo_200's entry count. */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,use_maxcount=no" } */ + + +int __attribute__ ((noinline)) +bar (int *i) +{ + (*i)--; + if (*i >= 0) + return 1; + return 0; +} + +int __attribute__ ((noinline)) +foo_100 (int count) +{ + int sum = 0; + while (count > 0) + { + sum += bar(&count); + } + return sum; +} + +int __attribute__ ((noinline)) +foo_200 (int count) +{ + int sum = 0; + while (count > 0) + { + sum += bar(&count); + } + return sum; +} + +int main () +{ + int sum = 0; + for (int i = 0; i< 200; i++) + sum += foo_200 (100); + for (int i = 0; i< 100; i++) + sum += foo_100 (400); + return sum - 60000; +} +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i entry count = 100 computed = 100 max count = 40000" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_200i entry count = 200 computed = 200 max count = 20000" } } */ +/* { dg-final-use { scan-file linker.dump "\.text\.*\._Z7foo_100i.*\n\.text\.*\._Z7foo_200i.*\n\.text\.*\._Z3barPi.*\n" } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */ Index: gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_8.C =================================================================== --- gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_8.C (revision 0) +++ gcc/testsuite/g++.dg/tree-prof/func_reorder_gold_plugin_8.C (revision 0) @@ -0,0 +1,19 @@ +/* Check if unlikely_cutoff works as expected. Function foo is unlikely because of the cutoff. */ +/* { dg-require-section-exclude "" } */ +/* { dg-require-linker-function-reordering-plugin "" } */ +/* { dg-options "-O2 -freorder-functions=callgraph -ffunction-sections -Wl,-plugin-opt,file=linker.dump -Wl,-plugin-opt,edge_cutoff=p100,-plugin-opt,unlikely_cutoff=1" } */ + +int __attribute__ ((noinline,section(".text.hot._Z3foov"))) +foo () +{ + return 0; +} + +int main() +{ + return foo (); +} + +/* { dg-final-use { scan-file-not linker.dump "Callgraph group" } } */ +/* { dg-final-use { scan-file linker.dump "=== Unlikely sections start ===\n.*\.text\.hot\._Z3foov.* entry count = 1 computed = 1 max count = 1\n.*=== Unlikely sections end ===" } } */ +/* { dg-final-use { remove-build-file "linker.dump" } } */

[google,4_7] Function reordering plugin enhancements

Commit Message

Comments

Patch