@@ -328,6 +328,8 @@ static tree handle_used_attribute (tree *, tree, tree, int, bool *);
static tree handle_unused_attribute (tree *, tree, tree, int, bool *);
static tree handle_externally_visible_attribute (tree *, tree, tree, int,
bool *);
+static tree handle_no_reorder_attribute (tree *, tree, tree, int,
+ bool *);
static tree handle_const_attribute (tree *, tree, tree, int, bool *);
static tree handle_transparent_union_attribute (tree *, tree, tree,
int, bool *);
@@ -652,6 +654,8 @@ const struct attribute_spec c_common_attribute_table[] =
handle_unused_attribute, false },
{ "externally_visible", 0, 0, true, false, false,
handle_externally_visible_attribute, false },
+ { "no_reorder", 0, 0, true, false, false,
+ handle_no_reorder_attribute, false },
/* The same comments as for noreturn attributes apply to const ones. */
{ "const", 0, 0, true, false, false,
handle_const_attribute, false },
@@ -6953,6 +6957,30 @@ handle_externally_visible_attribute (tree *pnode, tree name,
return NULL_TREE;
}
+/* Handle the "no_reorder" attribute.  Arguments as in
+   struct attribute_spec.handler.
+
+   *PNODE is the entity the attribute is applied to and NAME is the
+   attribute name, which is used only in the diagnostic.  The two
+   unnamed parameters are unused.  When the attribute is rejected a
+   -Wattributes warning is issued and *NO_ADD_ATTRS is set to true.
+   Always returns NULL_TREE.  */
+
+static tree
+handle_no_reorder_attribute (tree *pnode,
+ tree name,
+ tree,
+ int,
+ bool *no_add_attrs)
+{
+ tree node = *pnode;
+ /* Warn only when NODE is neither a function nor a variable declaration
+    and is also neither static nor external.
+    NOTE(review): because the two tests are joined with &&, a FUNCTION_DECL
+    or VAR_DECL is never rejected here, e.g. an automatic local variable;
+    confirm that this is intended.  */
+ if ((TREE_CODE (node) != FUNCTION_DECL && TREE_CODE (node) != VAR_DECL)
+ && !(TREE_STATIC (node) || (DECL_P (node) && DECL_EXTERNAL (node))))
+ {
+ warning (OPT_Wattributes,
+ "%qE attribute only affects top level objects",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
/* Handle a "const" attribute; arguments as in
struct attribute_spec.handler. */
@@ -400,6 +400,8 @@ public:
/* Set when function is visible by other units. */
unsigned externally_visible : 1;
+ /* Do not reorder this symbol relative to other symbols having this set. */
+ unsigned no_reorder : 1;
/* The symbol will be assumed to be used in an invisible way (like
by an toplevel asm statement). */
unsigned force_output : 1;
@@ -1701,9 +1703,6 @@ public:
/* Output all variables enqueued to be assembled. */
bool output_variables (void);
- /* Output all asm statements we have stored up to be output. */
- void output_asm_statements (void);
-
/* Weakrefs may be associated to external decls and thus not output
at expansion time. Emit all necessary aliases. */
void output_weakrefs (void);
@@ -437,6 +437,7 @@ cgraph_node::create_clone (tree decl, gcov_type gcov_count, int freq,
new_node->definition = definition;
new_node->local = local;
new_node->externally_visible = false;
+ new_node->no_reorder = no_reorder;
new_node->local.local = true;
new_node->global = global;
new_node->global.inlined_to = new_inlined_to;
@@ -876,6 +877,7 @@ cgraph_node::create_version_clone (tree new_decl,
new_version->definition = definition;
new_version->local = local;
new_version->externally_visible = false;
+ new_version->no_reorder = no_reorder;
new_version->local.local = new_version->definition;
new_version->global = global;
new_version->rtl = rtl;
@@ -545,22 +545,6 @@ cgraph_node::add_new_function (tree fndecl, bool lowered)
DECL_FUNCTION_PERSONALITY (fndecl) = lang_hooks.eh_personality ();
}
-/* Output all asm statements we have stored up to be output. */
-
-void
-symbol_table::output_asm_statements (void)
-{
- asm_node *can;
-
- if (seen_error ())
- return;
-
- for (can = first_asm_symbol (); can; can = can->next)
- assemble_asm (can->asm_str);
-
- clear_asm_symbols ();
-}
-
/* Analyze the function scheduled to be output. */
void
cgraph_node::analyze (void)
@@ -785,8 +769,10 @@ varpool_node::finalize_decl (tree decl)
if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl)
/* Traditionally we do not eliminate static variables when not
optimizing and when not doing toplevel reoder. */
- || (!flag_toplevel_reorder && !DECL_COMDAT (node->decl)
- && !DECL_ARTIFICIAL (node->decl)))
+ || node->no_reorder
+ || ((!flag_toplevel_reorder
+ && !DECL_COMDAT (node->decl)
+ && !DECL_ARTIFICIAL (node->decl))))
node->force_output = true;
if (symtab->state == CONSTRUCTION
@@ -1922,10 +1908,11 @@ struct cgraph_order_sort
according to their order fields, which is the order in which they
appeared in the file. This implements -fno-toplevel-reorder. In
this mode we may output functions and variables which don't really
- need to be output. */
+ need to be output.
+ When NO_REORDER is true only do this for symbols marked no reorder. */
static void
-output_in_order (void)
+output_in_order (bool no_reorder)
{
int max;
cgraph_order_sort *nodes;
@@ -1940,6 +1927,8 @@ output_in_order (void)
{
if (pf->process && !pf->thunk.thunk_p && !pf->alias)
{
+ if (no_reorder && !pf->no_reorder)
+ continue;
i = pf->order;
gcc_assert (nodes[i].kind == ORDER_UNDEFINED);
nodes[i].kind = ORDER_FUNCTION;
@@ -1950,6 +1939,8 @@ output_in_order (void)
FOR_EACH_DEFINED_VARIABLE (pv)
if (!DECL_EXTERNAL (pv->decl))
{
+ if (no_reorder && !pv->no_reorder)
+ continue;
i = pv->order;
gcc_assert (nodes[i].kind == ORDER_UNDEFINED);
nodes[i].kind = ORDER_VAR;
@@ -2203,11 +2194,12 @@ symbol_table::compile (void)
state = EXPANSION;
if (!flag_toplevel_reorder)
- output_in_order ();
+ output_in_order (false);
else
{
- output_asm_statements ();
-
+ /* Output first asm statements and anything ordered. The process
+ flag is cleared for these nodes, so we skip them later. */
+ output_in_order (true);
expand_all_functions ();
output_variables ();
}
@@ -2180,7 +2180,7 @@ attributes are currently defined for functions on all targets:
@code{returns_nonnull}, @code{gnu_inline},
@code{externally_visible}, @code{hot}, @code{cold}, @code{artificial},
@code{no_sanitize_address}, @code{no_address_safety_analysis},
-@code{no_sanitize_undefined},
+@code{no_sanitize_undefined}, @code{no_reorder},
@code{error} and @code{warning}.
Several other attributes are defined for functions on particular
target systems. Other attributes, including @code{section} are
@@ -3472,6 +3472,16 @@ my_memcpy (void *dest, const void *src, size_t len)
__attribute__((nonnull));
@end smallexample
+@item no_reorder
+@cindex @code{no_reorder} function or variable attribute
+Do not reorder functions or variables marked @code{no_reorder}
+against each other or top level assembler statements in the executable.
+The actual order in the program will depend on the linker command
+line. Static variables marked like this are also not removed.
+This has a similar effect
+as the @option{-fno-toplevel-reorder} option, but only applies to the
+marked symbols.
+
@item returns_nonnull
@cindex @code{returns_nonnull} function attribute
The @code{returns_nonnull} attribute specifies that the function
@@ -492,6 +492,9 @@ function_and_variable_visibility (bool whole_program)
node->externally_visible = false;
node->forced_by_abi = false;
}
+ if (lookup_attribute ("no_reorder",
+ DECL_ATTRIBUTES (node->decl)))
+ node->no_reorder = 1;
if (!node->externally_visible
&& node->definition && !node->weakref
&& !DECL_EXTERNAL (node->decl))
@@ -633,6 +636,9 @@ function_and_variable_visibility (bool whole_program)
vnode->externally_visible = false;
vnode->forced_by_abi = false;
}
+ if (lookup_attribute ("no_reorder",
+ DECL_ATTRIBUTES (vnode->decl)))
+ vnode->no_reorder = 1;
if (!vnode->externally_visible
&& !vnode->weakref)
{
@@ -508,6 +508,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
bp = bitpack_create (ob->main_stream);
bp_pack_value (&bp, node->local.local, 1);
bp_pack_value (&bp, node->externally_visible, 1);
+ bp_pack_value (&bp, node->no_reorder, 1);
bp_pack_value (&bp, node->definition, 1);
bp_pack_value (&bp, node->local.versionable, 1);
bp_pack_value (&bp, node->local.can_change_signature, 1);
@@ -581,6 +582,7 @@ lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node *node,
lto_output_var_decl_index (ob->decl_state, ob->main_stream, node->decl);
bp = bitpack_create (ob->main_stream);
bp_pack_value (&bp, node->externally_visible, 1);
+ bp_pack_value (&bp, node->no_reorder, 1);
bp_pack_value (&bp, node->force_output, 1);
bp_pack_value (&bp, node->forced_by_abi, 1);
bp_pack_value (&bp, node->unique_name, 1);
@@ -1041,6 +1043,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data,
node->local.local = bp_unpack_value (bp, 1);
node->externally_visible = bp_unpack_value (bp, 1);
+ node->no_reorder = bp_unpack_value (bp, 1);
node->definition = bp_unpack_value (bp, 1);
node->local.versionable = bp_unpack_value (bp, 1);
node->local.can_change_signature = bp_unpack_value (bp, 1);
@@ -1246,6 +1249,7 @@ input_varpool_node (struct lto_file_decl_data *file_data,
bp = streamer_read_bitpack (ib);
node->externally_visible = bp_unpack_value (&bp, 1);
+ node->no_reorder = bp_unpack_value (&bp, 1);
node->force_output = bp_unpack_value (&bp, 1);
node->forced_by_abi = bp_unpack_value (&bp, 1);
node->unique_name = bp_unpack_value (&bp, 1);
@@ -333,7 +333,8 @@ lto_max_map (void)
new_partition ("empty");
}
-/* Helper function for qsort; sort nodes by order. */
+/* Helper function for qsort; sort nodes by order. noreorder functions must have
+ been removed earlier. */
static int
node_cmp (const void *pa, const void *pb)
{
@@ -365,11 +366,26 @@ node_cmp (const void *pa, const void *pb)
static int
varpool_node_cmp (const void *pa, const void *pb)
{
- const varpool_node *a = *(const varpool_node * const *) pa;
- const varpool_node *b = *(const varpool_node * const *) pb;
+ const symtab_node *a = *static_cast<const symtab_node * const *> (pa);
+ const symtab_node *b = *static_cast<const symtab_node * const *> (pb);
return b->order - a->order;
}
+/* Sort NEXT_NODES in place with varpool_node_cmp, then walk the sorted
+   vector and add to PARTITION every node for which
+   symbol_partitioned_p is false.  */
+
+static void
+add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
+{
+  next_nodes.qsort (varpool_node_cmp);
+
+  for (unsigned ix = 0; ix < next_nodes.length (); ix++)
+    {
+      symtab_node *snode = next_nodes[ix];
+
+      /* Skip symbols that already belong to some partition.  */
+      if (symbol_partitioned_p (snode))
+	continue;
+      add_symbol_to_partition (partition, snode);
+    }
+}
+
+
/* Group cgraph nodes into equally-sized partitions.
The partitioning algorithm is simple: nodes are taken in predefined order.
@@ -414,7 +430,8 @@ lto_balanced_map (int n_lto_partitions)
int n_nodes = 0;
int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
struct cgraph_node **order = XNEWVEC (cgraph_node *, symtab->cgraph_max_uid);
- varpool_node **varpool_order = NULL;
+ auto_vec<cgraph_node *> noreorder;
+ auto_vec<varpool_node *> varpool_order;
int i;
struct cgraph_node *node;
int total_size = 0, best_total_size = 0;
@@ -427,6 +444,7 @@ lto_balanced_map (int n_lto_partitions)
INT_MAX, best_internal = 0;
int npartitions;
int current_order = -1;
+ int noreorder_pos = 0;
FOR_EACH_VARIABLE (vnode)
gcc_assert (!vnode->aux);
@@ -434,7 +452,10 @@ lto_balanced_map (int n_lto_partitions)
FOR_EACH_DEFINED_FUNCTION (node)
if (node->get_partitioning_class () == SYMBOL_PARTITION)
{
- order[n_nodes++] = node;
+ if (node->no_reorder)
+ noreorder.safe_push (node);
+ else
+ order[n_nodes++] = node;
if (!node->alias)
total_size += inline_summary (node)->size;
}
@@ -445,27 +466,26 @@ lto_balanced_map (int n_lto_partitions)
get better about minimizing the function bounday, but until that
things works smoother if we order in source order. */
qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp);
+ noreorder.qsort (node_cmp);
if (symtab->dump_file)
- for(i = 0; i < n_nodes; i++)
- fprintf (symtab->dump_file, "Balanced map symbol order:%s:%u\n",
- order[i]->name (), order[i]->tp_first_run);
-
- if (!flag_toplevel_reorder)
{
- FOR_EACH_VARIABLE (vnode)
- if (vnode->get_partitioning_class () == SYMBOL_PARTITION)
- n_varpool_nodes++;
- varpool_order = XNEWVEC (varpool_node *, n_varpool_nodes);
-
- n_varpool_nodes = 0;
- FOR_EACH_VARIABLE (vnode)
- if (vnode->get_partitioning_class () == SYMBOL_PARTITION)
- varpool_order[n_varpool_nodes++] = vnode;
- qsort (varpool_order, n_varpool_nodes, sizeof (varpool_node *),
- varpool_node_cmp);
+ for(i = 0; i < n_nodes; i++)
+ fprintf (symtab->dump_file, "Balanced map symbol order:%s:%u\n",
+ order[i]->name (), order[i]->tp_first_run);
+ for(i = 0; i < (int)noreorder.length(); i++)
+ fprintf (symtab->dump_file, "Balanced map symbol no_reorder:%s:%u\n",
+ noreorder[i]->name (), noreorder[i]->tp_first_run);
}
+ /* Collect all variables that should not be reordered. */
+ FOR_EACH_VARIABLE (vnode)
+ if (vnode->get_partitioning_class () == SYMBOL_PARTITION
+ && (!flag_toplevel_reorder || vnode->no_reorder))
+ varpool_order.safe_push (vnode);
+ n_varpool_nodes = varpool_order.length ();
+ varpool_order.qsort (varpool_node_cmp);
+
/* Compute partition size and create the first partition. */
partition_size = total_size / n_lto_partitions;
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
@@ -476,6 +496,8 @@ lto_balanced_map (int n_lto_partitions)
fprintf (symtab->dump_file, "Total unit size: %i, partition size: %i\n",
total_size, partition_size);
+ auto_vec<symtab_node *> next_nodes;
+
for (i = 0; i < n_nodes; i++)
{
if (symbol_partitioned_p (order[i]))
@@ -483,14 +505,19 @@ lto_balanced_map (int n_lto_partitions)
current_order = order[i]->order;
- if (!flag_toplevel_reorder)
- while (varpool_pos < n_varpool_nodes
- && varpool_order[varpool_pos]->order < current_order)
- {
- if (!symbol_partitioned_p (varpool_order[varpool_pos]))
- add_symbol_to_partition (partition, varpool_order[varpool_pos]);
- varpool_pos++;
- }
+ /* Output noreorder and varpool in program order first. */
+ next_nodes.truncate (0);
+ while (varpool_pos < n_varpool_nodes
+ && varpool_order[varpool_pos]->order < current_order)
+ next_nodes.safe_push (varpool_order[varpool_pos++]);
+ while (noreorder_pos < (int)noreorder.length ()
+ && noreorder[noreorder_pos]->order < current_order)
+ {
+ if (!noreorder[noreorder_pos]->alias)
+ total_size -= inline_summary (noreorder[noreorder_pos])->size;
+ next_nodes.safe_push (noreorder[noreorder_pos++]);
+ }
+ add_sorted_nodes (next_nodes, partition);
add_symbol_to_partition (partition, order[i]);
if (!order[i]->alias)
@@ -580,6 +607,7 @@ lto_balanced_map (int n_lto_partitions)
if (!vnode->definition)
continue;
if (!symbol_partitioned_p (vnode) && flag_toplevel_reorder
+ && !vnode->no_reorder
&& vnode->get_partitioning_class () == SYMBOL_PARTITION)
add_symbol_to_partition (partition, vnode);
index = lto_symtab_encoder_lookup (partition->encoder,
@@ -616,6 +644,7 @@ lto_balanced_map (int n_lto_partitions)
to be removed. Coupling with objects they refer to only helps to reduce
number of symbols promoted to hidden. */
if (!symbol_partitioned_p (vnode) && flag_toplevel_reorder
+ && !vnode->no_reorder
&& !vnode->can_remove_if_no_refs_p ()
&& vnode->get_partitioning_class () == SYMBOL_PARTITION)
add_symbol_to_partition (partition, vnode);
@@ -705,24 +734,25 @@ lto_balanced_map (int n_lto_partitions)
}
}
+ next_nodes.truncate (0);
+
/* Varables that are not reachable from the code go into last partition. */
if (flag_toplevel_reorder)
{
FOR_EACH_VARIABLE (vnode)
if (vnode->get_partitioning_class () == SYMBOL_PARTITION
- && !symbol_partitioned_p (vnode))
- add_symbol_to_partition (partition, vnode);
- }
- else
- {
- while (varpool_pos < n_varpool_nodes)
- {
- if (!symbol_partitioned_p (varpool_order[varpool_pos]))
- add_symbol_to_partition (partition, varpool_order[varpool_pos]);
- varpool_pos++;
- }
- free (varpool_order);
+ && !symbol_partitioned_p (vnode)
+ && !vnode->no_reorder)
+ next_nodes.safe_push (vnode);
}
+
+ /* Output remaining ordered symbols. */
+ while (varpool_pos < n_varpool_nodes)
+ next_nodes.safe_push (varpool_order[varpool_pos++]);
+ while (noreorder_pos < (int)noreorder.length ())
+ next_nodes.safe_push (noreorder[noreorder_pos++]);
+ add_sorted_nodes (next_nodes, partition);
+
free (order);
}
@@ -831,6 +831,8 @@ symtab_node::dump_base (FILE *f)
fprintf (f, " forced_by_abi");
if (externally_visible)
fprintf (f, " externally_visible");
+ if (no_reorder)
+ fprintf (f, " no_reorder");
if (resolution != LDPR_UNKNOWN)
fprintf (f, " %s",
ld_plugin_symbol_resolution_names[(int)resolution]);
@@ -4849,6 +4849,7 @@ ipa_tm_create_version_alias (struct cgraph_node *node, void *data)
new_node = cgraph_node::create_same_body_alias (new_decl, info->new_decl);
new_node->tm_clone = true;
new_node->externally_visible = info->old_node->externally_visible;
+ new_node->no_reorder = info->old_node->no_reorder;
/* ?? Do not traverse aliases here. */
get_cg_data (&node, false)->clone = new_node;
@@ -449,6 +449,8 @@ varpool_node::add (tree decl)
symtab->call_varpool_insertion_hooks (node);
if (node->externally_visible_p ())
node->externally_visible = true;
+ /* Look the attribute up in DECL's attribute list, not in DECL itself.  */
+ if (lookup_attribute ("no_reorder", DECL_ATTRIBUTES (decl)))
+ node->no_reorder = 1;
}
/* Return variable availability. See cgraph.h for description of individual
@@ -640,7 +642,7 @@ symbol_table::remove_unreferenced_decls (void)
for (node = first_defined_variable (); node; node = next)
{
next = next_defined_variable (node);
- if (!node->aux)
+ if (!node->aux && !node->no_reorder)
{
if (dump_file)
fprintf (dump_file, " %s", node->asm_name ());
@@ -687,11 +689,22 @@ symbol_table::output_variables (void)
timevar_push (TV_VAROUT);
FOR_EACH_DEFINED_VARIABLE (node)
- node->finalize_named_section_flags ();
+ {
+ /* Handled in output_in_order. */
+ if (node->no_reorder)
+ continue;
+
+ node->finalize_named_section_flags ();
+ }
FOR_EACH_DEFINED_VARIABLE (node)
- if (node->assemble_decl ())
- changed = true;
+ {
+ /* Handled in output_in_order. */
+ if (node->no_reorder)
+ continue;
+ if (node->assemble_decl ())
+ changed = true;
+ }
timevar_pop (TV_VAROUT);
return changed;
}
From: Andi Kleen <ak@linux.intel.com> Some projects need to prevent reordering of specific top level declarations with LTO, in particular declarations defining init calls. The only way to do that with LTO was to use -fno-toplevel-reorder, which stops reordering for all declarations and makes LTO partitioning less efficient. This patch adds a new no_reorder attribute that stops reordering only for the marked declaration. The program can then only mark e.g. the initcalls and leave all the other declarations alone. The patch does: - Adds the new no_reorder attribute for the C family. - Initializes a new no_reorder flag in the symtab_nodes in the function visibility flag. - Maintains the no_reorder flag when creating new nodes. - Changes the partition code to always keep a separate sorted queue of ordered nodes and flush them in order with the other nodes. This is used by all nodes with -fno-toplevel-reorder, and only the marked ones without it. Parts of the old -fno-toplevel-reorder code paths are reused. - Adds various checks throughout the tree to make no_reorder marked functions behave the same as with -fno-toplevel-reorder - Changes the LTO streamer to serialize the no_reorder attribute. Bootstrapped and tested with LTO + -fno-toplevel-reorder, plain LTO and bootstrap w/o LTO on x86_64-linux. Also fixes the reordering in the other large project. gcc/c-family/: 2014-09-15 Andi Kleen <ak@linux.intel.com> * c-common.c (handle_no_reorder_attribute): New function. (c_common_attribute_table): Add no_reorder attribute. gcc/: 2014-09-14 Andi Kleen <ak@linux.intel.com> * cgraph.h (symtab_node): Add no_reorder attribute. (symbol_table::output_asm_statements): Remove. * cgraphclones.c (cgraph_node::create_clone): Copy no_reorder. (cgraph_node::create_version_clone): Dito. (symbol_table::output_asm_statements): Remove. * trans-mem.c (ipa_tm_create_version_alias): Dito. * cgraphunit.c (varpool_node::finalize_decl): Check no_reorder. (output_in_order): Add no_reorder flag. 
Only handle no_reorder nodes when set. (symbol_table::compile): Add separate pass for no_reorder nodes. * doc/extend.texi (no_reorder): Document no_reorder attribute. * ipa-visibility.c (function_and_variable_visibility): Set no_reorder flag in symtab_node from declaration. * lto-cgraph.c (lto_output_node): Serialize no_reorder. (lto_output_varpool_node): Dito. (input_overwrite_node): Dito. (input_varpool_node): Dito. * varpool.c (varpool_node::add): Set no_reorder attribute. (symbol_table::remove_unreferenced_decls): Handle no_reorder. (symbol_table::output_variables): Dito. * symtab.c (symtab_node::dump_base): Print no_reorder. gcc/lto/: 2014-09-13 Andi Kleen <ak@linux.intel.com> * lto-partition.c (node_cmp): Update comment. (varpool_node_cmp): Use symtab_node for comparison. (add_sorted_nodes): New function. (lto_balanced_map): Change to keep ordered queue of ordered node. Handle no_reorder attribute. --- gcc/c-family/c-common.c | 28 ++++++++++++ gcc/cgraph.h | 5 +-- gcc/cgraphclones.c | 2 + gcc/cgraphunit.c | 38 +++++++--------- gcc/doc/extend.texi | 12 +++++- gcc/ipa-visibility.c | 6 +++ gcc/lto-cgraph.c | 4 ++ gcc/lto/lto-partition.c | 112 ++++++++++++++++++++++++++++++------------------ gcc/symtab.c | 2 + gcc/trans-mem.c | 1 + gcc/varpool.c | 21 +++++++-- 11 files changed, 159 insertions(+), 72 deletions(-)