@@ -854,6 +854,8 @@ extern VEC (basic_block, heap) *get_dominated_by (enum cdi_direction, basic_bloc
extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction,
basic_block *,
unsigned);
+extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction,
+ basic_block, int);
extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction,
basic_block);
extern void add_to_dominance_info (enum cdi_direction, basic_block);
@@ -8195,6 +8195,12 @@ when @option{-ftree-vectorize} is used. The number of iterations after
vectorization needs to be greater than the value specified by this option
to allow vectorization. The default value is 0.
+@item max-hoist-depth
+The depth of search in the dominator tree for expressions to hoist.
+This is used to avoid quadratic behavior in hoisting algorithm.
+The value of 0 will avoid limiting the search, but may slow down compilation
+of huge functions. The default value is 30.
+
@item max-unrolled-insns
The maximum number of instructions that a loop should have if that loop
is unrolled, and if the loop is unrolled, it determines how many times
@@ -783,16 +783,20 @@ get_dominated_by_region (enum cdi_direction dir, basic_block *region,
}
/* Returns the list of basic blocks including BB dominated by BB, in the
- direction DIR. The vector will be sorted in preorder. */
+ direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will
+ produce a vector containing all dominated blocks. The vector will be sorted
+ in preorder. */
VEC (basic_block, heap) *
-get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
+get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth)
{
VEC(basic_block, heap) *bbs = NULL;
unsigned i;
+ unsigned next_level_start;
i = 0;
VEC_safe_push (basic_block, heap, bbs, bb);
+ next_level_start = 1; /* = VEC_length (basic_block, bbs); */
do
{
@@ -803,12 +807,24 @@ get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
son;
son = next_dom_son (dir, son))
VEC_safe_push (basic_block, heap, bbs, son);
+
+ if (i == next_level_start && --depth)
+ next_level_start = VEC_length (basic_block, bbs);
}
- while (i < VEC_length (basic_block, bbs));
+ while (i < next_level_start);
return bbs;
}
+/* Returns the list of basic blocks including BB dominated by BB, in the
+ direction DIR. The vector will be sorted in preorder. */
+
+VEC (basic_block, heap) *
+get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
+{
+ return get_dominated_to_depth (dir, bb, 0);
+}
+
/* Redirect all edges pointing to BB to TO. */
void
redirect_immediate_dominators (enum cdi_direction dir, basic_block bb,
@@ -4145,6 +4145,57 @@ free_code_hoist_mem (void)
free_dominance_info (CDI_DOMINATORS);
}
+/* Cleanup VBEout of BB and its successors in the dominator tree. */
+
+static void
+cleanup_code_hoist_vbeout (basic_block bb)
+{
+ basic_block son;
+ bool first_p = true;
+
+ /* We follow two rules to clean up VBEout[BB]:
+
+ 1. If BB does not have any dominated blocks, nothing will ever be hoisted
+ to BB, so we can just wipe its VBEout clean.
+
+ 2. If an expression can be hoisted both to BB and to a *single* successor
+ of BB in the dominator tree, then there is no point of hoisting
+ the expression to BB over BB's successor. Doing otherwise would
+ unnecessarily extend live ranges. One exception to this rule is when
+ an expression is computed in BB and available at BB's end, so we need
+ to subtract comp[bb] from the set of expressions that are present in
+ only one of the dominated blocks. */
+
+ for (son = first_dom_son (CDI_DOMINATORS, bb);
+ son != NULL;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ {
+ cleanup_code_hoist_vbeout (son);
+
+ if (first_p)
+ {
+ sbitmap_copy (hoist_vbein[bb->index], hoist_vbeout[son->index]);
+ first_p = false;
+ }
+ else
+ sbitmap_difference (hoist_vbein[bb->index],
+ hoist_vbein[bb->index], hoist_vbeout[son->index]);
+ }
+
+ if (!first_p)
+ {
+ sbitmap_difference (hoist_vbein[bb->index],
+ hoist_vbein[bb->index], comp[bb->index]);
+
+ if (sbitmap_any_common_bits (hoist_vbeout[bb->index],
+ hoist_vbein[bb->index]))
+ sbitmap_difference (hoist_vbeout[bb->index],
+ hoist_vbeout[bb->index], hoist_vbein[bb->index]);
+ }
+ else
+ sbitmap_zero (hoist_vbeout[bb->index]);
+}
+
/* Compute the very busy expressions at entry/exit from each block.
An expression is very busy if all paths from a given point
@@ -4203,6 +4254,19 @@ compute_code_hoist_vbeinout (void)
dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
}
}
+
+ cleanup_code_hoist_vbeout (ENTRY_BLOCK_PTR->next_bb);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "hoisting vbeout cleanup pass\n");
+
+ FOR_EACH_BB (bb)
+ {
+ fprintf (dump_file, "vbeout(%d): ", bb->index);
+ dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
+ }
+ }
}
/* Top level routine to do the dataflow analysis needed by code hoisting. */
@@ -4212,8 +4276,8 @@ compute_code_hoist_data (void)
{
compute_local_properties (transp, comp, antloc, &expr_hash_table);
compute_transpout ();
- compute_code_hoist_vbeinout ();
calculate_dominance_info (CDI_DOMINATORS);
+ compute_code_hoist_vbeinout ();
if (dump_file)
fprintf (dump_file, "\n");
}
@@ -4306,7 +4370,8 @@ hoist_code (void)
int found = 0;
int insn_inserted_p;
- domby = get_dominated_by (CDI_DOMINATORS, bb);
+ domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
+
/* Examine each expression that is very busy at the exit of this
block. These are the potentially hoistable expressions. */
for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
@@ -4397,7 +4462,11 @@ hoist_code (void)
it would be safe to compute it at the start of the
dominated block. Now we have to determine if the
expression would reach the dominated block if it was
- placed at the end of BB. */
+ placed at the end of BB.
+ Note: the fact that hoist_exprs has i-th bit set means
+ that /some/, not necesserilly all, occurences from
+ the dominated blocks can be hoisted to BB. Here we check
+ if a specific occurence can be hoisted to BB. */
if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
{
struct expr *expr = index_map[i];
@@ -4410,6 +4479,12 @@ hoist_code (void)
occr = occr->next;
gcc_assert (occr);
+
+ /* An occurence might've been already deleted
+ while processing a dominator of BB. */
+ if (occr->deleted_p)
+ continue;
+
insn = occr->insn;
set = single_set (insn);
gcc_assert (set);
@@ -219,6 +219,14 @@ DEFPARAM(PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION,
"gcse-after-reload-critical-fraction",
"The threshold ratio of critical edges execution count that permit performing redundancy elimination after reload",
10, 0, 0)
+/* How deep from a given basic block the dominator tree should be searched
+ for expressions to hoist to the block. The value of 0 will avoid limiting
+ the search. */
+DEFPARAM(PARAM_MAX_HOIST_DEPTH,
+ "max-hoist-depth",
+ "Maximum depth of search in the dominator tree for expressions to hoist",
+ 30, 0, 0)
+
/* This parameter limits the number of insns in a loop that will be unrolled,
and by how much the loop is unrolled.
@@ -125,6 +125,8 @@ typedef enum compiler_param
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
#define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
+#define MAX_HOIST_DEPTH \
+ PARAM_VALUE (PARAM_MAX_HOIST_DEPTH)
#define MAX_UNROLLED_INSNS \
PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
#define MAX_SMS_LOOP_NUMBER \