@@ -854,6 +854,8 @@ extern VEC (basic_block, heap) *get_dominated_by (enum cdi_direction, basic_bloc
extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction,
basic_block *,
unsigned);
+extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction,
+ basic_block, int);
extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction,
basic_block);
extern void add_to_dominance_info (enum cdi_direction, basic_block);
@@ -8232,6 +8232,12 @@ an expression can travel. This is currently supported only in
code hoisting pass. The lesser the cost, the more aggressive code hoisting
will be. The default value is 3.
+@item max-hoist-depth
+The depth of search in the dominator tree for expressions to hoist.
+This is used to avoid quadratic behavior in hoisting algorithm.
+The value of 0 will avoid limiting the search, but may slow down compilation
+of huge functions. The default value is 30.
+
@item max-unrolled-insns
The maximum number of instructions that a loop should have if that loop
is unrolled, and if the loop is unrolled, it determines how many times
@@ -783,16 +783,20 @@ get_dominated_by_region (enum cdi_direction dir, basic_block *region,
}
/* Returns the list of basic blocks including BB dominated by BB, in the
- direction DIR. The vector will be sorted in preorder. */
+ direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will
+ produce a vector containing all dominated blocks. The vector will be sorted
+ in preorder. */
VEC (basic_block, heap) *
-get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
+get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth)
{
VEC(basic_block, heap) *bbs = NULL;
unsigned i;
+ unsigned next_level_start;
i = 0;
VEC_safe_push (basic_block, heap, bbs, bb);
+ next_level_start = 1; /* = VEC_length (basic_block, bbs); */
do
{
@@ -803,12 +807,24 @@ get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
son;
son = next_dom_son (dir, son))
VEC_safe_push (basic_block, heap, bbs, son);
+
+ if (i == next_level_start && --depth)
+ next_level_start = VEC_length (basic_block, bbs);
}
- while (i < VEC_length (basic_block, bbs));
+ while (i < next_level_start);
return bbs;
}
+/* Returns the list of basic blocks including BB dominated by BB, in the
+ direction DIR. The vector will be sorted in preorder. */
+
+VEC (basic_block, heap) *
+get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
+{
+ return get_dominated_to_depth (dir, bb, 0);
+}
+
/* Redirect all edges pointing to BB to TO. */
void
redirect_immediate_dominators (enum cdi_direction dir, basic_block bb,
@@ -4219,6 +4219,7 @@ compute_code_hoist_vbeinout (void)
{
int changed, passes;
basic_block bb;
+ sbitmap tmp1, tmp2;
sbitmap_vector_zero (hoist_vbeout, last_basic_block);
sbitmap_vector_zero (hoist_vbein, last_basic_block);
@@ -4235,8 +4236,14 @@ compute_code_hoist_vbeinout (void)
FOR_EACH_BB_REVERSE (bb)
{
if (bb->next_bb != EXIT_BLOCK_PTR)
- sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
- hoist_vbein, bb->index);
+ {
+ sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
+ hoist_vbein, bb->index);
+
+ /* Remove from VBEout expressions that die right after BB. */
+ sbitmap_a_and_b (hoist_vbeout[bb->index],
+ hoist_vbeout[bb->index], transpout[bb->index]);
+ }
changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
antloc[bb->index],
@@ -4248,7 +4255,144 @@ compute_code_hoist_vbeinout (void)
}
if (dump_file)
- fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
+ {
+ fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
+
+ FOR_EACH_BB (bb)
+ {
+ fprintf (dump_file, "vbein (%d): ", bb->index);
+ dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
+ fprintf (dump_file, "vbeout(%d): ", bb->index);
+ dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
+ }
+ }
+
+ /* Now cleanup VBEout to avoid moving expressions too far up.
+
+ We follow two rules to clean up VBEout[BB]:
+
+ 1. If BB does not have any dominated blocks, nothing will ever be hoisted
+ to BB, so we can just wipe its VBEout clean.
+
+ 2. If an expression can be hoisted both to BB and to a *single* successor
+ of BB in the dominator tree, then there is no point of hoisting
+ the expression to BB over BB's successor. Doing otherwise would
+ unnecessarily extend live ranges. */
+
+ /* Wipe VBEout of leaf blocks in the dominator tree. */
+ FOR_EACH_BB (bb)
+ if (first_dom_son (CDI_DOMINATORS, bb) == NULL)
+ sbitmap_zero (hoist_vbeout[bb->index]);
+
+ tmp1 = sbitmap_alloc (expr_hash_table.n_elems);
+ tmp2 = sbitmap_alloc (expr_hash_table.n_elems);
+
+ /* We cannot cleanup VBEout in a single traversal. There has to be both
+ upward and downward links when computing VBEout of current block to
+ avoid removing bits that shouldn't be removed. E.g., consider
+ the following dominator tree; '*' marks blocks which compute same
+ expression, the expression can be freely moved; the expected result
+ is that we move computations of '*' from (3) and (6) to (2).
+
+ 2
+ / \
+ 3* 4
+ / \
+ 5 6*
+
+ A walk over the above tree considering only downward links will first
+ remove '*' from VBEout[4] [as there's no point of hoisting
+ the expression to (4) if it's not computed in both (5) and (6)].
+ Then, when processing VBEout[2]. we won't see '*' as needed in (4),
+ so '*' will be removed from VBEout[2] too, leaving a copy of '*' in (3).
+
+ Therefore, we use iterative algorithm with both upward and downward
+ links to solve this problem. The algorithm obviously converges as at
+ each iteration we make VBEout sets only smaller. */
+
+ passes = 0;
+ changed = 1;
+
+ while (changed)
+ {
+ changed = 0;
+
+ FOR_EACH_BB (bb)
+ {
+ basic_block son;
+ bool first_p = true;
+
+ /* Walk through dominated blocks and calculate the set of expressions
+ that are needed in any one, and only one, of the blocks.
+ TMP1 is the basis of what we want to remove from VBEout[BB]. */
+ for (son = first_dom_son (CDI_DOMINATORS, bb);
+ son != NULL;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ {
+ if (first_p)
+ {
+ sbitmap_copy (tmp1, hoist_vbeout[son->index]);
+ first_p = false;
+ }
+ else
+ sbitmap_difference (tmp1, tmp1, hoist_vbeout[son->index]);
+ }
+
+ if (!first_p)
+ {
+ /* Now trim TMP1 to avoid removing too much. */
+
+ if (bb->prev_bb != ENTRY_BLOCK_PTR)
+ /* Remove epxressions from TMP1 that are needed upwards.
+ These are VBEout[parent] minus expressions that are
+ killed in BB (and, hence, don't get to VBEout[parent] from
+ BB). */
+ {
+ basic_block parent;
+
+ parent = get_immediate_dominator (CDI_DOMINATORS, bb);
+
+ /* Expressions killed in BB. */
+ sbitmap_not (tmp2, transp[bb->index]);
+
+ /* Expressions that reach from PARENT to the end of BB. */
+ sbitmap_difference (tmp2, hoist_vbeout[parent->index],
+ tmp2);
+
+ sbitmap_difference (tmp1, tmp1, tmp2);
+ }
+
+ /* Never remove any of expressions computed in BB from
+ VBEout[BB]. */
+ sbitmap_difference (tmp1, tmp1, comp[bb->index]);
+
+ if (sbitmap_any_common_bits (hoist_vbeout[bb->index], tmp1))
+ /* There is at least one bit that can be removed from
+ VBEout[BB]. */
+ {
+ sbitmap_difference (hoist_vbeout[bb->index],
+ hoist_vbeout[bb->index], tmp1);
+ changed = 1;
+ }
+ }
+ }
+
+ passes++;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "hoisting vbeout cleanup: %d passes\n", passes);
+
+ FOR_EACH_BB (bb)
+ {
+ fprintf (dump_file, "vbeout(%d): ", bb->index);
+ dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
+ }
+ }
+
+ sbitmap_free (tmp1);
+ sbitmap_free (tmp2);
}
/* Top level routine to do the dataflow analysis needed by code hoisting. */
@@ -4258,8 +4402,8 @@ compute_code_hoist_data (void)
{
compute_local_properties (transp, comp, antloc, &expr_hash_table);
compute_transpout ();
- compute_code_hoist_vbeinout ();
calculate_dominance_info (CDI_DOMINATORS);
+ compute_code_hoist_vbeinout ();
if (dump_file)
fprintf (dump_file, "\n");
}
@@ -4311,11 +4455,12 @@ hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
if (pred->src == ENTRY_BLOCK_PTR)
break;
- else if (pred_bb == expr_bb)
- continue;
else if (visited[pred_bb->index])
continue;
-
+ else if (!TEST_BIT (transpout[pred_bb->index], expr_index))
+ break;
+ else if (pred_bb == expr_bb)
+ continue;
/* Does this predecessor generate this expression? */
else if (TEST_BIT (comp[pred_bb->index], expr_index))
break;
@@ -4403,15 +4548,15 @@ hoist_code (void)
int found = 0;
int insn_inserted_p;
- domby = get_dominated_by (CDI_DOMINATORS, bb);
+ domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
+
/* Examine each expression that is very busy at the exit of this
block. These are the potentially hoistable expressions. */
for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
{
int hoistable = 0;
- if (TEST_BIT (hoist_vbeout[bb->index], i)
- && TEST_BIT (transpout[bb->index], i))
+ if (TEST_BIT (hoist_vbeout[bb->index], i))
{
/* We've found a potentially hoistable expression, now
we look at every block BB dominates to see if it
@@ -4438,6 +4583,14 @@ hoist_code (void)
occr = find_occr_in_bb (expr->antic_occr, dominated);
gcc_assert (occr);
+ /* An occurence might've been already deleted
+ while processing a dominator of BB. */
+ if (occr->deleted_p)
+ {
+ gcc_assert (MAX_HOIST_DEPTH > 1);
+ continue;
+ }
+
gcc_assert (NONDEBUG_INSN_P (occr->insn));
/* Adjust MAX_DISTANCE to account for the fact that
@@ -4516,6 +4669,14 @@ hoist_code (void)
occr = find_occr_in_bb (expr->antic_occr, dominated);
gcc_assert (occr);
+ /* An occurence might've been already deleted
+ while processing a dominator of BB. */
+ if (occr->deleted_p)
+ {
+ gcc_assert (MAX_HOIST_DEPTH > 1);
+ continue;
+ }
+
gcc_assert (NONDEBUG_INSN_P (occr->insn));
/* Adjust MAX_DISTANCE to account for the fact that
@@ -4546,6 +4707,14 @@ hoist_code (void)
gcc_assert (occr);
}
+ /* An occurence might've been already deleted
+ while processing a dominator of BB. */
+ if (occr->deleted_p)
+ {
+ gcc_assert (MAX_HOIST_DEPTH > 1);
+ continue;
+ }
+
insn = occr->insn;
set = single_set (insn);
gcc_assert (set);
@@ -240,6 +240,14 @@ DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST,
"Cost at which GCSE optimizations will not constraint the distance an expression can travel",
3, 0, 0)
+/* How deep from a given basic block the dominator tree should be searched
+ for expressions to hoist to the block. The value of 0 will avoid limiting
+ the search. */
+DEFPARAM(PARAM_MAX_HOIST_DEPTH,
+ "max-hoist-depth",
+ "Maximum depth of search in the dominator tree for expressions to hoist",
+ 30, 0, 0)
+
/* This parameter limits the number of insns in a loop that will be unrolled,
and by how much the loop is unrolled.
@@ -129,6 +129,8 @@ typedef enum compiler_param
PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO)
#define GCSE_UNRESTRICTED_COST \
PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST)
+#define MAX_HOIST_DEPTH \
+ PARAM_VALUE (PARAM_MAX_HOIST_DEPTH)
#define MAX_UNROLLED_INSNS \
PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
#define MAX_SMS_LOOP_NUMBER \