@@ -8195,6 +8195,19 @@ when @option{-ftree-vectorize} is used. The number of iterations after
vectorization needs to be greater than the value specified by this option
to allow vectorization. The default value is 0.
+@item gcse-cost-distance-ratio
+Scaling factor in calculation of maximum distance an expression
+can be moved by GCSE optimizations. This is currently supported only in
+code hoisting pass. The bigger the ratio, the more agressive code hoisting
+will be with expressions which have cost less than
+@option{gcse-unrestricted-cost}. The default value is 2.
+
+@item gcse-unrestricted-cost
+Cost at which GCSE optimizations will not constraint the distance
+an expression can travel. This is currently supported only in
+code hoisting pass. The lesser the cost, the more aggressive code hoisting
+will be. The default value is 3.
+
@item max-hoist-depth
The depth of search in the dominator tree for expressions to hoist.
This is used to avoid quadratic behavior in hoisting algorithm.
@@ -295,6 +295,12 @@ struct expr
The value is the newly created pseudo-reg to record a copy of the
expression in all the places that reach the redundant copy. */
rtx reaching_reg;
+ /* Maximum distance in instructions this expression can travel.
+ We avoid moving simple expressions for more than a few instructions
+ to keep register pressure under control.
+ A value of "0" removes restrictions on how far the expression can
+ travel. */
+ int max_distance;
};
/* Occurrence of an expression.
@@ -431,12 +437,12 @@ static void hash_scan_insn (rtx, struct hash_table_d *);
static void hash_scan_set (rtx, rtx, struct hash_table_d *);
static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
static void hash_scan_call (rtx, rtx, struct hash_table_d *);
-static int want_to_gcse_p (rtx);
+static int want_to_gcse_p (rtx, int *);
static bool gcse_constant_p (const_rtx);
static int oprs_unchanged_p (const_rtx, const_rtx, int);
static int oprs_anticipatable_p (const_rtx, const_rtx);
static int oprs_available_p (const_rtx, const_rtx);
-static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int,
+static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
struct hash_table_d *);
static void insert_set_in_table (rtx, rtx, struct hash_table_d *);
static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
@@ -496,7 +502,8 @@ static void alloc_code_hoist_mem (int, int);
static void free_code_hoist_mem (void);
static void compute_code_hoist_vbeinout (void);
static void compute_code_hoist_data (void);
-static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *);
+static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
+ int, int *);
static int hoist_code (void);
static int one_code_hoisting_pass (void);
static rtx process_insert_insn (struct expr *);
@@ -754,8 +761,11 @@ static basic_block current_bb;
GCSE. */
static int
-want_to_gcse_p (rtx x)
+want_to_gcse_p (rtx x, int *max_distance_ptr)
{
+ int cost;
+ int max_distance;
+
#ifdef STACK_REGS
/* On register stack architectures, don't GCSE constants from the
constant pool, as the benefits are often swamped by the overhead
@@ -764,14 +774,42 @@ want_to_gcse_p (rtx x)
x = avoid_constant_pool_reference (x);
#endif
+ /* GCSE'ing constants:
+
+ We do not specifically distinguish between constant and non-constant
+ expressions in PRE and Hoist. We use rtx_cost below to limit
+ the maximum distance simple expressions can travel.
+
+ Nevertheless, constants are much easier to GCSE, and, hence,
+ it is easy to overdo the optimizations. Usually, excessive PRE and
+ Hoisting of constant leads to increased register pressure.
+
+ RA can deal with this by rematerialing some of the constants.
+ Therefore, it is important that the back-end generates sets of constants
+ in a way that allows reload rematerialize them under high register
+ pressure, i.e., a pseudo register with REG_EQUAL to constant
+ is set only once. Failing to do so will result in IRA/reload
+ spilling such constants under high register pressure instead of
+ rematerializing them. */
+
+ cost = rtx_cost (x, SET, optimize_function_for_speed_p (cfun));
+
+ if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
+ {
+ max_distance = GCSE_COST_DISTANCE_RATIO * cost;
+ if (max_distance == 0)
+ return 0;
+ }
+ else
+ max_distance = 0;
+
+ if (max_distance_ptr)
+ *max_distance_ptr = max_distance;
+
switch (GET_CODE (x))
{
case REG:
case SUBREG:
- case CONST_INT:
- case CONST_DOUBLE:
- case CONST_FIXED:
- case CONST_VECTOR:
case CALL:
return 0;
@@ -1089,11 +1127,15 @@ expr_equiv_p (const_rtx x, const_rtx y)
It is only used if X is a CONST_INT.
ANTIC_P is nonzero if X is an anticipatable expression.
- AVAIL_P is nonzero if X is an available expression. */
+ AVAIL_P is nonzero if X is an available expression.
+
+ MAX_DISTANCE is the maximum distance in instructions this expression can
+ be moved.
+*/
static void
insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
- int avail_p, struct hash_table_d *table)
+ int avail_p, int max_distance, struct hash_table_d *table)
{
int found, do_not_record_p;
unsigned int hash;
@@ -1136,7 +1178,10 @@ insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
cur_expr->next_same_hash = NULL;
cur_expr->antic_occr = NULL;
cur_expr->avail_occr = NULL;
+ cur_expr->max_distance = max_distance;
}
+ else
+ gcc_assert (cur_expr->max_distance == max_distance);
/* Now record the occurrence(s). */
if (antic_p)
@@ -1237,6 +1282,7 @@ insert_set_in_table (rtx x, rtx insn, struct hash_table_d *table)
cur_expr->next_same_hash = NULL;
cur_expr->antic_occr = NULL;
cur_expr->avail_occr = NULL;
+ cur_expr->max_distance = 0; /* Not used for set_p tables. */
}
/* Now record the occurrence. */
@@ -1306,6 +1352,7 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
{
unsigned int regno = REGNO (dest);
rtx tmp;
+ int max_distance = 0;
/* See if a REG_EQUAL note shows this equivalent to a simpler expression.
@@ -1328,7 +1375,7 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
&& !REG_P (src)
&& (table->set_p
? gcse_constant_p (XEXP (note, 0))
- : want_to_gcse_p (XEXP (note, 0))))
+ : want_to_gcse_p (XEXP (note, 0), NULL)))
src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);
/* Only record sets of pseudo-regs in the hash table. */
@@ -1343,7 +1390,7 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
can't do the same thing at the rtl level. */
&& !can_throw_internal (insn)
/* Is SET_SRC something we want to gcse? */
- && want_to_gcse_p (src)
+ && want_to_gcse_p (src, &max_distance)
/* Don't CSE a nop. */
&& ! set_noop_p (pat)
/* Don't GCSE if it has attached REG_EQUIV note.
@@ -1367,7 +1414,8 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
int avail_p = (oprs_available_p (src, insn)
&& ! JUMP_P (insn));
- insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, table);
+ insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
+ max_distance, table);
}
/* Record sets for constant/copy propagation. */
@@ -1404,7 +1452,7 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
do that easily for EH edges so disable GCSE on these for now. */
&& !can_throw_internal (insn)
/* Is SET_DEST something we want to gcse? */
- && want_to_gcse_p (dest)
+ && want_to_gcse_p (dest, NULL)
/* Don't CSE a nop. */
&& ! set_noop_p (pat)
/* Don't GCSE if it has attached REG_EQUIV note.
@@ -1425,7 +1473,7 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
&& ! JUMP_P (insn);
/* Record the memory expression (DEST) in the hash table. */
- insert_expr_in_table (dest, GET_MODE (dest), insn,
+ insert_expr_in_table (dest, GET_MODE (dest), insn, 0,
antic_p, avail_p, table);
}
}
@@ -1512,8 +1560,8 @@ dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
if (flat_table[i] != 0)
{
expr = flat_table[i];
- fprintf (file, "Index %d (hash value %d)\n ",
- expr->bitmap_index, hash_val[i]);
+ fprintf (file, "Index %d (hash value %d; max distance %d)\n ",
+ expr->bitmap_index, hash_val[i], expr->max_distance);
print_rtl (file, expr->expr);
fprintf (file, "\n");
}
@@ -4284,6 +4332,8 @@ compute_code_hoist_data (void)
/* Determine if the expression identified by EXPR_INDEX would
reach BB unimpared if it was placed at the end of EXPR_BB.
+ Stop the search if the expression would need to be moved more
+ than DISTANCE instructions.
It's unclear exactly what Muchnick meant by "unimpared". It seems
to me that the expression must either be computed or transparent in
@@ -4296,7 +4346,8 @@ compute_code_hoist_data (void)
paths. */
static int
-hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb, char *visited)
+hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
+ char *visited, int distance, int *bb_size)
{
edge pred;
edge_iterator ei;
@@ -4309,6 +4360,18 @@ hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
visited = XCNEWVEC (char, last_basic_block);
}
+ /* Terminate the search if distance, for which EXPR is allowed to move,
+ is exhausted. */
+ if (distance > 0)
+ {
+ distance -= bb_size[bb->index];
+
+ if (distance <= 0)
+ return 0;
+ }
+ else
+ gcc_assert (distance == 0);
+
FOR_EACH_EDGE (pred, ei, bb->preds)
{
basic_block pred_bb = pred->src;
@@ -4330,8 +4393,8 @@ hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
else
{
visited[pred_bb->index] = 1;
- if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
- pred_bb, visited))
+ if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
+ visited, distance, bb_size))
break;
}
}
@@ -4341,6 +4404,19 @@ hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
return (pred == NULL);
}
+/* Find anticipatable occurence of EXPR in BB. */
+static struct occr *
+find_antic_occr_in_bb (struct expr *expr, basic_block bb)
+{
+ struct occr *occr = expr->antic_occr;
+
+ /* Find the right occurrence of this expression. */
+ while (BLOCK_FOR_INSN (occr->insn) != bb && occr)
+ occr = occr->next;
+
+ return occr;
+}
+
/* Actually perform code hoisting. */
static int
@@ -4351,6 +4427,7 @@ hoist_code (void)
unsigned int i,j;
struct expr **index_map;
struct expr *expr;
+ int *bb_size;
int changed = 0;
sbitmap_vector_zero (hoist_exprs, last_basic_block);
@@ -4363,6 +4440,28 @@ hoist_code (void)
for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
index_map[expr->bitmap_index] = expr;
+ bb_size = XCNEWVEC (int, last_basic_block);
+ FOR_EACH_BB (bb)
+ {
+ rtx bb_head;
+ rtx bb_end;
+
+ bb_head = next_nonnote_insn (BB_HEAD (bb));
+ bb_end = BB_END (bb);
+ bb_end = INSN_P (bb_end) ? bb_end : prev_nonnote_insn (bb_end);
+
+ if (bb_head && BLOCK_FOR_INSN (bb_head) == bb
+ && bb_end && BLOCK_FOR_INSN (bb_end) == bb)
+ {
+ gcc_assert (INSN_P (bb_head) && INSN_P (bb_end));
+ bb_size[bb->index] = (DF_INSN_LUID (bb_end) - DF_INSN_LUID (bb_head)
+ + 1);
+ gcc_assert (bb_size[bb->index] >= 1);
+ }
+ else
+ bb_size[bb->index] = 0;
+ }
+
/* Walk over each basic block looking for potentially hoistable
expressions, nothing gets hoisted from the entry block. */
FOR_EACH_BB (bb)
@@ -4391,6 +4490,8 @@ hoist_code (void)
computes the expression. */
for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
{
+ int max_distance;
+
/* Ignore self dominance. */
if (bb == dominated)
continue;
@@ -4400,12 +4501,42 @@ hoist_code (void)
if (!TEST_BIT (antloc[dominated->index], i))
continue;
+ expr = index_map[i];
+
+ max_distance = expr->max_distance;
+ if (max_distance > 0)
+ {
+ struct occr *occr;
+
+ occr = find_antic_occr_in_bb (expr, dominated);
+ gcc_assert (occr);
+
+ /* An occurence might've been already deleted
+ while processing a dominator of BB. */
+ if (occr->deleted_p)
+ {
+ gcc_assert (MAX_HOIST_DEPTH > 1);
+ continue;
+ }
+
+ /* Adjust MAX_DISTANCE to account for the fact that
+ OCCR won't have to travel all of DOMINATED, but
+ only part of it.
+ Note: DF_INSN_LUIDs should be used cautiously once
+ we start emitting new instructions. Luckily, we
+ are sure that occr->insn was present at the time of
+ df_analyze, so it has valid DF_INSN_LUID. */
+ max_distance += (bb_size[dominated->index]
+ - DF_INSN_LUID (occr->insn));
+ }
+
/* Note if the expression would reach the dominated block
unimpared if it was placed at the end of BB.
Keep track of how many times this expression is hoistable
from a dominated block into BB. */
- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
+ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
+ max_distance, bb_size))
hoistable++;
}
@@ -4448,6 +4579,9 @@ hoist_code (void)
computes the expression. */
for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
{
+ struct occr *occr = NULL;
+ int max_distance;
+
/* Ignore self dominance. */
if (bb == dominated)
continue;
@@ -4458,6 +4592,33 @@ hoist_code (void)
if (!TEST_BIT (antloc[dominated->index], i))
continue;
+ expr = index_map[i];
+
+ max_distance = expr->max_distance;
+ if (max_distance > 0)
+ {
+ occr = find_antic_occr_in_bb (index_map[i], dominated);
+ gcc_assert (occr);
+
+ /* An occurence might've been already deleted
+ while processing a dominator of BB. */
+ if (occr->deleted_p)
+ {
+ gcc_assert (MAX_HOIST_DEPTH > 1);
+ continue;
+ }
+
+ /* Adjust MAX_DISTANCE to account for the fact that
+ OCCR won't have to travel all of DOMINATED, but
+ only part of it.
+ Note: DF_INSN_LUIDs should be used cautiously once
+ we start emitting new instructions. Luckily, we
+ are sure that occr->insn was present at the time of
+ df_analyze, so it has valid DF_INSN_LUID. */
+ max_distance += (bb_size[dominated->index]
+ - DF_INSN_LUID (occr->insn));
+ }
+
/* The expression is computed in the dominated block and
it would be safe to compute it at the start of the
dominated block. Now we have to determine if the
@@ -4467,23 +4628,25 @@ hoist_code (void)
that /some/, not necesserilly all, occurences from
the dominated blocks can be hoisted to BB. Here we check
if a specific occurence can be hoisted to BB. */
- if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
+ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
+ max_distance, bb_size))
{
- struct expr *expr = index_map[i];
- struct occr *occr = expr->antic_occr;
rtx insn;
rtx set;
- /* Find the right occurrence of this expression. */
- while (BLOCK_FOR_INSN (occr->insn) != dominated && occr)
- occr = occr->next;
-
- gcc_assert (occr);
+ if (!occr)
+ {
+ occr = find_antic_occr_in_bb (expr, dominated);
+ gcc_assert (occr);
+ }
/* An occurence might've been already deleted
while processing a dominator of BB. */
if (occr->deleted_p)
- continue;
+ {
+ gcc_assert (MAX_HOIST_DEPTH > 1);
+ continue;
+ }
insn = occr->insn;
set = single_set (insn);
@@ -4519,6 +4682,7 @@ hoist_code (void)
VEC_free (basic_block, heap, domby);
}
+ free (bb_size);
free (index_map);
return changed;
@@ -219,6 +219,21 @@ DEFPARAM(PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION,
"gcse-after-reload-critical-fraction",
"The threshold ratio of critical edges execution count that permit performing redundancy elimination after reload",
10, 0, 0)
+
+/* GCSE will use GCSE_COST_DISTANCE_RATION as a scaling factor
+ to calculate maximum distance for which an expression is allowed to move
+ from its rtx_cost. */
+DEFPARAM(PARAM_GCSE_COST_DISTANCE_RATIO,
+ "gcse-cost-distance-ratio",
+ "Scaling factor in calculation of maximum distance an expression can be moved by GCSE optimizations",
+ 2, 0, 0)
+/* GCSE won't restrict distance for which an expression with rtx_cost greater
+ than COSTS_N_INSN(GCSE_UNRESTRICTED_COST) is allowed to move. */
+DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST,
+ "gcse-unrestricted-cost",
+ "Cost at which GCSE optimizations will not constraint the distance an expression can travel",
+ 3, 0, 0)
+
/* How deep from a given basic block the dominator tree should be searched
for expressions to hoist to the block. The value of 0 will avoid limiting
the search. */
@@ -125,6 +125,10 @@ typedef enum compiler_param
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
#define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_CRITICAL_FRACTION)
+#define GCSE_COST_DISTANCE_RATIO \
+ PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO)
+#define GCSE_UNRESTRICTED_COST \
+ PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST)
#define MAX_HOIST_DEPTH \
PARAM_VALUE (PARAM_MAX_HOIST_DEPTH)
#define MAX_UNROLLED_INSNS \