@@ -197,11 +197,6 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
}
}
- /* If a true dep edge enters the branch create an anti edge in the
- opposite direction to prevent the creation of reg-moves. */
- if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn))
- create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1);
-
latency = dep_cost (link);
e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
add_edge_to_ddg (g, e);
@@ -306,8 +301,11 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
gcc_assert (first_def_node);
+ /* Always create the edge if the use node is a branch in
+ order to prevent the creation of reg-moves. */
if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
- || !flag_modulo_sched_allow_regmoves)
+ || !flag_modulo_sched_allow_regmoves
+ || (flag_modulo_sched_allow_regmoves && JUMP_P (use_node->insn)))
create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
REG_DEP, 1);
@@ -484,7 +482,12 @@ build_intra_loop_deps (ddg_ptr g)
FOR_EACH_DEP (dest_node->insn, SD_LIST_BACK, sd_it, dep)
{
- ddg_node_ptr src_node = get_node_of_insn (g, DEP_PRO (dep));
+ ddg_node_ptr src_node;
+
+ if (DEBUG_INSN_P (DEP_PRO (dep)) && !DEBUG_INSN_P (dest_node->insn))
+ continue;
+
+ src_node = get_node_of_insn (g, DEP_PRO (dep));
if (!src_node)
continue;
@@ -1091,12 +1094,18 @@ find_nodes_on_paths (sbitmap result, ddg_ptr g, sbitmap from, sbitmap to)
ddg_edge_ptr e;
ddg_node_ptr u_node = &g->nodes[u];
+ /* Ignore DEBUG_INSNs when calculating the SCCs to avoid their
+ influence on the scheduling order and rec_mii. */
+ if (DEBUG_INSN_P (u_node->insn))
+ continue;
+
for (e = u_node->out; e != (ddg_edge_ptr) 0; e = e->next_out)
{
ddg_node_ptr v_node = e->dest;
int v = v_node->cuid;
- if (!TEST_BIT (reachable_from, v))
+ /* Ignore DEBUG_INSN when calculating the SCCs. */
+ if (!TEST_BIT (reachable_from, v) && !DEBUG_INSN_P (v_node->insn))
{
SET_BIT (reachable_from, v);
SET_BIT (tmp, v);
@@ -1120,12 +1129,18 @@ find_nodes_on_paths (sbitmap result, ddg_ptr g, sbitmap from, sbitmap to)
ddg_edge_ptr e;
ddg_node_ptr u_node = &g->nodes[u];
+ /* Ignore DEBUG_INSNs when calculating the SCCs to avoid their
+ influence on the scheduling order and rec_mii. */
+ if (DEBUG_INSN_P (u_node->insn))
+ continue;
+
for (e = u_node->in; e != (ddg_edge_ptr) 0; e = e->next_in)
{
ddg_node_ptr v_node = e->src;
int v = v_node->cuid;
- if (!TEST_BIT (reach_to, v))
+ /* Ignore DEBUG_INSN when calculating the SCCs. */
+ if (!TEST_BIT (reach_to, v) && !DEBUG_INSN_P (v_node->insn))
{
SET_BIT (reach_to, v);
SET_BIT (tmp, v);
@@ -84,13 +84,14 @@ along with GCC; see the file COPYING3. If not see
II cycles (i.e. use register copies to prevent a def from overwriting
itself before reaching the use).
- SMS works with countable loops whose loop count can be easily
- adjusted. This is because we peel a constant number of iterations
- into a prologue and epilogue for which we want to avoid emitting
- the control part, and a kernel which is to iterate that constant
- number of iterations less than the original loop. So the control
- part should be a set of insns clearly identified and having its
- own iv, not otherwise used in the loop (at-least for now), which
+ SMS works with countable loops (1) whose control part can be easily
+ decoupled from the rest of the loop and (2) whose loop count can
+ be easily adjusted. This is because we peel a constant number of
+ iterations into a prologue and epilogue for which we want to avoid
+ emitting the control part, and a kernel which is to iterate that
+ constant number of iterations less than the original loop. So the
+ control part should be a set of insns clearly identified and having
+ its own iv, not otherwise used in the loop (at-least for now), which
initializes a register before the loop to the number of iterations.
Currently SMS relies on the do-loop pattern to recognize such loops,
where (1) the control part comprises of all insns defining and/or
@@ -202,33 +203,58 @@ static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
int, int, int, rtx);
-static int calculate_stage_count (partial_schedule_ptr ps);
+static int calculate_stage_count (partial_schedule_ptr, int);
+static void calculate_must_precede_follow (ddg_node_ptr, int, int,
+ int, int, sbitmap, sbitmap, sbitmap);
+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
+ sbitmap, int, int *, int *, int *);
+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
+ int, int, sbitmap, int *, sbitmap,
+ sbitmap);
+static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+static int record_inc_dec_insn_info (rtx, rtx, rtx, rtx, rtx, void *);
+
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
-#define SCHED_FIRST_REG_MOVE(x) \
- (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
-#define SCHED_NREG_MOVES(x) \
- (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
-/* The scheduling parameters held for each node. */
-typedef struct node_sched_params
+/* Information about register-move generated for a definition. */
+struct regmove_info
{
- int asap; /* A lower-bound on the absolute scheduling cycle. */
- int time; /* The absolute scheduling cycle (time >= asap). */
+ /* The definition for which the register-move is generated for. */
+ rtx def;
/* The following field (first_reg_move) is a pointer to the first
- register-move instruction added to handle the modulo-variable-expansion
- of the register defined by this node. This register-move copies the
- original register defined by the node. */
+ register-move instruction added to handle the
+ modulo-variable-expansion of the register defined by this node.
+ This register-move copies the original register defined by the node.
+ */
rtx first_reg_move;
/* The number of register-move instructions added, immediately preceding
first_reg_move. */
int nreg_moves;
+ /* Auxiliary info used in the calculation of the register-moves. */
+ void *aux;
+};
+
+typedef struct regmove_info *regmove_info_ptr;
+DEF_VEC_P (regmove_info_ptr);
+DEF_VEC_ALLOC_P (regmove_info_ptr, heap);
+
+/* The scheduling parameters held for each node. */
+typedef struct node_sched_params
+{
+ int asap; /* A lower-bound on the absolute scheduling cycle. */
+ int time; /* The absolute scheduling cycle (time >= asap). */
+
+ /* Information about register-moves needed for
+ definitions in the instruction. */
+ VEC (regmove_info_ptr, heap) *insn_regmove_info;
+
int row; /* Holds time % ii. */
int stage; /* Holds time / ii. */
@@ -406,12 +432,58 @@ set_node_sched_params (ddg_ptr g)
appropriate sched_params structure. */
for (i = 0; i < g->num_nodes; i++)
{
+ rtx insn = g->nodes[i].insn;
+ rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
+ rtx set = single_set (insn);
+
/* Watch out for aliasing problems? */
node_sched_params[i].asap = g->nodes[i].aux.count;
+ node_sched_params[i].insn_regmove_info = NULL;
+
+ /* Record the definition(s) in the instruction. These will be
+ later used to calculate the register-moves needed for each
+ definition. */
+ if (set && REG_P (SET_DEST (set)))
+ {
+ regmove_info_ptr elt =
+ (regmove_info_ptr) xcalloc (1, sizeof (struct regmove_info));
+
+ elt->def = SET_DEST (set);
+ VEC_safe_push (regmove_info_ptr, heap,
+ node_sched_params[i].insn_regmove_info,
+ elt);
+ }
+
+ if (note)
+ for_each_inc_dec (&insn, record_inc_dec_insn_info,
+ &node_sched_params[i]);
+
g->nodes[i].aux.info = &node_sched_params[i];
}
}
+/* Free the sched_params information allocated for each node. */
+static void
+free_node_sched_params (ddg_ptr g)
+{
+ int i;
+ regmove_info_ptr def;
+
+ for (i = 0; i < g->num_nodes; i++)
+ {
+ int j;
+ VEC (regmove_info_ptr, heap) *rinfo =
+ node_sched_params[i].insn_regmove_info;
+
+ for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+ free (def);
+
+ VEC_free (regmove_info_ptr, heap, rinfo);
+ }
+
+ free (node_sched_params);
+}
+
static void
print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
{
@@ -421,20 +493,32 @@ print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
return;
for (i = 0; i < num_nodes; i++)
{
+ int k;
node_sched_params_ptr nsp = &node_sched_params[i];
- rtx reg_move = nsp->first_reg_move;
- int j;
+ regmove_info_ptr def;
+ VEC (regmove_info_ptr, heap) *rinfo =
+ nsp->insn_regmove_info;
fprintf (file, "Node = %d; INSN = %d\n", i,
(INSN_UID (g->nodes[i].insn)));
fprintf (file, " asap = %d:\n", nsp->asap);
fprintf (file, " time = %d:\n", nsp->time);
- fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
- for (j = 0; j < nsp->nreg_moves; j++)
+
+ /* Iterate over the definitions in the instruction printing the
+	 reg-moves needed for each definition.  */
+ for (k = 0; VEC_iterate (regmove_info_ptr, rinfo, k, def); k++)
{
- fprintf (file, " reg_move = ");
- print_rtl_single (file, reg_move);
- reg_move = PREV_INSN (reg_move);
+ rtx reg_move = def->first_reg_move;
+ int j;
+ fprintf (file, "def:\n");
+ print_rtl_single (file, def->def);
+ fprintf (file, " nreg_moves = %d\n", def->nreg_moves);
+ for (j = 0; j < def->nreg_moves; j++)
+ {
+ fprintf (file, " reg_move = ");
+ print_rtl_single (file, reg_move);
+ reg_move = PREV_INSN (reg_move);
+ }
}
}
}
@@ -455,17 +539,20 @@ generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
ddg_ptr g = ps->g;
int ii = ps->ii;
- int i;
+ int i, j;
struct undo_replace_buff_elem *reg_move_replaces = NULL;
for (i = 0; i < g->num_nodes; i++)
{
ddg_node_ptr u = &g->nodes[i];
ddg_edge_ptr e;
- int nreg_moves = 0, i_reg_move;
- sbitmap *uses_of_defs;
+ int i_reg_move;
rtx last_reg_move;
rtx prev_reg, old_reg;
+ bool need_reg_moves_p = false;
+ VEC (regmove_info_ptr, heap) *rinfo =
+ node_sched_params[i].insn_regmove_info;
+ regmove_info_ptr def;
/* Compute the number of reg_moves needed for u, by looking at life
ranges started at u (excluding self-loops). */
@@ -483,18 +570,41 @@ generate_reg_moves (partial_schedule_ptr ps, bool rescan)
&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
nreg_moves4e--;
- nreg_moves = MAX (nreg_moves, nreg_moves4e);
+ /* Iterate over the definitions in the instruction and record
+ the information about reg-moves needed for each one. */
+ for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+ {
+ if (rtx_referenced_p (def->def, e->dest->insn))
+ {
+ rtx set = single_set (e->dest->insn);
+
+ /* Check that the TRUE_DEP edge belongs to the current
+ definition. */
+ if (set && REG_P (SET_DEST (set))
+ && (SET_DEST (set) == def->def))
+ continue;
+
+ def->nreg_moves = MAX (def->nreg_moves, nreg_moves4e);
+ if (def->nreg_moves != 0)
+ need_reg_moves_p = true;
+ }
+ }
}
- if (nreg_moves == 0)
+ if (!need_reg_moves_p)
continue;
- /* Every use of the register defined by node may require a different
- copy of this register, depending on the time the use is scheduled.
- Set a bitmap vector, telling which nodes use each copy of this
- register. */
- uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
- sbitmap_vector_zero (uses_of_defs, nreg_moves);
+ for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+ {
+ def->aux =
+ (sbitmap *) sbitmap_vector_alloc (def->nreg_moves, g->num_nodes);
+
+ /* Every use of the register defined by node may require a different
+ copy of this register, depending on the time the use is scheduled.
+ Set a bitmap vector, telling which nodes use each copy of this
+ register. */
+ sbitmap_vector_zero ((sbitmap *)def->aux, def->nreg_moves);
+ }
for (e = u->out; e; e = e->next_out)
if (e->type == TRUE_DEP && e->dest != e->src)
{
@@ -508,55 +618,79 @@ generate_reg_moves (partial_schedule_ptr ps, bool rescan)
dest_copy--;
if (dest_copy)
- SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
+ {
+ /* Iterate over the definitions in the instruction and record
+ the information about reg-moves needed for each one. */
+ for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
+ {
+ sbitmap *uses_of_def = (sbitmap *)def->aux;
+
+ if (rtx_referenced_p (def->def, e->dest->insn))
+ {
+ rtx set = single_set (e->dest->insn);
+
+ /* Check that the TRUE_DEP edge belongs to the current
+ definition. */
+ if (set && REG_P (SET_DEST (set))
+ && (SET_DEST (set) == def->def))
+ continue;
+
+ SET_BIT (uses_of_def[dest_copy - 1], e->dest->cuid);
+ }
+ }
+ }
}
/* Now generate the reg_moves, attaching relevant uses to them. */
- SCHED_NREG_MOVES (u) = nreg_moves;
- old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
- /* Insert the reg-moves right before the notes which precede
- the insn they relates to. */
- last_reg_move = u->first_note;
-
- for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
+ for (j = 0; VEC_iterate (regmove_info_ptr, rinfo, j, def); j++)
{
- unsigned int i_use = 0;
- rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
- rtx reg_move = gen_move_insn (new_reg, prev_reg);
- sbitmap_iterator sbi;
-
- add_insn_before (reg_move, last_reg_move, NULL);
- last_reg_move = reg_move;
+ sbitmap *uses_of_def = (sbitmap *)def->aux;
+ old_reg = prev_reg = copy_rtx (def->def);
- if (!SCHED_FIRST_REG_MOVE (u))
- SCHED_FIRST_REG_MOVE (u) = reg_move;
+ /* Insert the reg-moves right before the notes which precede
+	 the insn they relate to.  */
+ last_reg_move = u->first_note;
- EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
+ for (i_reg_move = 0; i_reg_move < def->nreg_moves; i_reg_move++)
{
- struct undo_replace_buff_elem *rep;
+ unsigned int i_use = 0;
+ rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
+ rtx reg_move = gen_move_insn (new_reg, prev_reg);
+ sbitmap_iterator sbi;
- rep = (struct undo_replace_buff_elem *)
- xcalloc (1, sizeof (struct undo_replace_buff_elem));
- rep->insn = g->nodes[i_use].insn;
- rep->orig_reg = old_reg;
- rep->new_reg = new_reg;
+ add_insn_before (reg_move, last_reg_move, NULL);
+ last_reg_move = reg_move;
+
+ if (!def->first_reg_move)
+ def->first_reg_move = reg_move;
- if (! reg_move_replaces)
- reg_move_replaces = rep;
- else
+ EXECUTE_IF_SET_IN_SBITMAP (uses_of_def[i_reg_move], 0, i_use, sbi)
{
- rep->next = reg_move_replaces;
- reg_move_replaces = rep;
+ struct undo_replace_buff_elem *rep;
+
+ rep = (struct undo_replace_buff_elem *)
+ xcalloc (1, sizeof (struct undo_replace_buff_elem));
+ rep->insn = g->nodes[i_use].insn;
+ rep->orig_reg = old_reg;
+ rep->new_reg = new_reg;
+
+ if (! reg_move_replaces)
+ reg_move_replaces = rep;
+ else
+ {
+ rep->next = reg_move_replaces;
+ reg_move_replaces = rep;
+ }
+
+ replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
+ if (rescan)
+ df_insn_rescan (g->nodes[i_use].insn);
}
- replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
- if (rescan)
- df_insn_rescan (g->nodes[i_use].insn);
+ prev_reg = new_reg;
}
-
- prev_reg = new_reg;
+ sbitmap_vector_free (uses_of_def);
}
- sbitmap_vector_free (uses_of_defs);
}
return reg_move_replaces;
}
@@ -575,6 +709,33 @@ free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
}
}
+/* Update the sched_params for node U using the II,
+ the CYCLE of U and MIN_CYCLE. */
+static void
+update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
+{
+ int sc_until_cycle_zero;
+ int stage;
+
+ SCHED_TIME (u) = cycle;
+ SCHED_ROW (u) = SMODULO (cycle, ii);
+
+ /* The calculation of stage count is done adding the number
+ of stages before cycle zero and after cycle zero. */
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
+
+ if (SCHED_TIME (u) < 0)
+ {
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+ }
+ else
+ {
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+ }
+}
+
/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
will move to cycle zero. */
@@ -591,15 +752,14 @@ reset_sched_times (partial_schedule_ptr ps, int amount)
ddg_node_ptr u = crr_insn->node;
int normalized_time = SCHED_TIME (u) - amount;
int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
- int sc_until_cycle_zero, stage;
if (dump_file)
{
/* Print the scheduling times after the rotation. */
fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
"crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid,
- INSN_UID (crr_insn->node->insn), SCHED_TIME (u),
- normalized_time);
+ INSN_UID (crr_insn->node->insn), normalized_time,
+ new_min_cycle);
if (JUMP_P (crr_insn->node->insn))
fprintf (dump_file, " (branch)");
fprintf (dump_file, "\n");
@@ -607,23 +767,9 @@ reset_sched_times (partial_schedule_ptr ps, int amount)
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- SCHED_TIME (u) = normalized_time;
- SCHED_ROW (u) = SMODULO (normalized_time, ii);
-
- /* The calculation of stage count is done adding the number
- of stages before cycle zero and after cycle zero. */
- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
-
- if (SCHED_TIME (u) < 0)
- {
- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
- SCHED_STAGE (u) = sc_until_cycle_zero - stage;
- }
- else
- {
- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
- }
+
+ crr_insn->cycle = normalized_time;
+ update_node_sched_params (u, ii, normalized_time, new_min_cycle);
}
}
@@ -660,6 +806,206 @@ permute_partial_schedule (partial_schedule_ptr ps, rtx last)
PREV_INSN (last));
}
+/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
+ respectively only if cycle C falls in the scheduling window boundaries
+ marked by START and END cycles. STEP is the direction of the window.
+ */
+static inline void
+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
+ sbitmap *tmp_precede, sbitmap must_precede, int c,
+ int start, int end, int step)
+{
+ *tmp_precede = NULL;
+ *tmp_follow = NULL;
+
+ if (c == start)
+ {
+ if (step == 1)
+ *tmp_precede = must_precede;
+ else /* step == -1. */
+ *tmp_follow = must_follow;
+ }
+ if (c == end - step)
+ {
+ if (step == 1)
+ *tmp_follow = must_follow;
+ else /* step == -1. */
+ *tmp_precede = must_precede;
+ }
+
+}
+
+/* Return True if the branch can be moved to row ii-1 while
+ normalizing the partial schedule PS to start from cycle zero and thus
+ optimize the SC. Otherwise return False. */
+static bool
+optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
+{
+ int amount = PS_MIN_CYCLE (ps);
+ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
+ int start, end, step;
+ int ii = ps->ii;
+ bool ok = false;
+ int stage_count, stage_count_curr;
+
+ /* Compare the SC after normalization and SC after bringing the branch
+ to row ii-1. If they are equal just bail out. */
+ stage_count = calculate_stage_count (ps, amount);
+ stage_count_curr =
+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch) + 1);
+
+ if (stage_count == stage_count_curr)
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS SC already optimized.\n");
+
+ ok = false;
+ goto clear;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS Trying to optimize branch location\n");
+ fprintf (dump_file, "SMS partial schedule before trial:\n");
+ print_partial_schedule (ps, dump_file);
+ }
+
+  /* First, normalize the partial schedule.  */
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "SMS partial schedule after normalization (ii, %d, SC %d):\n",
+ ii, stage_count);
+ print_partial_schedule (ps, dump_file);
+ }
+
+ if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
+ {
+ ok = true;
+ goto clear;
+ }
+
+ sbitmap_ones (sched_nodes);
+
+ /* Calculate the new placement of the branch. It should be in row
+     ii-1 and fall into its scheduling window.  */
+ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
+ &step, &end) == 0)
+ {
+ bool success;
+ ps_insn_ptr next_ps_i;
+ int branch_cycle = SCHED_TIME (g->closing_branch);
+ int row = SMODULO (branch_cycle, ps->ii);
+ int num_splits = 0;
+ sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
+ int c;
+
+ if (dump_file)
+ fprintf (dump_file, "\nTrying to schedule node %d "
+ "INSN = %d in (%d .. %d) step %d\n",
+ g->closing_branch->cuid,
+ (INSN_UID (g->closing_branch->insn)), start, end, step);
+
+ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
+ if (step == 1)
+ {
+ c = start + ii - SMODULO (start, ii) - 1;
+ gcc_assert (c >= start);
+ if (c >= end)
+ {
+ ok = false;
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ goto clear;
+ }
+ }
+ else
+ {
+ c = start - SMODULO (start, ii) - 1;
+ gcc_assert (c <= start);
+
+ if (c <= end)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ ok = false;
+ goto clear;
+ }
+ }
+
+ must_precede = sbitmap_alloc (g->num_nodes);
+ must_follow = sbitmap_alloc (g->num_nodes);
+
+      /* Try to schedule the branch in its new cycle.  */
+ calculate_must_precede_follow (g->closing_branch, start, end,
+ step, ii, sched_nodes,
+ must_precede, must_follow);
+
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, c, start, end, step);
+
+ /* Find the element in the partial schedule related to the closing
+	 branch so we can remove it from its current cycle.  */
+ for (next_ps_i = ps->rows[row];
+ next_ps_i; next_ps_i = next_ps_i->next_in_row)
+ if (next_ps_i->node->cuid == g->closing_branch->cuid)
+ break;
+
+ gcc_assert (next_ps_i);
+ gcc_assert (remove_node_from_ps (ps, next_ps_i));
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid, c,
+ sched_nodes, &num_splits,
+ tmp_precede, tmp_follow);
+ gcc_assert (num_splits == 0);
+ if (!success)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d, "
+ "bringing it back to cycle %d\n", c, branch_cycle);
+
+	      /* The branch failed to be placed in row ii - 1.
+		 Put it back in its original place in the partial
+		 schedule.  */
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, branch_cycle, start, end,
+ step);
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid,
+ branch_cycle, sched_nodes,
+ &num_splits, tmp_precede,
+ tmp_follow);
+ gcc_assert (success && (num_splits == 0));
+ ok = false;
+ }
+ else
+ {
+ /* The branch is placed in row ii - 1. */
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS success in moving branch to cycle %d\n", c);
+
+ update_node_sched_params (g->closing_branch, ii, c,
+ PS_MIN_CYCLE (ps));
+ ok = true;
+ }
+
+ free (must_precede);
+ free (must_follow);
+ }
+
+clear:
+ free (sched_nodes);
+ return ok;
+}
+
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int to_stage, int for_prolog, rtx count_reg)
@@ -671,7 +1017,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
{
ddg_node_ptr u_node = ps_ij->node;
- int j, i_reg_moves;
+ int i_reg_moves;
rtx reg_move = NULL_RTX;
/* Do not duplicate any insn which refers to count_reg as it
@@ -686,43 +1032,68 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
if (for_prolog)
{
- /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
- number of reg_moves starting with the second occurrence of
- u_node, which is generated if its SCHED_STAGE <= to_stage. */
- i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
- i_reg_moves = MAX (i_reg_moves, 0);
- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
-
- /* The reg_moves start from the *first* reg_move backwards. */
- if (i_reg_moves)
+ int i;
+ VEC (regmove_info_ptr, heap) *rinfo =
+ node_sched_params[u_node->cuid].insn_regmove_info;
+ regmove_info_ptr def;
+
+ for (i = 0; VEC_iterate (regmove_info_ptr, rinfo, i, def); i++)
{
- reg_move = SCHED_FIRST_REG_MOVE (u_node);
- for (j = 1; j < i_reg_moves; j++)
- reg_move = PREV_INSN (reg_move);
+ int j;
+
+ /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
+ number of reg_moves starting with the second occurrence of
+ u_node, which is generated if its SCHED_STAGE <= to_stage. */
+ i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
+ i_reg_moves = MAX (i_reg_moves, 0);
+ i_reg_moves = MIN (i_reg_moves, def->nreg_moves);
+
+ /* The reg_moves start from the *first* reg_move backwards. */
+ if (i_reg_moves)
+ {
+ reg_move = def->first_reg_move;
+ for (j = 1; j < i_reg_moves; j++)
+ reg_move = PREV_INSN (reg_move);
+ }
+
+ for (j = 0; j < i_reg_moves;
+ j++, reg_move = NEXT_INSN (reg_move))
+ emit_insn (copy_rtx (PATTERN (reg_move)));
}
}
else /* It's for the epilog. */
{
- /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
- starting to decrease one stage after u_node no longer occurs;
- that is, generate all reg_moves until
- SCHED_STAGE (u_node) == from_stage - 1. */
- i_reg_moves = SCHED_NREG_MOVES (u_node)
- - (from_stage - SCHED_STAGE (u_node) - 1);
- i_reg_moves = MAX (i_reg_moves, 0);
- i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
-
- /* The reg_moves start from the *last* reg_move forwards. */
- if (i_reg_moves)
+ int i;
+ VEC (regmove_info_ptr, heap) *rinfo =
+ node_sched_params[u_node->cuid].insn_regmove_info;
+ regmove_info_ptr def;
+
+ for (i = 0; VEC_iterate (regmove_info_ptr, rinfo, i, def); i++)
{
- reg_move = SCHED_FIRST_REG_MOVE (u_node);
- for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
- reg_move = PREV_INSN (reg_move);
+ int j;
+
+ /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
+ starting to decrease one stage after u_node no longer occurs;
+ that is, generate all reg_moves until
+ SCHED_STAGE (u_node) == from_stage - 1. */
+ i_reg_moves = def->nreg_moves
+ - (from_stage - SCHED_STAGE (u_node) - 1);
+ i_reg_moves = MAX (i_reg_moves, 0);
+ i_reg_moves = MIN (i_reg_moves, def->nreg_moves);
+
+ /* The reg_moves start from the *last* reg_move forwards. */
+ if (i_reg_moves)
+ {
+ reg_move = def->first_reg_move;
+ for (j = 1; j < def->nreg_moves; j++)
+ reg_move = PREV_INSN (reg_move);
+ }
+
+ for (j = 0; j < i_reg_moves;
+ j++, reg_move = NEXT_INSN (reg_move))
+ emit_insn (copy_rtx (PATTERN (reg_move)));
}
}
-
- for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
- emit_insn (copy_rtx (PATTERN (reg_move)));
if (SCHED_STAGE (u_node) >= from_stage
&& SCHED_STAGE (u_node) <= to_stage)
duplicate_insn_chain (u_node->first_note, u_node->insn);
@@ -911,6 +1282,25 @@ setup_sched_infos (void)
/* Used to calculate the upper bound of ii. */
#define MAXII_FACTOR 2
+/* Callback for for_each_inc_dec. Records in ARG the register DEST
+ which is defined by the auto operation. */
+static int
+record_inc_dec_insn_info (rtx mem ATTRIBUTE_UNUSED,
+ rtx op ATTRIBUTE_UNUSED,
+ rtx dest, rtx src ATTRIBUTE_UNUSED,
+ rtx srcoff ATTRIBUTE_UNUSED, void *arg)
+{
+ node_sched_params_ptr params = (node_sched_params_ptr) arg;
+ regmove_info_ptr insn_regmove_info =
+ (regmove_info_ptr) xcalloc (1, sizeof (struct regmove_info));
+
+ insn_regmove_info->def = copy_rtx (dest);
+ VEC_safe_push (regmove_info_ptr, heap, params->insn_regmove_info,
+ insn_regmove_info);
+
+ return -1;
+}
+
/* Main entry point, perform SMS scheduling on the loops of the function
that consist of single basic blocks. */
static void
@@ -927,6 +1317,7 @@ sms_schedule (void)
basic_block condition_bb = NULL;
edge latch_edge;
gcov_type trip_count = 0;
+ int temp;
loop_optimizer_init (LOOPS_HAVE_PREHEADERS
| LOOPS_HAVE_RECORDED_EXITS);
@@ -936,21 +1327,18 @@ sms_schedule (void)
return; /* There are no loops to schedule. */
}
+ temp = reload_completed;
+ reload_completed = 1;
/* Initialize issue_rate. */
if (targetm.sched.issue_rate)
- {
- int temp = reload_completed;
-
- reload_completed = 1;
- issue_rate = targetm.sched.issue_rate ();
- reload_completed = temp;
- }
+ issue_rate = targetm.sched.issue_rate ();
else
issue_rate = 1;
/* Initialize the scheduler. */
setup_sched_infos ();
haifa_sched_init ();
+ reload_completed = temp;
/* Allocate memory to hold the DDG array one entry for each loop.
We use loop->num as index into this array. */
@@ -1042,12 +1430,9 @@ sms_schedule (void)
continue;
}
- /* Don't handle BBs with calls or barriers or auto-increment insns
- (to avoid creating invalid reg-moves for the auto-increment insns),
- or !single_set with the exception of instructions that include
- count_reg---these instructions are part of the control part
- that do-loop recognizes.
- ??? Should handle auto-increment insns.
+ /* Don't handle BBs with calls or barriers, or !single_set insns
+ with the exception of instructions that include count_reg---these
+     instructions are part of the control part that do-loop recognizes.
??? Should handle insns defining subregs. */
for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
{
@@ -1058,7 +1443,6 @@ sms_schedule (void)
|| (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
&& !reg_mentioned_p (count_reg, insn))
- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
|| (INSN_P (insn) && (set = single_set (insn))
&& GET_CODE (SET_DEST (set)) == SUBREG))
break;
@@ -1072,8 +1456,6 @@ sms_schedule (void)
fprintf (dump_file, "SMS loop-with-call\n");
else if (BARRIER_P (insn))
fprintf (dump_file, "SMS loop-with-barrier\n");
- else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
- fprintf (dump_file, "SMS reg inc\n");
else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
&& !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
fprintf (dump_file, "SMS loop-with-not-single-set\n");
@@ -1115,6 +1497,7 @@ sms_schedule (void)
int mii, rec_mii;
unsigned stage_count = 0;
HOST_WIDEST_INT loop_count = 0;
+ bool opt_sc_p = false;
if (! (g = g_arr[loop->num]))
continue;
@@ -1197,12 +1580,30 @@ sms_schedule (void)
ps = sms_schedule_by_order (g, mii, maxii, node_order);
- if (ps)
- {
- stage_count = calculate_stage_count (ps);
- gcc_assert(stage_count >= 1);
- PS_STAGE_COUNT(ps) = stage_count;
- }
+ if (ps)
+ {
+ /* Try to achieve optimized SC by normalizing the partial
+ schedule (having the cycles start from cycle zero). The branch
+ location must be placed in row ii-1 in the final scheduling.
+ If that's not the case after the normalization then try to
+ move the branch to that row if possible. */
+ opt_sc_p = optimize_sc (ps, g);
+ if (opt_sc_p)
+ stage_count = calculate_stage_count (ps, 0);
+ else
+ {
+ /* Bring the branch to cycle -1. */
+ int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ if (dump_file)
+ fprintf (dump_file, "SMS schedule branch at cycle -1\n");
+
+ stage_count = calculate_stage_count (ps, amount);
+ }
+
+ gcc_assert (stage_count >= 1);
+ PS_STAGE_COUNT (ps) = stage_count;
+ }
/* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1 means that there is no interleaving between iterations thus
@@ -1224,12 +1625,16 @@ sms_schedule (void)
else
{
struct undo_replace_buff_elem *reg_move_replaces;
- int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ if (!opt_sc_p)
+ {
+ /* Rotate the partial schedule to have the branch in row ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ }
- /* Set the stage boundaries. The closing_branch was scheduled
- and should appear in the last (ii-1) row. */
- reset_sched_times (ps, amount);
- rotate_partial_schedule (ps, amount);
set_columns_for_ps (ps);
canon_loop (loop);
@@ -1280,7 +1685,7 @@ sms_schedule (void)
}
free_partial_schedule (ps);
- free (node_sched_params);
+ free_node_sched_params (g);
free (node_order);
free_ddg (g);
}
@@ -1381,13 +1786,11 @@ sms_schedule (void)
scheduling window is empty and zero otherwise. */
static int
-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
int start, step, end;
ddg_edge_ptr e;
- int u = nodes_order [i];
- ddg_node_ptr u_node = &ps->g->nodes[u];
sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
@@ -1799,7 +2202,7 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
/* Try to get non-empty scheduling window. */
success = 0;
- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
+ if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
&step, &end) == 0)
{
if (dump_file)
@@ -1819,21 +2222,9 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
sbitmap tmp_precede = NULL;
sbitmap tmp_follow = NULL;
- if (c == start)
- {
- if (step == 1)
- tmp_precede = must_precede;
- else /* step == -1. */
- tmp_follow = must_follow;
- }
- if (c == end - step)
- {
- if (step == 1)
- tmp_follow = must_follow;
- else /* step == -1. */
- tmp_precede = must_precede;
- }
-
+ set_must_precede_follow (&tmp_follow, must_follow,
+ &tmp_precede, must_precede,
+ c, start, end, step);
success =
try_scheduling_node_in_cycle (ps, u_node, u, c,
sched_nodes,
@@ -2550,8 +2941,13 @@ print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
fprintf (dump, "\n[ROW %d ]: ", i);
while (ps_i)
{
- fprintf (dump, "%d, ",
- INSN_UID (ps_i->node->insn));
+ if (JUMP_P (ps_i->node->insn))
+ fprintf (dump, "%d (branch), ",
+ INSN_UID (ps_i->node->insn));
+ else
+ fprintf (dump, "%d, ",
+ INSN_UID (ps_i->node->insn));
+
ps_i = ps_i->next_in_row;
}
}
@@ -2893,12 +3289,10 @@ ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
}
/* Calculate the stage count of the partial schedule PS. The calculation
- takes into account the rotation to bring the closing branch to row
- ii-1. */
+ takes into account the rotation amount passed in ROTATION_AMOUNT. */
int
-calculate_stage_count (partial_schedule_ptr ps)
+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
{
- int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);