@@ -49,6 +49,10 @@ along with GCC; see the file COPYING3. If not see
/* A flag indicating that a ddg edge belongs to an SCC or not. */
enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
+/* A vector of dependencies needed while processing clobbers. */
+DEF_VEC_P(df_ref);
+DEF_VEC_ALLOC_P(df_ref,heap);
+
/* Forward declarations. */
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
@@ -178,23 +182,45 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
whose register has multiple defs in the loop. */
if (flag_modulo_sched_allow_regmoves && (t == ANTI_DEP && dt == REG_DEP))
{
- rtx set;
+ bool can_delete_dep = true;
+ unsigned regno;
+ reg_set_iterator rsi;
+ regset src_uses, dest_sets, regs;
+
+ /* Register sets from modulo scheduler structures. */
+ src_uses = extract_from_insn_map (src_node->insn, 0);
+ dest_sets = extract_from_insn_map (dest_node->insn, 1);
+
+ if (!src_uses || !dest_sets)
+ return;
- set = single_set (dest_node->insn);
- /* TODO: Handle registers that REG_P is not true for them, i.e.
- subregs and special registers. */
- if (set && REG_P (SET_DEST (set)))
+ /* Build regset intersection. */
+ regs = ALLOC_REG_SET (®_obstack);
+ COPY_REG_SET (regs, src_uses);
+ AND_REG_SET (regs, dest_sets);
+
+ EXECUTE_IF_SET_IN_REG_SET (regs, 0, regno, rsi)
{
- int regno = REGNO (SET_DEST (set));
df_ref first_def;
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
first_def = df_bb_regno_first_def_find (g->bb, regno);
gcc_assert (first_def);
- if (bitmap_bit_p (&bb_info->gen, DF_REF_ID (first_def)))
- return;
+ /* CC-flags and other hard registers can't be renamed.
+ Check whether loop kernel has only one def. */
+ if (HARD_REGISTER_NUM_P (regno)
+ || !bitmap_bit_p (&bb_info->gen, DF_REF_ID (first_def)))
+ {
+ can_delete_dep = false;
+ break;
+ }
}
+
+ FREE_REG_SET (regs);
+
+ if (can_delete_dep)
+ return;
}
latency = dep_cost (link);
@@ -247,16 +273,20 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to,
static void
add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
{
- int regno = DF_REF_REGNO (last_def);
+ unsigned int regno = DF_REF_REGNO (last_def);
struct df_link *r_use;
int has_use_in_bb_p = false;
- rtx def_insn = DF_REF_INSN (last_def);
+ rtx insn, def_insn = DF_REF_INSN (last_def);
ddg_node_ptr last_def_node = get_node_of_insn (g, def_insn);
ddg_node_ptr use_node;
+ df_ref *def_rec;
+ unsigned int uid;
+ static VEC(df_ref,heap) *all_defs;
#ifdef ENABLE_CHECKING
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
#endif
df_ref first_def = df_bb_regno_first_def_find (g->bb, regno);
+ bool first_write = true;
gcc_assert (last_def_node);
gcc_assert (first_def);
@@ -267,6 +297,31 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
DF_REF_ID (first_def)));
#endif
+ all_defs = VEC_alloc (df_ref, heap, 0);
+
+ /* Find all defs which are clobbers and the first normal write def. */
+ FOR_BB_INSNS (g->bb, insn)
+ {
+ if (!INSN_P (insn))
+ continue;
+ uid = INSN_UID (insn);
+ for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ {
+ df_ref def = *def_rec;
+ if (DF_REF_REGNO (def) == regno)
+ {
+ bool is_clobber = DF_REF_FLAGS (def) & (DF_REF_MUST_CLOBBER
+ | DF_REF_MAY_CLOBBER);
+ if (is_clobber || first_write)
+ {
+ VEC_safe_push (df_ref, heap, all_defs, def);
+ if (!is_clobber)
+ first_write = false;
+ }
+ }
+ }
+ }
+
/* Create inter-loop true dependences and anti dependences. */
for (r_use = DF_REF_CHAIN (last_def); r_use != NULL; r_use = r_use->next)
{
@@ -290,25 +345,32 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
}
else if (!DEBUG_INSN_P (use_insn))
{
+ unsigned int i;
+ df_ref curr_def;
/* Add anti deps from last_def's uses in the current iteration
- to the first def in the next iteration. We do not add ANTI
- dep when there is an intra-loop TRUE dep in the opposite
- direction, but use regmoves to fix such disregarded ANTI
- deps when broken. If the first_def reaches the USE then
- there is such a dep. */
- ddg_node_ptr first_def_node = get_node_of_insn (g,
- DF_REF_INSN (first_def));
-
- gcc_assert (first_def_node);
-
- /* Always create the edge if the use node is a branch in
- order to prevent the creation of reg-moves. */
- if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
- || !flag_modulo_sched_allow_regmoves
- || (flag_modulo_sched_allow_regmoves && JUMP_P (use_node->insn)))
- create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
- REG_DEP, 1);
-
+ to the first def and all clobbers in the next iteration.
+ We do not add ANTI dep when there is an intra-loop TRUE dep
+ in the opposite direction, but use regmoves to fix such
+ disregarded ANTI deps when broken. If the curr_def reaches
+ the USE then there is such a dep. */
+ FOR_EACH_VEC_ELT (df_ref, all_defs, i, curr_def)
+ {
+ if (DF_REF_ID (last_def) != DF_REF_ID (curr_def)
+ /* Some hard regs (for ex. CC-flags) can't be renamed. */
+ || HARD_REGISTER_P (DF_REF_REG (last_def))
+ || !flag_modulo_sched_allow_regmoves
+ /* Always create the edge if the use node is a branch in
+ order to prevent the creation of reg-moves. */
+ || (flag_modulo_sched_allow_regmoves
+ && JUMP_P (use_node->insn)))
+ {
+ ddg_node_ptr curr_def_node = get_node_of_insn (g,
+ DF_REF_INSN (curr_def));
+ gcc_assert (curr_def_node);
+ create_ddg_dep_no_link (g, use_node, curr_def_node,
+ ANTI_DEP, REG_DEP, 1);
+ }
+ }
}
}
/* Create an inter-loop output dependence between LAST_DEF (which is the
@@ -318,18 +380,15 @@ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
defs starting with a true dependence to a use which can be in the
next iteration; followed by an anti dependence of that use to the
first def (i.e. if there is a use between the two defs.) */
- if (!has_use_in_bb_p)
+ if (!has_use_in_bb_p && DF_REF_ID (last_def) != DF_REF_ID (first_def))
{
- ddg_node_ptr dest_node;
-
- if (DF_REF_ID (last_def) == DF_REF_ID (first_def))
- return;
-
- dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
+ ddg_node_ptr dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
gcc_assert (dest_node);
create_ddg_dep_no_link (g, last_def_node, dest_node,
OUTPUT_DEP, REG_DEP, 1);
}
+
+ VEC_free (df_ref, heap, all_defs);
}
/* Build inter-loop dependencies, by looking at DF analysis backwards. */
static void
@@ -466,6 +525,8 @@ build_intra_loop_deps (ddg_ptr g)
/* Build the dependence information, using the sched_analyze function. */
init_deps_global ();
+ /* Set sms hooks and initialize additional structures. */
+ sms_sched_analyze_init ();
init_deps (&tmp_deps, false);
/* Do the intra-block data dependence analysis for the given block. */
@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "dbgcnt.h"
#include "df.h"
+#include "pointer-set.h"
#ifdef INSN_SCHEDULING
@@ -200,9 +201,10 @@ static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
static void permute_partial_schedule (partial_schedule_ptr, rtx);
static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
- rtx, rtx);
+ rtx, bool, bool, rtx, HOST_WIDEST_INT,
+ bool, HOST_WIDEST_INT, rtx *);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
- int, int, int, rtx);
+ int, int, int, rtx, bool);
static int calculate_stage_count (partial_schedule_ptr, int);
static void calculate_must_precede_follow (ddg_node_ptr, int, int,
int, int, sbitmap, sbitmap, sbitmap);
@@ -264,7 +266,162 @@ typedef struct node_sched_params
} *node_sched_params_ptr;
-/* The following three functions are copied from the current scheduler
+/* Next data structures and callbacks help to store information about
+ using registers in insn. */
+static struct sched_deps_info_def old_sched_deps_info;
+
+/* Two regsets to store which registers the insn reads and writes. */
+typedef struct regset_pair_def
+{
+ regset uses;
+ regset sets;
+} regset_pair;
+
+/* Allocate memory for regset_pair structure. */
+static regset_pair*
+regset_pair_init (void)
+{
+ regset_pair *trs = (regset_pair *)xcalloc (1, sizeof (regset_pair));
+ trs->uses = ALLOC_REG_SET (®_obstack);
+ trs->sets = ALLOC_REG_SET (®_obstack);
+ return trs;
+}
+
+/* Pointer map is used to find a reg sets for insn. */
+static struct pointer_map_t *insn_map;
+
+/* Find (or create) regset_pair for INSN in pointer_map. */
+static regset_pair*
+regset_pair_get (rtx insn)
+{
+ void **slot = pointer_map_contains (insn_map, insn);
+ if (!slot)
+ {
+ slot = pointer_map_insert (insn_map, insn);
+ *slot = regset_pair_init ();
+ }
+ return (regset_pair*)*slot;
+}
+
+/* Callback for pointer_map_traverse to free memory used by regset_pair. */
+static bool
+destroy_regset_pair (const void *key ATTRIBUTE_UNUSED, void **slot,
+ void *data ATTRIBUTE_UNUSED)
+{
+ regset_pair *trs = (regset_pair*)*slot;
+ FREE_REG_SET (trs->uses);
+ FREE_REG_SET (trs->sets);
+ free (trs);
+ return true;
+}
+
+/* SMS sched_analyze hooks. Every of them calls original hook first. */
+static rtx curr_insn = NULL_RTX;
+
+static void
+sms_start_insn (rtx insn)
+{
+ old_sched_deps_info.start_insn (insn);
+
+ gcc_assert (insn && !curr_insn);
+ curr_insn = insn;
+ if (dump_file)
+ {
+ fprintf (dump_file, "sms analyze: start insn:\n");
+ print_rtl_single (dump_file, curr_insn);
+ }
+}
+
+static void
+sms_finish_insn (void)
+{
+ old_sched_deps_info.finish_insn ();
+
+ gcc_assert (curr_insn);
+ curr_insn = NULL_RTX;
+ if (dump_file)
+ fprintf (dump_file, "sms analyze: finished insn\n");
+}
+
+static void
+sms_note_reg_set (int regno)
+{
+ regset_pair *trs;
+ old_sched_deps_info.note_reg_set (regno);
+
+ gcc_assert (curr_insn);
+ trs = regset_pair_get (curr_insn);
+ SET_REGNO_REG_SET (trs->sets, regno);
+
+ if (dump_file)
+ fprintf (dump_file, "sms analyze: reg set %d\n", regno);
+}
+
+static void
+sms_note_reg_clobber (int regno)
+{
+ regset_pair *trs;
+ old_sched_deps_info.note_reg_clobber (regno);
+
+ gcc_assert (curr_insn);
+ trs = regset_pair_get (curr_insn);
+ SET_REGNO_REG_SET (trs->sets, regno);
+
+ if (dump_file)
+ fprintf (dump_file, "sms analyze: reg clobber %d\n", regno);
+}
+
+static void
+sms_note_reg_use (int regno)
+{
+ regset_pair *trs;
+ old_sched_deps_info.note_reg_use (regno);
+
+ gcc_assert (curr_insn);
+ trs = regset_pair_get (curr_insn);
+ SET_REGNO_REG_SET (trs->uses, regno);
+
+ if (dump_file)
+ fprintf (dump_file, "sms analyze: reg use %d\n", regno);
+}
+
+/* Extract the saved data about register usage. Bool SETS is true,
+ when we need the set of written regs. */
+regset
+extract_from_insn_map (rtx insn, bool sets)
+{
+ void **slot = pointer_map_contains (insn_map, insn);
+ regset_pair *trs;
+ if (!slot)
+ return NULL;
+ trs = (regset_pair*)*slot;
+ return sets ? trs->sets : trs->uses;
+}
+
+/* Setup SMS hooks. Initialize pointer_map. */
+void
+sms_sched_analyze_init (void)
+{
+ memcpy (&old_sched_deps_info, sched_deps_info,
+ sizeof (struct sched_deps_info_def));
+ sched_deps_info->start_insn = sms_start_insn;
+ sched_deps_info->finish_insn = sms_finish_insn;
+ sched_deps_info->note_reg_set = sms_note_reg_set;
+ sched_deps_info->note_reg_clobber = sms_note_reg_clobber;
+ sched_deps_info->note_reg_use = sms_note_reg_use;
+ insn_map = pointer_map_create ();
+}
+
+/* Free pointer_map with freeing internal structures first. */
+static void
+sms_create_ddg_finish (void)
+{
+ pointer_map_traverse (insn_map, destroy_regset_pair, NULL);
+ pointer_map_destroy (insn_map);
+}
+
+
+/* The following two functions are copied from the current scheduler
code in order to use sched_analyze() for computing the dependencies.
They are used when initializing the sched_info structure. */
static const char *
@@ -287,9 +444,20 @@ static struct common_sched_info_def sms_common_sched_info;
static struct sched_deps_info_def sms_sched_deps_info =
{
compute_jump_reg_dependencies,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL,
- 0, 0, 0
+ sms_start_insn,
+ sms_finish_insn,
+ NULL, /* start_lhs */
+ NULL, /* finish_lhs */
+ NULL, /* start_rhs */
+ NULL, /* finish_rhs */
+ sms_note_reg_set,
+ sms_note_reg_clobber,
+ sms_note_reg_use,
+ NULL, /* note_mem_dep */
+ NULL, /* note_dep */
+ 0, /* use_cselib */
+ 0, /* use_deps_list */
+ 0 /* generate_spec_deps */
};
static struct haifa_sched_info sms_sched_info =
@@ -311,6 +479,7 @@ static struct haifa_sched_info sms_sched_info =
0
};
+
/* Given HEAD and TAIL which are the first and last insns in a loop;
return the register which controls the loop. Return zero if it has
more than one occurrence in the loop besides the control part or the
@@ -364,37 +533,164 @@ doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
#endif
}
-/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
- that the number of iterations is a compile-time constant. If so,
- return the rtx that sets COUNT_REG to a constant, and set COUNT to
- this constant. Otherwise return 0. */
+/* Same as previous for loop with always-the-same-step counter. */
+static rtx
+nondoloop_register_get (rtx head, rtx tail, int cmp_side,
+ rtx *addsub_output, rtx *cmp_output)
+{
+ rtx insn, reg, flagreg, addsub, cmp, end;
+
+ /* Check jump instruction form */
+ insn = single_set (tail);
+ if (insn == NULL_RTX
+ || SET_DEST (insn) != pc_rtx
+ || GET_CODE (SET_SRC (insn)) != IF_THEN_ELSE
+ || GET_CODE (XEXP (SET_SRC (insn), 1)) != LABEL_REF
+ || XEXP (SET_SRC (insn), 2) != pc_rtx)
+ return NULL_RTX;
+
+ /* Check loop exit condition */
+ insn = XEXP (SET_SRC (insn), 0);
+ if (GET_CODE (insn) != NE || XEXP (insn, 1) != const0_rtx)
+ return NULL_RTX;
+
+ /* Flags register */
+ flagreg = XEXP (insn, 0);
+
+ /* Searching comparison instruction */
+ cmp = PREV_INSN (tail);
+ while (cmp != PREV_INSN (head))
+ {
+ if (INSN_P (cmp) && reg_set_p (flagreg, cmp))
+ break;
+ cmp = PREV_INSN (cmp);
+ }
+ if (cmp == PREV_INSN (head))
+ return NULL_RTX;
+
+ /* Check comparison */
+ insn = single_set (cmp);
+ if (insn == NULL_RTX
+ || ! rtx_equal_p (flagreg, SET_DEST (insn))
+ || GET_CODE (SET_SRC (insn)) != COMPARE)
+ return NULL_RTX;
+
+ /* Loop register */
+ gcc_assert (0 <= cmp_side && cmp_side <= 1);
+ reg = XEXP (SET_SRC (insn), cmp_side);
+ if (! REG_P (reg))
+ return NULL_RTX;
+
+ /* End value */
+ end = XEXP (SET_SRC (insn), 1 - cmp_side);
+ if (! REG_P (end) && ! CONST_INT_P (end))
+ return NULL_RTX;
+
+ /* Searching register add\sub instruction */
+ addsub = PREV_INSN (cmp);
+ while (addsub != PREV_INSN (head))
+ {
+ if (INSN_P (addsub) && reg_set_p (reg, addsub))
+ break;
+ addsub = PREV_INSN (addsub);
+ }
+ if (addsub == PREV_INSN (head))
+ return NULL_RTX;
+
+ /* Checking register change instruction */
+ insn = single_set (addsub);
+ if (insn == NULL_RTX || ! rtx_equal_p (reg, SET_DEST (insn)))
+ return NULL_RTX;
+ insn = SET_SRC (insn);
+ if ((GET_CODE (insn) != PLUS && GET_CODE (insn) != MINUS)
+ || ! rtx_equal_p (reg, XEXP (insn, 0))
+ || ! (CONST_INT_P (XEXP (insn, 1))))
+ return NULL_RTX;
+
+ /* No other REG and END (if reg) modifications allowed */
+ for (insn = head; insn != tail; insn = NEXT_INSN (insn))
+ {
+ if (REG_P(end) && reg_set_p (end, insn))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS end register found ");
+ print_rtl_single (dump_file, reg);
+ fprintf (dump_file, " outside write in insn:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ return NULL_RTX;
+ }
+ if (insn != addsub && reg_set_p (reg, insn))
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS count_reg found ");
+ print_rtl_single (dump_file, reg);
+ fprintf (dump_file, " outside write in insn:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ return NULL_RTX;
+ }
+ }
+
+ *addsub_output = addsub;
+ *cmp_output = cmp;
+ return reg;
+}
+
+/* Check if REG is set to a constant in the PRE_HEADER block.
+ If possible to find, return the rtx that sets REG.
+ If REG is set to a constant (probably not directly),
+ set IS_CONST to true and VALUE to that constant value. */
static rtx
-const_iteration_count (rtx count_reg, basic_block pre_header,
- HOST_WIDEST_INT * count)
+search_const_init (basic_block pre_header, rtx reg, bool *is_const,
+ HOST_WIDEST_INT *value)
{
rtx insn;
rtx head, tail;
- if (! pre_header)
- return NULL_RTX;
+ if (!pre_header)
+ {
+ *is_const = false;
+ return NULL_RTX;
+ }
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
- rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
+ rtx_equal_p (reg, SET_DEST (single_set (insn))))
{
- rtx pat = single_set (insn);
+ rtx src, pat = single_set (insn);
+ src = SET_SRC (pat);
- if (CONST_INT_P (SET_SRC (pat)))
+ if (CONST_INT_P (src))
{
- *count = INTVAL (SET_SRC (pat));
- return insn;
+ *is_const = true;
+ *value = INTVAL (src);
}
+ else if (REG_P (src))
+ { /* Check if previous insn sets SRC = constant. */
+ pat = single_set (PREV_INSN (insn));
+ if (pat != NULL_RTX && rtx_equal_p (src, SET_DEST (pat))
+ && CONST_INT_P (SET_SRC (pat)))
+ {
+ *is_const = true;
+ *value = INTVAL (SET_SRC (pat));
+ }
+ else
+ *is_const = false;
+ }
+ else
+ *is_const = false;
- return NULL_RTX;
+ return insn;
}
+ else if (reg_set_p (reg, insn))
+ break;
+ *is_const = false;
return NULL_RTX;
}
@@ -1008,7 +1304,8 @@ clear:
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
- int to_stage, int for_prolog, rtx count_reg)
+ int to_stage, int for_prolog, rtx count_reg,
+ bool doloop_p)
{
int row;
ps_insn_ptr ps_ij;
@@ -1020,14 +1317,14 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int i_reg_moves;
rtx reg_move = NULL_RTX;
- /* Do not duplicate any insn which refers to count_reg as it
- belongs to the control part.
+ /* In doloop case do not duplicate any insn which refers
+ to count_reg as it belongs to the control part.
The closing branch is scheduled as well and thus should
be ignored.
TODO: This should be done by analyzing the control part of
the loop. */
- if (reg_mentioned_p (count_reg, u_node->insn)
- || JUMP_P (ps_ij->node->insn))
+ if ((doloop_p && reg_mentioned_p (count_reg, u_node->insn))
+ || JUMP_P (u_node->insn))
continue;
if (for_prolog)
@@ -1104,7 +1401,10 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
/* Generate the instructions (including reg_moves) for prolog & epilog. */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
- rtx count_reg, rtx count_init)
+ rtx count_reg, bool doloop_p, bool count_init_isconst,
+ rtx fin_reg, HOST_WIDEST_INT fin_nonconst_adjust,
+ bool create_reg, HOST_WIDEST_INT reg_val,
+ rtx *created_reg)
{
int i;
int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1113,12 +1413,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
/* Generate the prolog, inserting its insns on the loop-entry edge. */
start_sequence ();
- if (!count_init)
+ if (doloop_p && !count_init_isconst)
{
- /* Generate instructions at the beginning of the prolog to
- adjust the loop count by STAGE_COUNT. If loop count is constant
- (count_init), this constant is adjusted by STAGE_COUNT in
- generate_prolog_epilog function. */
+ /* In doloop we generate instructions at the beginning of the prolog to
+ adjust the initial value of doloop counter by STAGE_COUNT.
+ If loop count is constant, this adjustment is done outside this
+ function, simply correcting the source of initialization insn. */
rtx sub_reg = NULL_RTX;
sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
@@ -1129,8 +1429,40 @@ generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
emit_move_insn (count_reg, sub_reg);
}
+ if (!doloop_p)
+ {
+ /* In non-doloop we generate instructions at the beginning of
+ the prolog to adjust the final value (with this value loop count
+ register is compared to check whether the loop should stop). */
+ if (fin_nonconst_adjust != 0)
+ {
+ /* If the final value is in a register - create another register
+ to store a shifted value. */
+ rtx new_reg, reg = NULL_RTX;
+ reg = gen_reg_rtx (GET_MODE (fin_reg));
+ new_reg = expand_simple_binop (GET_MODE (fin_reg), MINUS, fin_reg,
+ GEN_INT (fin_nonconst_adjust),
+ reg, 0, OPTAB_DIRECT);
+ gcc_assert (REG_P (new_reg));
+ if (REGNO (new_reg) != REGNO (reg))
+ emit_move_insn (reg, new_reg);
+ *created_reg = new_reg;
+ }
+ else if (create_reg)
+ {
+ /* If old final value is an immediate, and the new one can't be
+ an immediate, we create a register to store it. If both values
+ are immediate the adjustment is done outside this fuction,
+ just correcting the constant value in compare intruction. */
+ rtx reg = NULL_RTX;
+ reg = gen_reg_rtx (GET_MODE (count_reg));
+ emit_move_insn (reg, GEN_INT (reg_val));
+ *created_reg = reg;
+ }
+ }
+
for (i = 0; i < last_stage; i++)
- duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
+ duplicate_insns_of_cycles (ps, 0, i, 1, count_reg, doloop_p);
/* Put the prolog on the entry edge. */
e = loop_preheader_edge (loop);
@@ -1142,7 +1474,7 @@ generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
start_sequence ();
for (i = 0; i < last_stage; i++)
- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
+ duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg, doloop_p);
/* Put the epilogue on the exit edge. */
gcc_assert (single_exit (loop));
@@ -1422,13 +1754,30 @@ sms_schedule (void)
continue;
}
- /* Make sure this is a doloop. */
- if ( !(count_reg = doloop_register_get (head, tail)))
- {
- if (dump_file)
- fprintf (dump_file, "SMS doloop_register_get failed\n");
- continue;
- }
+ /* Is this a doloop? */
+ if ((count_reg = doloop_register_get (head, tail)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS doloop\n");
+ }
+ else if ((count_reg = nondoloop_register_get (head, tail, 0,
+ &insn, &insn)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS non-doloop\n");
+ }
+ else if ((count_reg = nondoloop_register_get (head, tail, 1,
+ &insn, &insn)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS non-doloop with transposed cmp\n");
+ }
+ else
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS imcompatible loop\n");
+ continue;
+ }
/* Don't handle BBs with calls or barriers, or !single_set insns
with the exception of instructions that include count_reg---these
@@ -1477,7 +1826,7 @@ sms_schedule (void)
fprintf (dump_file, "SMS create_ddg failed\n");
continue;
}
-
+ sms_create_ddg_finish ();
g_arr[loop->num] = g;
if (dump_file)
fprintf (dump_file, "...OK\n");
@@ -1489,15 +1838,27 @@ sms_schedule (void)
fprintf (dump_file, "=========================\n\n");
}
+ df_clear_flags (DF_LR_RUN_DCE);
+
/* We don't want to perform SMS on new loops - created by versioning. */
FOR_EACH_LOOP (li, loop, 0)
{
+ bool doloop_p, count_fin_isconst, count_init_isconst;
+ bool was_immediate = false;
+ bool prolog_create_reg = false;
+ int prolog_fin_nonconst_adjust = 0;
+ bool nonsimple_loop = false;
rtx head, tail;
- rtx count_reg, count_init;
- int mii, rec_mii;
- unsigned stage_count = 0;
- HOST_WIDEST_INT loop_count = 0;
bool opt_sc_p = false;
+ rtx count_reg, count_fin_reg, new_comp_reg = NULL_RTX;
+ rtx count_init_insn, count_fin_init_insn;
+ rtx add, cmp;
+ int mii, rec_mii, cmp_side = -1;
+ int stage_count = 0;
+ HOST_WIDEST_INT count_init_val = 0, count_fin_val = 0;
+ HOST_WIDEST_INT count_step = 0, loop_count = -1;
+ HOST_WIDEST_INT count_fin_newval = 0;
+ struct niter_desc *desc = NULL;
if (! (g = g_arr[loop->num]))
continue;
@@ -1535,32 +1896,159 @@ sms_schedule (void)
(HOST_WIDEST_INT) profile_info->sum_max);
fprintf (dump_file, "\n");
}
- fprintf (dump_file, "SMS doloop\n");
fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
}
- /* In case of th loop have doloop register it gets special
- handling. */
- count_init = NULL_RTX;
- if ((count_reg = doloop_register_get (head, tail)))
+ /* Extract count register and determine loop type. */
+ add = NULL_RTX;
+ cmp = NULL_RTX;
+ if ((count_reg = doloop_register_get (head, tail))
+ || (count_reg = nondoloop_register_get (head, tail, 0, &add, &cmp))
+ || (count_reg = nondoloop_register_get (head, tail, 1, &add, &cmp)))
{
- basic_block pre_header;
+ basic_block pre_header = loop_preheader_edge (loop)->src;
+
+ doloop_p = (cmp == NULL_RTX);
+ if (doloop_p)
+ {
+ /* Doloop finish parameters are always the same. */
+ count_step = -1;
+ count_fin_isconst = true;
+ count_fin_val = 0;
+ count_fin_reg = NULL_RTX;
+ count_fin_init_insn = NULL_RTX;
+ }
+ else
+ {
+ /* In other loop we need to determine counter step
+ and finish parameters. */
+ rtx step, end;
+
+ gcc_assert (single_set (add) && single_set (cmp));
+
+ /* Extract the step. */
+ step = XEXP (SET_SRC (single_set (add)), 1);
+ gcc_assert (CONST_INT_P (step));
+
+ if (GET_CODE (SET_SRC (single_set (add))) == MINUS)
+ count_step = - INTVAL (step);
+ else if (GET_CODE (SET_SRC (single_set (add))) == PLUS)
+ count_step = INTVAL (step);
+ else
+ gcc_unreachable ();
- pre_header = loop_preheader_edge (loop)->src;
- count_init = const_iteration_count (count_reg, pre_header,
- &loop_count);
+ gcc_assert(count_step != 0);
+
+ /* Check what operand of compare insn is a counter register. */
+ if (count_reg == XEXP (SET_SRC (single_set (cmp)), 0))
+ cmp_side = 0;
+ else if (count_reg == XEXP (SET_SRC (single_set (cmp)), 1))
+ cmp_side = 1;
+ else
+ gcc_unreachable ();
+
+ /* Extract finish border for counter reg. */
+ end = XEXP (SET_SRC (single_set (cmp)), 1 - cmp_side);
+
+ if (CONST_INT_P (end))
+ {
+ /* Constant finish border. loop until (reg != const). */
+ count_fin_isconst = true;
+ count_fin_val = INTVAL (end);
+ count_fin_reg = NULL_RTX;
+ count_fin_init_insn = NULL_RTX;
+ }
+ else if (REG_P (end))
+ {
+ /* Register is a border. Loop until (reg != fin_reg). */
+ count_fin_reg = end;
+ count_fin_isconst = false;
+ /* Try to find constant initinalization of fin_reg
+ * in preheader. */
+ count_fin_init_insn = search_const_init (pre_header,
+ count_fin_reg,
+ &count_fin_isconst,
+ &count_fin_val);
+ }
+ else
+ gcc_unreachable ();
+ }
+ /* Try to find a constant initalization of count_reg in preheader. */
+ count_init_insn = search_const_init (pre_header,
+ count_reg,
+ &count_init_isconst,
+ &count_init_val);
+ }
+ else /* Loop is incompatible now, but it was OK on while analyzing! */
+ gcc_assert (count_reg);
+
+
+ desc = get_simple_loop_desc (loop);
+ gcc_assert (desc);
+ /* nonsimple_loop means it's impossible to analyze the loop
+ or there are some assumptions to make the analyzis results right
+ or there is a condition of non-infinite number of iterations.
+ We want doloops to be scheduled even if analyzis shows they are
+ nonsimple (backward compatibility). */
+ nonsimple_loop = !desc->simple_p;
+ /* We allow scheduling loop with some assumptions or infinite condition
+ only when unsafe_loop_optimizations flag is enabled. */
+ if (flag_unsafe_loop_optimizations)
+ {
+ desc->infinite = NULL_RTX;
+ desc->assumptions = NULL_RTX;
+ desc->noloop_assumptions = NULL_RTX;
+ }
+ nonsimple_loop = nonsimple_loop || (desc->assumptions != NULL_RTX)
+ || (desc->noloop_assumptions != NULL_RTX)
+ || (desc->infinite != NULL_RTX);
+ /* Only doloops can be nonsimple_loops for SMS. */
+ if (nonsimple_loop && !doloop_p)
+ {
+ free_ddg (g);
+ continue;
+ }
+ /* Manually set some description fields in non-simple doloop. */
+ if (nonsimple_loop)
+ {
+ gcc_assert(doloop_p);
+ desc->const_iter = false;
+ desc->infinite = NULL_RTX;
}
- gcc_assert (count_reg);
- if (dump_file && count_init)
+ if (desc->const_iter)
+ {
+ gcc_assert (!desc->infinite);
+ loop_count = desc->niter;
+ if (dump_file)
+ fprintf (dump_file, "SMS const loop iterations = "
+ HOST_WIDEST_INT_PRINT_DEC "\n", loop_count);
+ }
+ if (count_init_isconst && count_fin_isconst)
{
- fprintf (dump_file, "SMS const-doloop ");
- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
- loop_count);
- fprintf (dump_file, "\n");
+ gcc_assert (doloop_p || desc->const_iter);
+ if (doloop_p)
+ {
+ if (nonsimple_loop)
+ {
+ loop_count = count_init_val;
+ desc->const_iter = true;
+ }
+ gcc_assert (desc->const_iter && loop_count == count_init_val);
+ }
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS const-%s ",
+ doloop_p ? "doloop" : "loop");
+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC " to "
+ HOST_WIDEST_INT_PRINT_DEC " step "
+ HOST_WIDEST_INT_PRINT_DEC,
+ count_init_val, count_fin_val, count_step);
+ fprintf (dump_file, "\n");
+ }
}
node_order = XNEWVEC (int, g->num_nodes);
@@ -1608,8 +2096,8 @@ sms_schedule (void)
/* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1 means that there is no interleaving between iterations thus
we let the scheduling passes do the job in this case. */
- if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC)
- || (count_init && (loop_count <= stage_count))
+ if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
+ || (desc->const_iter && (loop_count <= stage_count))
|| (flag_branch_probabilities && (trip_count <= stage_count)))
{
if (dump_file)
@@ -1625,8 +2113,10 @@ sms_schedule (void)
else
{
struct undo_replace_buff_elem *reg_move_replaces;
+ int row, cmp_stage = -1;
+ ps_insn_ptr crr_insn;
- if (!opt_sc_p)
+ if (!opt_sc_p)
{
/* Rotate the partial schedule to have the branch in row ii-1. */
int amount = SCHED_TIME (g->closing_branch) + 1;
@@ -1647,23 +2137,24 @@ sms_schedule (void)
print_partial_schedule (ps, dump_file);
}
- /* case the BCT count is not known , Do loop-versioning */
- if (count_reg && ! count_init)
- {
- rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
- GEN_INT(stage_count));
- unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
- * REG_BR_PROB_BASE) / 100;
-
- loop_version (loop, comp_rtx, &condition_bb,
- prob, prob, REG_BR_PROB_BASE - prob,
- true);
- }
+ if (!desc->const_iter)
+ {
+ /* Loop versioning if the number of iterations is unknown. */
+ unsigned prob;
+ rtx vers_cond;
+ vers_cond = gen_rtx_fmt_ee (GT, VOIDmode, nonsimple_loop ?
+ count_reg : desc->niter_expr,
+ GEN_INT (stage_count));
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nLoop versioning condition:\n");
+ print_rtl_single (dump_file, vers_cond);
+ }
- /* Set new iteration count of loop kernel. */
- if (count_reg && count_init)
- SET_SRC (single_set (count_init)) = GEN_INT (loop_count
- - stage_count + 1);
+ prob = (PROB_SMS_ENOUGH_ITERATIONS * REG_BR_PROB_BASE) / 100;
+ loop_version (loop, vers_cond, &condition_bb, prob,
+ prob, REG_BR_PROB_BASE - prob, true);
+ }
/* Now apply the scheduled kernel to the RTL of the loop. */
permute_partial_schedule (ps, g->closing_branch->first_note);
@@ -1678,8 +2169,116 @@ sms_schedule (void)
reg_move_replaces = generate_reg_moves (ps, true);
if (dump_file)
print_node_sched_params (dump_file, g->num_nodes, g);
- /* Generate prolog and epilog. */
- generate_prolog_epilog (ps, loop, count_reg, count_init);
+
+ if (doloop_p && count_init_isconst)
+ {
+ /* Change counter reg initialization constant. In more complex
+ cases this adjustment is done with adding some insns
+ to loop prologue in generate_prolog_epilog function. */
+ gcc_assert (single_set (count_init_insn) != NULL_RTX);
+ SET_SRC (single_set (count_init_insn))
+ = GEN_INT (count_init_val - stage_count + 1);
+ df_insn_rescan (count_init_insn);
+ }
+
+ if (!doloop_p)
+ {
+ /* Calculation of the compare insn stage in schedule. */
+ for (row = 0; row < ps->ii; row++)
+ for (crr_insn = ps->rows[row];
+ crr_insn;
+ crr_insn = crr_insn->next_in_row)
+ {
+ gcc_assert (0 <= SCHED_STAGE (crr_insn->node));
+ gcc_assert (SCHED_STAGE (crr_insn->node) < stage_count);
+ if (rtx_equal_p (crr_insn->node->insn, cmp))
+ {
+ gcc_assert (cmp_stage == -1);
+ cmp_stage = SCHED_STAGE (crr_insn->node);
+ }
+ }
+ if (dump_file)
+ fprintf (dump_file, "cmp_stage=%d\n", cmp_stage);
+ gcc_assert (cmp_stage >= 0);
+ }
+
+ /* When compare insn stage is non-zero we are to shift the final
+ counter reg value (which counter is compared to exit loop).
+ Final value can be an immediate or can be a register, which
+ constant initialization we find in preheader. */
+ was_immediate = false;
+ if (!doloop_p && count_fin_isconst && cmp_stage > 0)
+ {
+ gcc_assert (0 <= cmp_side && cmp_side <= 1);
+ /* New finish value. */
+ count_fin_newval = count_fin_val - count_step * cmp_stage;
+ was_immediate = CONST_INT_P (XEXP (SET_SRC (single_set (cmp)),
+ 1 - cmp_side));
+ if (was_immediate)
+ {
+ /* Check whether new value also can be an immediate.
+ For exapmle, on ARM not all values can be encoded as
+ an immediate, so we have to load it to a register once
+ before the loop starts. */
+ rtx to = GEN_INT (count_fin_newval);
+ prolog_create_reg = rtx_cost (to, GET_CODE (to), false)
+ > rtx_cost (GEN_INT(1), CONST_INT, false);
+ }
+ else
+ {
+ /* A value is already in a register and we easily change
+ initialization instruction in preheader. */
+ gcc_assert (count_fin_init_insn);
+ SET_SRC (single_set (count_fin_init_insn))
+ = GEN_INT (count_fin_newval);
+ df_insn_rescan (count_fin_init_insn);
+ }
+ }
+
+ /* The adjustment of finish register value.
+ Zero means no adjustment needed or adjusment is done
+ without additional insn in prologue. */
+ if (!doloop_p && !count_fin_isconst)
+ prolog_fin_nonconst_adjust = count_step * cmp_stage;
+
+ /* Ready to generate prolog and epilog. */
+ generate_prolog_epilog (ps, loop, count_reg, doloop_p,
+ count_init_isconst, count_fin_reg,
+ prolog_fin_nonconst_adjust,
+ prolog_create_reg, count_fin_newval,
+ &new_comp_reg);
+
+ /* And only after generating prolog and epilog it is possible
+ to modify the compare instruction (to prevent copying wrong insn
+ form to first and last stages). */
+ if (!doloop_p && cmp_stage > 0)
+ {
+ gcc_assert (0 <= cmp_side && cmp_side <= 1);
+ if (was_immediate && !prolog_create_reg)
+ {
+ /* Easy case - just modify a constant. */
+ gcc_assert (new_comp_reg == NULL_RTX);
+ XEXP (SET_SRC (single_set (cmp)), 1 - cmp_side)
+ = GEN_INT (count_fin_newval);
+ }
+ else
+ {
+ if (count_fin_isconst && !was_immediate)
+ /* Value is in a register and we already changed
+ initialization instruction in preheader. */
+ gcc_assert (new_comp_reg == NULL_RTX);
+ else
+ {
+ /* Another case - use created by generate_prolog_epilog
+ register, which value is initialized in prologue. */
+ gcc_assert (new_comp_reg != NULL_RTX);
+ XEXP (SET_SRC (single_set (cmp)), 1 - cmp_side)
+ = new_comp_reg;
+ }
+ }
+ df_insn_rescan (cmp);
+ }
+ else gcc_assert (new_comp_reg == NULL_RTX);
free_undo_replace_buff (reg_move_replaces);
}
@@ -1690,7 +2289,9 @@ sms_schedule (void)
free_ddg (g);
}
+ df_set_flags (DF_LR_RUN_DCE);
free (g_arr);
+ iv_analysis_done ();
/* Release scheduler data, needed until now because of DFA. */
haifa_sched_finish ();
@@ -1229,6 +1229,8 @@ extern void sched_deps_finish (void);
extern void haifa_note_reg_set (int);
extern void haifa_note_reg_clobber (int);
extern void haifa_note_reg_use (int);
+void sms_sched_analyze_init (void);
+regset extract_from_insn_map (rtx, bool);
extern void maybe_extend_reg_info_p (void);