@@ -35,6 +35,11 @@ along with GCC; see the file COPYING3. If not see
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"
+#include "regs.h"
+#include "ira.h"
+#include "rtl-iter.h"
+#include "regset.h"
+#include "df.h"
/* This pass performs loop unrolling. We only perform this
optimization on innermost loops (with single exception) because
@@ -65,6 +70,39 @@ along with GCC; see the file COPYING3. If not see
showed that this choice may affect performance in order of several %.
*/
+class loop_data
+{
+public:
+ class loop *outermost_exit; /* The outermost exit of the loop. */
+ bool has_call; /* True if the loop contains a call. */
+ /* Maximal register pressure inside loop for given register class
+ (defined only for the pressure classes). */
+ int max_reg_pressure[N_REG_CLASSES];
+ int regs_needed[N_REG_CLASSES];
+ /* Loop regs referenced and live pseudo-registers. */
+ bitmap_head regs_ref;
+ bitmap_head regs_live;
+};
+
+#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
+
+/* Record all regs that are set in any one insn. Communication from
+ mark_reg_{store,clobber} and global_conflicts. Asm can refer to
+ all hard-registers. */
+static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
+ ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
+/* Number of regs stored in the previous array. */
+static int n_regs_set;
+
+/* Currently processed loop. */
+static class loop *curr_loop;
+
+/* Registers currently living. */
+static bitmap_head curr_regs_live;
+
+/* Current reg pressure for each pressure class. */
+static int curr_reg_pressure[N_REG_CLASSES];
+
/* Information about induction variables to split. */
struct iv_to_split
@@ -102,6 +140,7 @@ struct iv_split_hasher : free_ptr_hash <iv_to_split>
static inline bool equal (const iv_to_split *, const iv_to_split *);
};
+void mark_insn_with_unroller (class loop *loop, basic_block bb);
/* A hash function for information about insns to split. */
@@ -272,11 +311,263 @@ decide_unrolling (int flags)
}
}
+/* Return pressure class and number of needed hard registers (through
+ *NREGS) of register REGNO. */
+static enum reg_class
+get_regno_pressure_class (int regno, int *nregs)
+{
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ enum reg_class pressure_class;
+ pressure_class = reg_allocno_class (regno);
+ pressure_class = ira_pressure_class_translate[pressure_class];
+ *nregs
+ = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
+ return pressure_class;
+ }
+ else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
+ && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
+ {
+ *nregs = 1;
+ return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
+ }
+ else
+ {
+ *nregs = 0;
+ return NO_REGS;
+ }
+}
+
+/* Increase (if INCR_P) or decrease current register pressure for
+ register REGNO. */
+static void
+change_pressure (int regno, bool incr_p)
+{
+ int nregs;
+ enum reg_class pressure_class;
+
+ pressure_class = get_regno_pressure_class (regno, &nregs);
+ if (! incr_p)
+ curr_reg_pressure[pressure_class] -= nregs;
+ else
+ {
+ curr_reg_pressure[pressure_class] += nregs;
+ if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
+ < curr_reg_pressure[pressure_class])
+ LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
+ = curr_reg_pressure[pressure_class];
+ }
+}
+
+/* Mark REGNO birth. */
+static void
+mark_regno_live (int regno)
+{
+ class loop *loop;
+
+ for (loop = curr_loop;
+ loop != current_loops->tree_root;
+ loop = loop_outer (loop))
+ bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
+ if (!bitmap_set_bit (&curr_regs_live, regno))
+ return;
+ change_pressure (regno, true);
+}
+
+/* Mark REGNO death. */
+static void
+mark_regno_death (int regno)
+{
+ if (! bitmap_clear_bit (&curr_regs_live, regno))
+ return;
+ change_pressure (regno, false);
+}
+
+/* Mark setting register REG. */
+static void
+mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
+ void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ if (! REG_P (reg))
+ return;
+
+ regs_set[n_regs_set++] = reg;
+
+ unsigned int end_regno = END_REGNO (reg);
+ for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+ mark_regno_live (regno);
+}
+
+/* Mark clobbering register REG. */
+static void
+mark_reg_clobber (rtx reg, const_rtx setter, void *data)
+{
+ if (GET_CODE (setter) == CLOBBER)
+ mark_reg_store (reg, setter, data);
+}
+
+/* Mark register REG death. */
+static void
+mark_reg_death (rtx reg)
+{
+ unsigned int end_regno = END_REGNO (reg);
+ for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+ mark_regno_death (regno);
+}
+
+/* Mark occurrence of registers in X for the current loop. */
+static void
+mark_ref_regs (rtx x)
+{
+ RTX_CODE code;
+ int i;
+ const char *fmt;
+
+ if (!x)
+ return;
+
+ code = GET_CODE (x);
+ if (code == REG)
+ {
+ class loop *loop;
+
+ for (loop = curr_loop;
+ loop != current_loops->tree_root;
+ loop = loop_outer (loop))
+ bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
+ return;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ if (fmt[i] == 'e')
+ mark_ref_regs (XEXP (x, i));
+ else if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = 0; j < XVECLEN (x, i); j++)
+ mark_ref_regs (XVECEXP (x, i, j));
+ }
+}
+
+static void
+mark_insn (rtx_insn *insn)
+{
+ rtx link;
+ mark_ref_regs (PATTERN (insn));
+ n_regs_set = 0;
+ note_stores (insn, mark_reg_clobber, NULL);
+
+ /* Mark any registers dead after INSN as dead now. */
+
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_DEAD)
+ mark_reg_death (XEXP (link, 0));
+
+ /* Mark any registers set in INSN as live,
+ and mark them as conflicting with all other live regs.
+ Clobbers are processed again, so they conflict with
+ the registers that are set. */
+
+ note_stores (insn, mark_reg_store, NULL);
+
+ if (AUTO_INC_DEC)
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_INC)
+ mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
+
+ while (n_regs_set-- > 0)
+ {
+ rtx note = find_regno_note (insn, REG_UNUSED,
+ REGNO (regs_set[n_regs_set]));
+ if (! note)
+ continue;
+
+ mark_reg_death (XEXP (note, 0));
+ }
+}
+
+/* Calculate register pressure in the loops. */
+static void
+calculate_loop_reg_pressure (void)
+{
+ int i;
+ unsigned int j;
+ bitmap_iterator bi;
+ basic_block bb;
+ class loop *parent;
+
+ for (auto loop : loops_list (cfun, 0))
+ if (loop->aux == NULL)
+ {
+ loop->aux = xcalloc (1, sizeof (class loop_data));
+ bitmap_initialize (&LOOP_DATA (loop)->regs_ref, ®_obstack);
+ bitmap_initialize (&LOOP_DATA (loop)->regs_live, ®_obstack);
+ }
+ ira_setup_eliminable_regset ();
+ bitmap_initialize (&curr_regs_live, ®_obstack);
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ curr_loop = bb->loop_father;
+ if (curr_loop == current_loops->tree_root)
+ continue;
+
+ for (class loop *loop = curr_loop;
+ loop != current_loops->tree_root;
+ loop = loop_outer (loop))
+ bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
+
+ bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ curr_reg_pressure[ira_pressure_classes[i]] = 0;
+ EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
+ change_pressure (j, true);
+ mark_insn_with_unroller (curr_loop, bb);
+ }
+ bitmap_release (&curr_regs_live);
+ if (dump_file == NULL)
+ return;
+ for (auto loop : loops_list (cfun, 0))
+ {
+ parent = loop_outer (loop);
+ fprintf (dump_file, "\n Loop %d (parent %d, header bb%d, depth %d)\n",
+ loop->num, (parent == NULL ? -1 : parent->num),
+ loop->header->index, loop_depth (loop));
+ fprintf (dump_file, "\n ref. regnos:");
+ EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
+ fprintf (dump_file, " %d", j);
+ fprintf (dump_file, "\n live regnos:");
+ EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
+ fprintf (dump_file, " %d", j);
+ fprintf (dump_file, "\n Pressure:");
+ for (i = 0; (int) i < ira_pressure_classes_num; i++)
+ {
+ enum reg_class pressure_class;
+
+ pressure_class = ira_pressure_classes[i];
+ if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
+ continue;
+ fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
+ LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
+ }
+ fprintf (dump_file, "\n");
+ }
+}
+
/* Unroll LOOPS. */
void
unroll_loops (int flags)
{
bool changed = false;
+ df_analyze ();
+ regstat_init_n_sets_and_refs ();
+ ira_set_pseudo_classes (true, dump_file);
+ calculate_loop_reg_pressure ();
+ regstat_free_n_sets_and_refs ();
/* Now decide rest of unrolling. */
decide_unrolling (flags);
@@ -339,6 +630,40 @@ loop_exit_at_end_p (class loop *loop)
return true;
}
+/* Determine unroll factor for a given LOOP. Unroll factor
+ is calculated with maximum number of available registers.
+ If available registers is more unrolling factor is more. */
+int
+determine_unroll_factor (class loop *loop)
+{
+ int nunroll = 0;
+ int avail = 0;
+ int regs_needed = 0;
+
+ for (int i = 0; (int) i < ira_pressure_classes_num; i++)
+ {
+ enum reg_class pressure_class;
+
+ pressure_class = ira_pressure_classes[i];
+ int nregs_needed = LOOP_DATA (loop)->regs_needed[pressure_class];
+
+ if (nregs_needed <= 0)
+ continue;
+
+ if (regs_needed > nregs_needed || regs_needed == 0)
+ regs_needed = nregs_needed;
+
+ int hard_nregs = ira_class_hard_regs_num[pressure_class];
+ int avail_nregs = hard_nregs - regs_needed;
+
+ if (avail_nregs > avail || avail == 0)
+ avail = avail_nregs;
+ }
+ if (avail > 0 && regs_needed > 0)
+ nunroll = avail / regs_needed;
+ return nunroll;
+}
+
/* Decide whether to unroll LOOP iterating constant number of times
and how much. */
@@ -357,10 +682,11 @@ decide_unroll_constant_iterations (class loop *loop, int flags)
dump_printf (MSG_NOTE,
"considering unrolling loop with constant "
"number of iterations\n");
-
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
- nunroll = param_max_unrolled_insns / loop->ninsns;
+ nunroll = determine_unroll_factor (loop);
+ if (nunroll <= 0)
+ nunroll = param_max_unrolled_insns / loop->ninsns;
nunroll_by_av
= param_max_average_unrolled_insns / loop->av_ninsns;
if (nunroll > nunroll_by_av)
@@ -678,7 +1004,9 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
- nunroll = param_max_unrolled_insns / loop->ninsns;
+ nunroll = determine_unroll_factor (loop);
+ if (nunroll <= 0)
+ nunroll = param_max_unrolled_insns / loop->ninsns;
nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
if (nunroll > nunroll_by_av)
nunroll = nunroll_by_av;
@@ -1640,6 +1968,99 @@ analyze_insns_in_loop (class loop *loop)
free (body);
return opt_info;
}
+/* Return pressure class and number of hard registers (through *NREGS)
+ for destination of INSN. */
+static enum reg_class
+get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
+{
+ rtx reg;
+ enum reg_class pressure_class;
+ rtx set = single_set (insn);
+
+ if (set == NULL_RTX)
+ {
+ *nregs = 0;
+ return NO_REGS;
+ }
+ reg = SET_DEST (set);
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+ if (MEM_P (reg))
+ {
+ *nregs = 0;
+ pressure_class = NO_REGS;
+ }
+ else
+ {
+ if (! REG_P (reg))
+ reg = NULL_RTX;
+ if (reg == NULL_RTX)
+ pressure_class = GENERAL_REGS;
+ else
+ {
+ pressure_class = reg_allocno_class (REGNO (reg));
+ pressure_class = ira_pressure_class_translate[pressure_class];
+ }
+ *nregs
+ = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
+ }
+ return pressure_class;
+}
+
+/* Mark register in insn with available reg info for iv-split-unroller
+ and variable expansion. */
+void
+mark_insn_with_unroller (class loop *loop, basic_block bb)
+{
+ struct opt_info *opt_info = NULL;
+ struct iv_to_split *ivts;
+ struct var_to_expand *ves;
+ rtx_insn *insn;
+
+ curr_loop= loop;
+ opt_info = analyze_insns_in_loop (loop);
+
+ if (opt_info->iv_to_split_head == NULL && opt_info->var_to_expand_head == NULL)
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (! NONDEBUG_INSN_P (insn))
+ continue;
+ int nregs;
+ enum reg_class pressure_class;
+ pressure_class = get_pressure_class_and_nregs (insn, &nregs);
+ LOOP_DATA (loop)->regs_needed[pressure_class] += nregs;
+ mark_insn (insn);
+ }
+ if (opt_info->insns_to_split)
+ {
+ for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
+ {
+ insn = ivts->insn;
+ if (! NONDEBUG_INSN_P (insn))
+ continue;
+ int nregs;
+ enum reg_class pressure_class;
+ pressure_class = get_pressure_class_and_nregs (insn, &nregs);
+ LOOP_DATA (loop)->regs_needed[pressure_class] += nregs;
+ mark_insn (insn);
+ }
+ }
+
+ if (opt_info->insns_with_var_to_expand)
+ {
+ for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
+ {
+ insn = ves->insn;
+ if (! NONDEBUG_INSN_P (insn))
+ continue;
+ int nregs;
+ enum reg_class pressure_class;
+ pressure_class = get_pressure_class_and_nregs (insn, &nregs);
+ LOOP_DATA (loop)->regs_needed[pressure_class] += nregs;
+ mark_insn (insn);
+ }
+ }
+}
/* Called just before loop duplication. Records start of duplicated area
to OPT_INFO. */