@@ -35,6 +35,11 @@ along with GCC; see the file COPYING3. If not see
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"
+#include "regs.h"
+#include "ira.h"
+#include "rtl-iter.h"
+#include "regset.h"
+#include "df.h"
/* This pass performs loop unrolling. We only perform this
optimization on innermost loops (with single exception) because
@@ -65,6 +70,38 @@ along with GCC; see the file COPYING3. If not see
showed that this choice may affect performance in order of several %.
*/
+class loop_data
+{
+public:
+ class loop *outermost_exit; /* The outermost exit of the loop. */
+ bool has_call; /* True if the loop contains a call. */
+ /* Maximal register pressure inside loop for given register class
+ (defined only for the pressure classes). */
+ int max_reg_pressure[N_REG_CLASSES];
+ /* Loop regs referenced and live pseudo-registers. */
+ bitmap_head regs_ref;
+ bitmap_head regs_live;
+};
+
+#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
+
+/* Record all regs that are set in any one insn. Communication from
+ mark_reg_{store,clobber} and global_conflicts. Asm can refer to
+ all hard-registers. */
+static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
+ ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
+/* Number of regs stored in the previous array. */
+static int n_regs_set;
+
+/* Currently processed loop. */
+static class loop *curr_loop;
+
+/* Registers currently living. */
+static bitmap_head curr_regs_live;
+
+/* Current reg pressure for each pressure class. */
+static int curr_reg_pressure[N_REG_CLASSES];
+
/* Information about induction variables to split. */
struct iv_to_split
@@ -272,11 +309,262 @@ decide_unrolling (int flags)
}
}
+/* Return pressure class and number of needed hard registers (through
+ *NREGS) of register REGNO. */
+static enum reg_class
+get_regno_pressure_class (int regno, int *nregs)
+{
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ enum reg_class pressure_class;
+ pressure_class = reg_allocno_class (regno);
+ pressure_class = ira_pressure_class_translate[pressure_class];
+ *nregs
+ = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
+ return pressure_class;
+ }
+ else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
+ && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
+ {
+ *nregs = 1;
+ return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
+ }
+ else
+ {
+ *nregs = 0;
+ return NO_REGS;
+ }
+}
+
+/* Increase (if INCR_P) or decrease current register pressure for
+ register REGNO. */
+static void
+change_pressure (int regno, bool incr_p)
+{
+ int nregs;
+ enum reg_class pressure_class;
+
+ pressure_class = get_regno_pressure_class (regno, &nregs);
+ if (! incr_p)
+ curr_reg_pressure[pressure_class] -= nregs;
+ else
+ {
+ curr_reg_pressure[pressure_class] += nregs;
+ if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
+ < curr_reg_pressure[pressure_class])
+ LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
+ = curr_reg_pressure[pressure_class];
+ }
+}
+
+/* Mark REGNO birth. */
+static void
+mark_regno_live (int regno)
+{
+ class loop *loop;
+
+ for (loop = curr_loop;
+ loop != current_loops->tree_root;
+ loop = loop_outer (loop))
+ bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
+ if (!bitmap_set_bit (&curr_regs_live, regno))
+ return;
+ change_pressure (regno, true);
+}
+
+/* Mark REGNO death. */
+static void
+mark_regno_death (int regno)
+{
+ if (! bitmap_clear_bit (&curr_regs_live, regno))
+ return;
+ change_pressure (regno, false);
+}
+
+/* Mark setting register REG. */
+static void
+mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
+ void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ if (! REG_P (reg))
+ return;
+
+ regs_set[n_regs_set++] = reg;
+
+ unsigned int end_regno = END_REGNO (reg);
+ for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+ mark_regno_live (regno);
+}
+
+/* Mark clobbering register REG. */
+static void
+mark_reg_clobber (rtx reg, const_rtx setter, void *data)
+{
+ if (GET_CODE (setter) == CLOBBER)
+ mark_reg_store (reg, setter, data);
+}
+
+/* Mark register REG death. */
+static void
+mark_reg_death (rtx reg)
+{
+ unsigned int end_regno = END_REGNO (reg);
+ for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+ mark_regno_death (regno);
+}
+
+/* Mark occurrence of registers in X for the current loop. */
+static void
+mark_ref_regs (rtx x)
+{
+ RTX_CODE code;
+ int i;
+ const char *fmt;
+
+ if (!x)
+ return;
+
+ code = GET_CODE (x);
+ if (code == REG)
+ {
+ class loop *loop;
+
+ for (loop = curr_loop;
+ loop != current_loops->tree_root;
+ loop = loop_outer (loop))
+ bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
+ return;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ if (fmt[i] == 'e')
+ mark_ref_regs (XEXP (x, i));
+ else if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = 0; j < XVECLEN (x, i); j++)
+ mark_ref_regs (XVECEXP (x, i, j));
+ }
+}
+
+/* Calculate register pressure in the loops. */
+static void
+calculate_loop_reg_pressure (void)
+{
+ int i;
+ unsigned int j;
+ bitmap_iterator bi;
+ basic_block bb;
+ rtx_insn *insn;
+ rtx link;
+ class loop *parent;
+
+ for (auto loop : loops_list (cfun, 0))
+ if (loop->aux == NULL)
+ {
+ loop->aux = xcalloc (1, sizeof (class loop_data));
+ bitmap_initialize (&LOOP_DATA (loop)->regs_ref, ®_obstack);
+ bitmap_initialize (&LOOP_DATA (loop)->regs_live, ®_obstack);
+ }
+ ira_setup_eliminable_regset ();
+ bitmap_initialize (&curr_regs_live, ®_obstack);
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ curr_loop = bb->loop_father;
+ if (curr_loop == current_loops->tree_root)
+ continue;
+
+ for (class loop *loop = curr_loop;
+ loop != current_loops->tree_root;
+ loop = loop_outer (loop))
+ bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
+
+ bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
+ for (i = 0; i < ira_pressure_classes_num; i++)
+ curr_reg_pressure[ira_pressure_classes[i]] = 0;
+ EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
+ change_pressure (j, true);
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (! NONDEBUG_INSN_P (insn))
+ continue;
+
+ mark_ref_regs (PATTERN (insn));
+ n_regs_set = 0;
+ note_stores (insn, mark_reg_clobber, NULL);
+
+ /* Mark any registers dead after INSN as dead now. */
+
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_DEAD)
+ mark_reg_death (XEXP (link, 0));
+
+ /* Mark any registers set in INSN as live,
+ and mark them as conflicting with all other live regs.
+ Clobbers are processed again, so they conflict with
+ the registers that are set. */
+
+ note_stores (insn, mark_reg_store, NULL);
+
+ if (AUTO_INC_DEC)
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_INC)
+ mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
+
+ while (n_regs_set-- > 0)
+ {
+ rtx note = find_regno_note (insn, REG_UNUSED,
+ REGNO (regs_set[n_regs_set]));
+ if (! note)
+ continue;
+
+ mark_reg_death (XEXP (note, 0));
+ }
+ }
+ }
+ bitmap_release (&curr_regs_live);
+ if (dump_file == NULL)
+ return;
+ for (auto loop : loops_list (cfun, 0))
+ {
+ parent = loop_outer (loop);
+ fprintf (dump_file, "\n Loop %d (parent %d, header bb%d, depth %d)\n",
+ loop->num, (parent == NULL ? -1 : parent->num),
+ loop->header->index, loop_depth (loop));
+ fprintf (dump_file, "\n ref. regnos:");
+ EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
+ fprintf (dump_file, " %d", j);
+ fprintf (dump_file, "\n live regnos:");
+ EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
+ fprintf (dump_file, " %d", j);
+ fprintf (dump_file, "\n Pressure:");
+ for (i = 0; (int) i < ira_pressure_classes_num; i++)
+ {
+ enum reg_class pressure_class;
+
+ pressure_class = ira_pressure_classes[i];
+ if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
+ continue;
+ fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
+ LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
+ }
+ fprintf (dump_file, "\n");
+ }
+}
+
/* Unroll LOOPS. */
void
unroll_loops (int flags)
{
bool changed = false;
+ df_analyze ();
+ calculate_loop_reg_pressure ();
/* Now decide rest of unrolling. */
decide_unrolling (flags);
@@ -339,6 +627,40 @@ loop_exit_at_end_p (class loop *loop)
return true;
}
+/* Determine unroll factor for a given LOOP. Unroll factor
+ is calculated with quotient of maximum number of available registers
+ with number of liveness.
+ If available registers is more unrolling factor is more. And if
+ number of liveness is more unroll factor is less. */
+int
+determine_unroll_factor (class loop *loop)
+{
+ int nunroll = 0;
+ int avail = 0;
+ int max_pressure = 0;
+ int num_live = 0;
+ for (int i = 0; (int) i < ira_pressure_classes_num; i++)
+ {
+ enum reg_class pressure_class;
+
+ pressure_class = ira_pressure_classes[i];
+ max_pressure = LOOP_DATA (loop)->max_reg_pressure[pressure_class];
+ if (max_pressure == 0)
+ continue;
+
+ if (((ira_class_hard_regs_num[pressure_class] - max_pressure) > avail)
+ || avail == 0)
+ avail = ira_class_hard_regs_num[pressure_class] - max_pressure;
+
+ num_live++;
+ }
+
+ if (avail > 0 && num_live > 0)
+ nunroll = avail / num_live;
+
+ return nunroll;
+}
+
/* Decide whether to unroll LOOP iterating constant number of times
and how much. */
@@ -357,10 +679,11 @@ decide_unroll_constant_iterations (class loop *loop, int flags)
dump_printf (MSG_NOTE,
"considering unrolling loop with constant "
"number of iterations\n");
-
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
- nunroll = param_max_unrolled_insns / loop->ninsns;
+ nunroll = determine_unroll_factor (loop);
+ if (nunroll <= 0)
+ nunroll = param_max_unrolled_insns / loop->ninsns;
nunroll_by_av
= param_max_average_unrolled_insns / loop->av_ninsns;
if (nunroll > nunroll_by_av)
@@ -678,7 +1001,9 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
- nunroll = param_max_unrolled_insns / loop->ninsns;
+ nunroll = determine_unroll_factor (loop);
+ if (nunroll <= 0)
+ nunroll = param_max_unrolled_insns / loop->ninsns;
nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
if (nunroll > nunroll_by_av)
nunroll = nunroll_by_av;