diff mbox series

[rtl-optimization] : Loop unroll factor based on register pressure

Message ID b4350266-e91f-4df2-b65f-74e6a9312e53@linux.ibm.com
State New
Headers show
Series [rtl-optimization] : Loop unroll factor based on register pressure | expand

Commit Message

Ajit Agarwal June 29, 2024, 12:19 p.m. UTC
Hello All:

This patch determines Unroll factor based on loop register pressure.

Unroll factor is quotient of max of available registers in loop
by number of liveness.

If available registers increases unroll factor increases.
Wherein unroll factor decreases if number of liveness increases.

Loop unrolling is based on loop variables that determines unroll
factor. Loop variables of the loop are the variables that increases
register pressure and take advantage of existing register pressure
calculation.

Available registers are determined by the number of hard registers
available for each register class minus max reg pressure of loop
for given register class.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


rtl-optimization: Loop unroll factor based on register pressure

Unroll factor is calculated based on loop register pressure.

Unroll factor is quotient of max of available registers in loop
by number of liveness.

If available registers increases unroll factor increases.
Wherein unroll factor decreases if number of liveness increases.

Loop unrolling is based on loop variables that determines unroll
factor. Loop variables of the loop are the variables that increases
register pressure and take advantage of existing register pressure
calculation.

Available registers are determined by the number of hard registers
available for each register class minus max reg pressure of loop
for given register class.

2024-06-29  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	* loop-unroll.cc: Add calculation of register pressure of
	the loop and use of that to calculate unroll factor.
---
 gcc/loop-unroll.cc | 331 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 328 insertions(+), 3 deletions(-)

Comments

Richard Biener July 3, 2024, 8:48 a.m. UTC | #1
On Sun, Jun 30, 2024 at 4:15 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>
> Hello All:
>
> This patch determines Unroll factor based on loop register pressure.
>
> Unroll factor is quotient of max of available registers in loop
> by number of liveness.
>
> If available registers increases unroll factor increases.
> Wherein unroll factor decreases if number of liveness increases.

Unrolling as implemented does not increase register lifetime unless
-fsplit-ivs-in-unroller or -fvariable-expansion-in-unroller.  But I do not
see you looking at those transforms at all.

Richard.

> Loop unrolling is based on loop variables that determines unroll
> factor. Loop variables of the loop are the variables that increases
> register pressure and take advantage of existing register pressure
> calculation.
>
> Available registers are determined by the number of hard registers
> available for each register class minus max reg pressure of loop
> for given register class.
>
> Bootstrapped and regtested on powerpc64-linux-gnu.
>
> Thanks & Regards
> Ajit
>
>
> rtl-optimization: Loop unroll factor based on register pressure
>
> Unroll factor is calculated based on loop register pressure.
>
> Unroll factor is quotient of max of available registers in loop
> by number of liveness.
>
> If available registers increases unroll factor increases.
> Wherein unroll factor decreases if number of liveness increases.
>
> Loop unrolling is based on loop variables that determines unroll
> factor. Loop variables of the loop are the variables that increases
> register pressure and take advantage of existing register pressure
> calculation.
>
> Available registers are determined by the number of hard registers
> available for each register class minus max reg pressure of loop
> for given register class.
>
> 2024-06-29  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>
> gcc/ChangeLog:
>
>         * loop-unroll.cc: Add calculation of register pressure of
>         the loop and use of that to calculate unroll factor.
> ---
>  gcc/loop-unroll.cc | 331 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 328 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
> index bfdfe6c2bb7..6936ba7afb9 100644
> --- a/gcc/loop-unroll.cc
> +++ b/gcc/loop-unroll.cc
> @@ -35,6 +35,11 @@ along with GCC; see the file COPYING3.  If not see
>  #include "dojump.h"
>  #include "expr.h"
>  #include "dumpfile.h"
> +#include "regs.h"
> +#include "ira.h"
> +#include "rtl-iter.h"
> +#include "regset.h"
> +#include "df.h"
>
>  /* This pass performs loop unrolling.  We only perform this
>     optimization on innermost loops (with single exception) because
> @@ -65,6 +70,38 @@ along with GCC; see the file COPYING3.  If not see
>     showed that this choice may affect performance in order of several %.
>     */
>
> +class loop_data
> +{
> +public:
> +  class loop *outermost_exit;  /* The outermost exit of the loop.  */
> +  bool has_call;               /* True if the loop contains a call.  */
> +  /* Maximal register pressure inside loop for given register class
> +     (defined only for the pressure classes).  */
> +  int max_reg_pressure[N_REG_CLASSES];
> +  /* Loop regs referenced and live pseudo-registers.  */
> +  bitmap_head regs_ref;
> +  bitmap_head regs_live;
> +};
> +
> +#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
> +
> +/* Record all regs that are set in any one insn.  Communication from
> +   mark_reg_{store,clobber} and global_conflicts.  Asm can refer to
> +   all hard-registers.  */
> +static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
> +                    ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
> +/* Number of regs stored in the previous array.  */
> +static int n_regs_set;
> +
> +/* Currently processed loop.  */
> +static class loop *curr_loop;
> +
> +/* Registers currently living.  */
> +static bitmap_head curr_regs_live;
> +
> +/* Current reg pressure for each pressure class.  */
> +static int curr_reg_pressure[N_REG_CLASSES];
> +
>  /* Information about induction variables to split.  */
>
>  struct iv_to_split
> @@ -272,11 +309,262 @@ decide_unrolling (int flags)
>      }
>  }
>
> +/* Return pressure class and number of needed hard registers (through
> +   *NREGS) of register REGNO.  */
> +static enum reg_class
> +get_regno_pressure_class (int regno, int *nregs)
> +{
> +  if (regno >= FIRST_PSEUDO_REGISTER)
> +    {
> +      enum reg_class pressure_class;
> +      pressure_class = reg_allocno_class (regno);
> +      pressure_class = ira_pressure_class_translate[pressure_class];
> +      *nregs
> +       = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
> +      return pressure_class;
> +    }
> +  else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
> +          && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
> +    {
> +      *nregs = 1;
> +      return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
> +    }
> +  else
> +    {
> +      *nregs = 0;
> +      return NO_REGS;
> +    }
> +}
> +
> +/* Increase (if INCR_P) or decrease current register pressure for
> +   register REGNO.  */
> +static void
> +change_pressure (int regno, bool incr_p)
> +{
> +  int nregs;
> +  enum reg_class pressure_class;
> +
> +  pressure_class = get_regno_pressure_class (regno, &nregs);
> +  if (! incr_p)
> +    curr_reg_pressure[pressure_class] -= nregs;
> +  else
> +    {
> +      curr_reg_pressure[pressure_class] += nregs;
> +      if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
> +         < curr_reg_pressure[pressure_class])
> +       LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
> +         = curr_reg_pressure[pressure_class];
> +    }
> +}
> +
> +/* Mark REGNO birth.  */
> +static void
> +mark_regno_live (int regno)
> +{
> +  class loop *loop;
> +
> +  for (loop = curr_loop;
> +       loop != current_loops->tree_root;
> +       loop = loop_outer (loop))
> +    bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
> +  if (!bitmap_set_bit (&curr_regs_live, regno))
> +    return;
> +  change_pressure (regno, true);
> +}
> +
> +/* Mark REGNO death.  */
> +static void
> +mark_regno_death (int regno)
> +{
> +  if (! bitmap_clear_bit (&curr_regs_live, regno))
> +    return;
> +  change_pressure (regno, false);
> +}
> +
> +/* Mark setting register REG.  */
> +static void
> +mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
> +               void *data ATTRIBUTE_UNUSED)
> +{
> +  if (GET_CODE (reg) == SUBREG)
> +    reg = SUBREG_REG (reg);
> +
> +  if (! REG_P (reg))
> +    return;
> +
> +  regs_set[n_regs_set++] = reg;
> +
> +  unsigned int end_regno = END_REGNO (reg);
> +  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
> +    mark_regno_live (regno);
> +}
> +
> +/* Mark clobbering register REG.  */
> +static void
> +mark_reg_clobber (rtx reg, const_rtx setter, void *data)
> +{
> +  if (GET_CODE (setter) == CLOBBER)
> +    mark_reg_store (reg, setter, data);
> +}
> +
> +/* Mark register REG death.  */
> +static void
> +mark_reg_death (rtx reg)
> +{
> +  unsigned int end_regno = END_REGNO (reg);
> +  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
> +    mark_regno_death (regno);
> +}
> +
> +/* Mark occurrence of registers in X for the current loop.  */
> +static void
> +mark_ref_regs (rtx x)
> +{
> +  RTX_CODE code;
> +  int i;
> +  const char *fmt;
> +
> +  if (!x)
> +    return;
> +
> +  code = GET_CODE (x);
> +  if (code == REG)
> +    {
> +      class loop *loop;
> +
> +      for (loop = curr_loop;
> +          loop != current_loops->tree_root;
> +          loop = loop_outer (loop))
> +       bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
> +      return;
> +    }
> +
> +  fmt = GET_RTX_FORMAT (code);
> +  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
> +    if (fmt[i] == 'e')
> +      mark_ref_regs (XEXP (x, i));
> +    else if (fmt[i] == 'E')
> +      {
> +       int j;
> +
> +       for (j = 0; j < XVECLEN (x, i); j++)
> +         mark_ref_regs (XVECEXP (x, i, j));
> +      }
> +}
> +
> +/* Calculate register pressure in the loops.  */
> +static void
> +calculate_loop_reg_pressure (void)
> +{
> +  int i;
> +  unsigned int j;
> +  bitmap_iterator bi;
> +  basic_block bb;
> +  rtx_insn *insn;
> +  rtx link;
> +  class loop *parent;
> +
> +  for (auto loop : loops_list (cfun, 0))
> +    if (loop->aux == NULL)
> +      {
> +       loop->aux = xcalloc (1, sizeof (class loop_data));
> +       bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
> +       bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
> +      }
> +  ira_setup_eliminable_regset ();
> +  bitmap_initialize (&curr_regs_live, &reg_obstack);
> +  FOR_EACH_BB_FN (bb, cfun)
> +    {
> +      curr_loop = bb->loop_father;
> +      if (curr_loop == current_loops->tree_root)
> +       continue;
> +
> +      for (class loop *loop = curr_loop;
> +          loop != current_loops->tree_root;
> +          loop = loop_outer (loop))
> +       bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
> +
> +      bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
> +      for (i = 0; i < ira_pressure_classes_num; i++)
> +       curr_reg_pressure[ira_pressure_classes[i]] = 0;
> +      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
> +       change_pressure (j, true);
> +
> +      FOR_BB_INSNS (bb, insn)
> +       {
> +         if (! NONDEBUG_INSN_P (insn))
> +           continue;
> +
> +         mark_ref_regs (PATTERN (insn));
> +         n_regs_set = 0;
> +         note_stores (insn, mark_reg_clobber, NULL);
> +
> +         /* Mark any registers dead after INSN as dead now.  */
> +
> +         for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
> +           if (REG_NOTE_KIND (link) == REG_DEAD)
> +             mark_reg_death (XEXP (link, 0));
> +
> +         /* Mark any registers set in INSN as live,
> +            and mark them as conflicting with all other live regs.
> +            Clobbers are processed again, so they conflict with
> +            the registers that are set.  */
> +
> +         note_stores (insn, mark_reg_store, NULL);
> +
> +         if (AUTO_INC_DEC)
> +           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
> +             if (REG_NOTE_KIND (link) == REG_INC)
> +               mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
> +
> +         while (n_regs_set-- > 0)
> +           {
> +             rtx note = find_regno_note (insn, REG_UNUSED,
> +                                         REGNO (regs_set[n_regs_set]));
> +             if (! note)
> +               continue;
> +
> +             mark_reg_death (XEXP (note, 0));
> +           }
> +       }
> +    }
> +  bitmap_release (&curr_regs_live);
> +  if (dump_file == NULL)
> +    return;
> +  for (auto loop : loops_list (cfun, 0))
> +    {
> +      parent = loop_outer (loop);
> +      fprintf (dump_file, "\n  Loop %d (parent %d, header bb%d, depth %d)\n",
> +              loop->num, (parent == NULL ? -1 : parent->num),
> +              loop->header->index, loop_depth (loop));
> +      fprintf (dump_file, "\n    ref. regnos:");
> +      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
> +       fprintf (dump_file, " %d", j);
> +      fprintf (dump_file, "\n    live regnos:");
> +      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
> +       fprintf (dump_file, " %d", j);
> +      fprintf (dump_file, "\n    Pressure:");
> +      for (i = 0; (int) i < ira_pressure_classes_num; i++)
> +       {
> +         enum reg_class pressure_class;
> +
> +         pressure_class = ira_pressure_classes[i];
> +         if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
> +           continue;
> +         fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
> +                  LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
> +       }
> +      fprintf (dump_file, "\n");
> +    }
> +}
> +
>  /* Unroll LOOPS.  */
>  void
>  unroll_loops (int flags)
>  {
>    bool changed = false;
> +  df_analyze ();
> +  calculate_loop_reg_pressure ();
>
>    /* Now decide rest of unrolling.  */
>    decide_unrolling (flags);
> @@ -339,6 +627,40 @@ loop_exit_at_end_p (class loop *loop)
>    return true;
>  }
>
> +/* Determine unroll factor for a given LOOP.  Unroll factor
> +   is calculated with quotient of maximum number of available registers
> +   with number of liveness.
> +   If available registers is more unrolling factor is more.  And if
> +   number of liveness is more unroll factor is less.  */
> +int
> +determine_unroll_factor (class loop  *loop)
> +{
> +  int nunroll = 0;
> +  int avail = 0;
> +  int max_pressure = 0;
> +  int num_live = 0;
> +  for (int i = 0; (int) i < ira_pressure_classes_num; i++)
> +    {
> +      enum reg_class pressure_class;
> +
> +      pressure_class = ira_pressure_classes[i];
> +      max_pressure = LOOP_DATA (loop)->max_reg_pressure[pressure_class];
> +      if (max_pressure == 0)
> +       continue;
> +
> +      if (((ira_class_hard_regs_num[pressure_class] - max_pressure) > avail)
> +         || avail == 0)
> +       avail = ira_class_hard_regs_num[pressure_class] - max_pressure;
> +
> +      num_live++;
> +    }
> +
> +  if (avail > 0 && num_live > 0)
> +    nunroll = avail / num_live;
> +
> +  return nunroll;
> +}
> +
>  /* Decide whether to unroll LOOP iterating constant number of times
>     and how much.  */
>
> @@ -357,10 +679,11 @@ decide_unroll_constant_iterations (class loop *loop, int flags)
>      dump_printf (MSG_NOTE,
>                  "considering unrolling loop with constant "
>                  "number of iterations\n");
> -
>    /* nunroll = total number of copies of the original loop body in
>       unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
> -  nunroll = param_max_unrolled_insns / loop->ninsns;
> +  nunroll = determine_unroll_factor (loop);
> +  if (nunroll <= 0)
> +    nunroll = param_max_unrolled_insns / loop->ninsns;
>    nunroll_by_av
>      = param_max_average_unrolled_insns / loop->av_ninsns;
>    if (nunroll > nunroll_by_av)
> @@ -678,7 +1001,9 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
>
>    /* nunroll = total number of copies of the original loop body in
>       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
> -  nunroll = param_max_unrolled_insns / loop->ninsns;
> +  nunroll = determine_unroll_factor (loop);
> +  if (nunroll <= 0)
> +    nunroll = param_max_unrolled_insns / loop->ninsns;
>    nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
>    if (nunroll > nunroll_by_av)
>      nunroll = nunroll_by_av;
> --
> 2.43.5
>
Ajit Agarwal July 4, 2024, 3:06 p.m. UTC | #2
Hello Richard:

On 03/07/24 2:18 pm, Richard Biener wrote:
> On Sun, Jun 30, 2024 at 4:15 AM Ajit Agarwal <aagarwa1@linux.ibm.com> wrote:
>>
>> Hello All:
>>
>> This patch determines Unroll factor based on loop register pressure.
>>
>> Unroll factor is quotient of max of available registers in loop
>> by number of liveness.
>>
>> If available registers increases unroll factor increases.
>> Wherein unroll factor decreases if number of liveness increases.
> 
> Unrolling as implemented does not increase register lifetime unless
> -fsplit-ivs-in-unroller or -fvariable-expansion-in-unroller.  But I do not
> see you looking at those transforms at all.
> 
Based on your feedback I have changed the logic of determining
unroll factor for loops in version-1 of the patch.

Unroll factor is calculated based on available registers and regs
needed inside the loops.

Unroll factor is quotient of max of available registers in loop
over regs needed inside the loops.

Considered -fsplit-ivs-in-unroller or -fvariable-expansion-in-unroller
for controlling unroll factor for loops.

Unroll factor is directly proportional to available register.
Wherein unroll factor is inversely proportional to register
needed inside loops.

Registers needed are the Loop variables/Loop induction
variables inside the loops.

Available registers are determined by the number of hard registers
available for each register class minus register needed inside the
loops for given register class.

Thanks & Regards
Ajit

> Richard.
> 
>> Loop unrolling is based on loop variables that determines unroll
>> factor. Loop variables of the loop are the variables that increases
>> register pressure and take advantage of existing register pressure
>> calculation.
>>
>> Available registers are determined by the number of hard registers
>> available for each register class minus max reg pressure of loop
>> for given register class.
>>
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>>
>> rtl-optimization: Loop unroll factor based on register pressure
>>
>> Unroll factor is calculated based on loop register pressure.
>>
>> Unroll factor is quotient of max of available registers in loop
>> by number of liveness.
>>
>> If available registers increases unroll factor increases.
>> Wherein unroll factor decreases if number of liveness increases.
>>
>> Loop unrolling is based on loop variables that determines unroll
>> factor. Loop variables of the loop are the variables that increases
>> register pressure and take advantage of existing register pressure
>> calculation.
>>
>> Available registers are determined by the number of hard registers
>> available for each register class minus max reg pressure of loop
>> for given register class.
>>
>> 2024-06-29  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>
>>
>> gcc/ChangeLog:
>>
>>         * loop-unroll.cc: Add calculation of register pressure of
>>         the loop and use of that to calculate unroll factor.
>> ---
>>  gcc/loop-unroll.cc | 331 ++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 328 insertions(+), 3 deletions(-)
>>
>> diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
>> index bfdfe6c2bb7..6936ba7afb9 100644
>> --- a/gcc/loop-unroll.cc
>> +++ b/gcc/loop-unroll.cc
>> @@ -35,6 +35,11 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "dojump.h"
>>  #include "expr.h"
>>  #include "dumpfile.h"
>> +#include "regs.h"
>> +#include "ira.h"
>> +#include "rtl-iter.h"
>> +#include "regset.h"
>> +#include "df.h"
>>
>>  /* This pass performs loop unrolling.  We only perform this
>>     optimization on innermost loops (with single exception) because
>> @@ -65,6 +70,38 @@ along with GCC; see the file COPYING3.  If not see
>>     showed that this choice may affect performance in order of several %.
>>     */
>>
>> +class loop_data
>> +{
>> +public:
>> +  class loop *outermost_exit;  /* The outermost exit of the loop.  */
>> +  bool has_call;               /* True if the loop contains a call.  */
>> +  /* Maximal register pressure inside loop for given register class
>> +     (defined only for the pressure classes).  */
>> +  int max_reg_pressure[N_REG_CLASSES];
>> +  /* Loop regs referenced and live pseudo-registers.  */
>> +  bitmap_head regs_ref;
>> +  bitmap_head regs_live;
>> +};
>> +
>> +#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
>> +
>> +/* Record all regs that are set in any one insn.  Communication from
>> +   mark_reg_{store,clobber} and global_conflicts.  Asm can refer to
>> +   all hard-registers.  */
>> +static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
>> +                    ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
>> +/* Number of regs stored in the previous array.  */
>> +static int n_regs_set;
>> +
>> +/* Currently processed loop.  */
>> +static class loop *curr_loop;
>> +
>> +/* Registers currently living.  */
>> +static bitmap_head curr_regs_live;
>> +
>> +/* Current reg pressure for each pressure class.  */
>> +static int curr_reg_pressure[N_REG_CLASSES];
>> +
>>  /* Information about induction variables to split.  */
>>
>>  struct iv_to_split
>> @@ -272,11 +309,262 @@ decide_unrolling (int flags)
>>      }
>>  }
>>
>> +/* Return pressure class and number of needed hard registers (through
>> +   *NREGS) of register REGNO.  */
>> +static enum reg_class
>> +get_regno_pressure_class (int regno, int *nregs)
>> +{
>> +  if (regno >= FIRST_PSEUDO_REGISTER)
>> +    {
>> +      enum reg_class pressure_class;
>> +      pressure_class = reg_allocno_class (regno);
>> +      pressure_class = ira_pressure_class_translate[pressure_class];
>> +      *nregs
>> +       = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
>> +      return pressure_class;
>> +    }
>> +  else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
>> +          && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
>> +    {
>> +      *nregs = 1;
>> +      return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
>> +    }
>> +  else
>> +    {
>> +      *nregs = 0;
>> +      return NO_REGS;
>> +    }
>> +}
>> +
>> +/* Increase (if INCR_P) or decrease current register pressure for
>> +   register REGNO.  */
>> +static void
>> +change_pressure (int regno, bool incr_p)
>> +{
>> +  int nregs;
>> +  enum reg_class pressure_class;
>> +
>> +  pressure_class = get_regno_pressure_class (regno, &nregs);
>> +  if (! incr_p)
>> +    curr_reg_pressure[pressure_class] -= nregs;
>> +  else
>> +    {
>> +      curr_reg_pressure[pressure_class] += nregs;
>> +      if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
>> +         < curr_reg_pressure[pressure_class])
>> +       LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
>> +         = curr_reg_pressure[pressure_class];
>> +    }
>> +}
>> +
>> +/* Mark REGNO birth.  */
>> +static void
>> +mark_regno_live (int regno)
>> +{
>> +  class loop *loop;
>> +
>> +  for (loop = curr_loop;
>> +       loop != current_loops->tree_root;
>> +       loop = loop_outer (loop))
>> +    bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
>> +  if (!bitmap_set_bit (&curr_regs_live, regno))
>> +    return;
>> +  change_pressure (regno, true);
>> +}
>> +
>> +/* Mark REGNO death.  */
>> +static void
>> +mark_regno_death (int regno)
>> +{
>> +  if (! bitmap_clear_bit (&curr_regs_live, regno))
>> +    return;
>> +  change_pressure (regno, false);
>> +}
>> +
>> +/* Mark setting register REG.  */
>> +static void
>> +mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
>> +               void *data ATTRIBUTE_UNUSED)
>> +{
>> +  if (GET_CODE (reg) == SUBREG)
>> +    reg = SUBREG_REG (reg);
>> +
>> +  if (! REG_P (reg))
>> +    return;
>> +
>> +  regs_set[n_regs_set++] = reg;
>> +
>> +  unsigned int end_regno = END_REGNO (reg);
>> +  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
>> +    mark_regno_live (regno);
>> +}
>> +
>> +/* Mark clobbering register REG.  */
>> +static void
>> +mark_reg_clobber (rtx reg, const_rtx setter, void *data)
>> +{
>> +  if (GET_CODE (setter) == CLOBBER)
>> +    mark_reg_store (reg, setter, data);
>> +}
>> +
>> +/* Mark register REG death.  */
>> +static void
>> +mark_reg_death (rtx reg)
>> +{
>> +  unsigned int end_regno = END_REGNO (reg);
>> +  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
>> +    mark_regno_death (regno);
>> +}
>> +
>> +/* Mark occurrence of registers in X for the current loop.  */
>> +static void
>> +mark_ref_regs (rtx x)
>> +{
>> +  RTX_CODE code;
>> +  int i;
>> +  const char *fmt;
>> +
>> +  if (!x)
>> +    return;
>> +
>> +  code = GET_CODE (x);
>> +  if (code == REG)
>> +    {
>> +      class loop *loop;
>> +
>> +      for (loop = curr_loop;
>> +          loop != current_loops->tree_root;
>> +          loop = loop_outer (loop))
>> +       bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
>> +      return;
>> +    }
>> +
>> +  fmt = GET_RTX_FORMAT (code);
>> +  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
>> +    if (fmt[i] == 'e')
>> +      mark_ref_regs (XEXP (x, i));
>> +    else if (fmt[i] == 'E')
>> +      {
>> +       int j;
>> +
>> +       for (j = 0; j < XVECLEN (x, i); j++)
>> +         mark_ref_regs (XVECEXP (x, i, j));
>> +      }
>> +}
>> +
>> +/* Calculate register pressure in the loops.  */
>> +static void
>> +calculate_loop_reg_pressure (void)
>> +{
>> +  int i;
>> +  unsigned int j;
>> +  bitmap_iterator bi;
>> +  basic_block bb;
>> +  rtx_insn *insn;
>> +  rtx link;
>> +  class loop *parent;
>> +
>> +  for (auto loop : loops_list (cfun, 0))
>> +    if (loop->aux == NULL)
>> +      {
>> +       loop->aux = xcalloc (1, sizeof (class loop_data));
>> +       bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
>> +       bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
>> +      }
>> +  ira_setup_eliminable_regset ();
>> +  bitmap_initialize (&curr_regs_live, &reg_obstack);
>> +  FOR_EACH_BB_FN (bb, cfun)
>> +    {
>> +      curr_loop = bb->loop_father;
>> +      if (curr_loop == current_loops->tree_root)
>> +       continue;
>> +
>> +      for (class loop *loop = curr_loop;
>> +          loop != current_loops->tree_root;
>> +          loop = loop_outer (loop))
>> +       bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
>> +
>> +      bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
>> +      for (i = 0; i < ira_pressure_classes_num; i++)
>> +       curr_reg_pressure[ira_pressure_classes[i]] = 0;
>> +      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
>> +       change_pressure (j, true);
>> +
>> +      FOR_BB_INSNS (bb, insn)
>> +       {
>> +         if (! NONDEBUG_INSN_P (insn))
>> +           continue;
>> +
>> +         mark_ref_regs (PATTERN (insn));
>> +         n_regs_set = 0;
>> +         note_stores (insn, mark_reg_clobber, NULL);
>> +
>> +         /* Mark any registers dead after INSN as dead now.  */
>> +
>> +         for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
>> +           if (REG_NOTE_KIND (link) == REG_DEAD)
>> +             mark_reg_death (XEXP (link, 0));
>> +
>> +         /* Mark any registers set in INSN as live,
>> +            and mark them as conflicting with all other live regs.
>> +            Clobbers are processed again, so they conflict with
>> +            the registers that are set.  */
>> +
>> +         note_stores (insn, mark_reg_store, NULL);
>> +
>> +         if (AUTO_INC_DEC)
>> +           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
>> +             if (REG_NOTE_KIND (link) == REG_INC)
>> +               mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
>> +
>> +         while (n_regs_set-- > 0)
>> +           {
>> +             rtx note = find_regno_note (insn, REG_UNUSED,
>> +                                         REGNO (regs_set[n_regs_set]));
>> +             if (! note)
>> +               continue;
>> +
>> +             mark_reg_death (XEXP (note, 0));
>> +           }
>> +       }
>> +    }
>> +  bitmap_release (&curr_regs_live);
>> +  if (dump_file == NULL)
>> +    return;
>> +  for (auto loop : loops_list (cfun, 0))
>> +    {
>> +      parent = loop_outer (loop);
>> +      fprintf (dump_file, "\n  Loop %d (parent %d, header bb%d, depth %d)\n",
>> +              loop->num, (parent == NULL ? -1 : parent->num),
>> +              loop->header->index, loop_depth (loop));
>> +      fprintf (dump_file, "\n    ref. regnos:");
>> +      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
>> +       fprintf (dump_file, " %d", j);
>> +      fprintf (dump_file, "\n    live regnos:");
>> +      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
>> +       fprintf (dump_file, " %d", j);
>> +      fprintf (dump_file, "\n    Pressure:");
>> +      for (i = 0; (int) i < ira_pressure_classes_num; i++)
>> +       {
>> +         enum reg_class pressure_class;
>> +
>> +         pressure_class = ira_pressure_classes[i];
>> +         if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
>> +           continue;
>> +         fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
>> +                  LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
>> +       }
>> +      fprintf (dump_file, "\n");
>> +    }
>> +}
>> +
>>  /* Unroll LOOPS.  */
>>  void
>>  unroll_loops (int flags)
>>  {
>>    bool changed = false;
>> +  df_analyze ();
>> +  calculate_loop_reg_pressure ();
>>
>>    /* Now decide rest of unrolling.  */
>>    decide_unrolling (flags);
>> @@ -339,6 +627,40 @@ loop_exit_at_end_p (class loop *loop)
>>    return true;
>>  }
>>
>> +/* Determine unroll factor for a given LOOP.  Unroll factor
>> +   is calculated with quotient of maximum number of available registers
>> +   with number of liveness.
>> +   If available registers is more unrolling factor is more.  And if
>> +   number of liveness is more unroll factor is less.  */
>> +int
>> +determine_unroll_factor (class loop  *loop)
>> +{
>> +  int nunroll = 0;
>> +  int avail = 0;
>> +  int max_pressure = 0;
>> +  int num_live = 0;
>> +  for (int i = 0; (int) i < ira_pressure_classes_num; i++)
>> +    {
>> +      enum reg_class pressure_class;
>> +
>> +      pressure_class = ira_pressure_classes[i];
>> +      max_pressure = LOOP_DATA (loop)->max_reg_pressure[pressure_class];
>> +      if (max_pressure == 0)
>> +       continue;
>> +
>> +      if (((ira_class_hard_regs_num[pressure_class] - max_pressure) > avail)
>> +         || avail == 0)
>> +       avail = ira_class_hard_regs_num[pressure_class] - max_pressure;
>> +
>> +      num_live++;
>> +    }
>> +
>> +  if (avail > 0 && num_live > 0)
>> +    nunroll = avail / num_live;
>> +
>> +  return nunroll;
>> +}
>> +
>>  /* Decide whether to unroll LOOP iterating constant number of times
>>     and how much.  */
>>
>> @@ -357,10 +679,11 @@ decide_unroll_constant_iterations (class loop *loop, int flags)
>>      dump_printf (MSG_NOTE,
>>                  "considering unrolling loop with constant "
>>                  "number of iterations\n");
>> -
>>    /* nunroll = total number of copies of the original loop body in
>>       unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
>> -  nunroll = param_max_unrolled_insns / loop->ninsns;
>> +  nunroll = determine_unroll_factor (loop);
>> +  if (nunroll <= 0)
>> +    nunroll = param_max_unrolled_insns / loop->ninsns;
>>    nunroll_by_av
>>      = param_max_average_unrolled_insns / loop->av_ninsns;
>>    if (nunroll > nunroll_by_av)
>> @@ -678,7 +1001,9 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
>>
>>    /* nunroll = total number of copies of the original loop body in
>>       unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
>> -  nunroll = param_max_unrolled_insns / loop->ninsns;
>> +  nunroll = determine_unroll_factor (loop);
>> +  if (nunroll <= 0)
>> +    nunroll = param_max_unrolled_insns / loop->ninsns;
>>    nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
>>    if (nunroll > nunroll_by_av)
>>      nunroll = nunroll_by_av;
>> --
>> 2.43.5
>>
diff mbox series

Patch

diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
index bfdfe6c2bb7..6936ba7afb9 100644
--- a/gcc/loop-unroll.cc
+++ b/gcc/loop-unroll.cc
@@ -35,6 +35,11 @@  along with GCC; see the file COPYING3.  If not see
 #include "dojump.h"
 #include "expr.h"
 #include "dumpfile.h"
+#include "regs.h"
+#include "ira.h"
+#include "rtl-iter.h"
+#include "regset.h"
+#include "df.h"
 
 /* This pass performs loop unrolling.  We only perform this
    optimization on innermost loops (with single exception) because
@@ -65,6 +70,38 @@  along with GCC; see the file COPYING3.  If not see
    showed that this choice may affect performance in order of several %.
    */
 
+class loop_data
+{
+public:
+  class loop *outermost_exit;	/* The outermost exit of the loop.  */
+  bool has_call;		/* True if the loop contains a call.  */
+  /* Maximal register pressure inside loop for given register class
+     (defined only for the pressure classes).  */
+  int max_reg_pressure[N_REG_CLASSES];
+  /* Loop regs referenced and live pseudo-registers.  */
+  bitmap_head regs_ref;
+  bitmap_head regs_live;
+};
+
+#define LOOP_DATA(LOOP) ((class loop_data *) (LOOP)->aux)
+
+/* Record all regs that are set in any one insn.  Communication from
+   mark_reg_{store,clobber} and global_conflicts.  Asm can refer to
+   all hard-registers.  */
+static rtx regs_set[(FIRST_PSEUDO_REGISTER > MAX_RECOG_OPERANDS
+		     ? FIRST_PSEUDO_REGISTER : MAX_RECOG_OPERANDS) * 2];
+/* Number of regs stored in the previous array.  */
+static int n_regs_set;
+
+/* Currently processed loop.  */
+static class loop *curr_loop;
+
+/* Registers currently living.  */
+static bitmap_head curr_regs_live;
+
+/* Current reg pressure for each pressure class.  */
+static int curr_reg_pressure[N_REG_CLASSES];
+
 /* Information about induction variables to split.  */
 
 struct iv_to_split
@@ -272,11 +309,262 @@  decide_unrolling (int flags)
     }
 }
 
+/* Return pressure class and number of needed hard registers (through
+   *NREGS) of register REGNO.  */
+static enum reg_class
+get_regno_pressure_class (int regno, int *nregs)
+{
+  if (regno >= FIRST_PSEUDO_REGISTER)
+    {
+      enum reg_class pressure_class;
+      pressure_class = reg_allocno_class (regno);
+      pressure_class = ira_pressure_class_translate[pressure_class];
+      *nregs
+	= ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
+      return pressure_class;
+    }
+  else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
+	   && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
+    {
+      *nregs = 1;
+      return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
+    }
+  else
+    {
+      *nregs = 0;
+      return NO_REGS;
+    }
+}
+
+/* Increase (if INCR_P) or decrease current register pressure for
+   register REGNO.  */
+static void
+change_pressure (int regno, bool incr_p)
+{
+  int nregs;
+  enum reg_class pressure_class;
+
+  pressure_class = get_regno_pressure_class (regno, &nregs);
+  if (! incr_p)
+    curr_reg_pressure[pressure_class] -= nregs;
+  else
+    {
+      curr_reg_pressure[pressure_class] += nregs;
+      if (LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
+	  < curr_reg_pressure[pressure_class])
+	LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class]
+	  = curr_reg_pressure[pressure_class];
+    }
+}
+
+/* Mark REGNO birth.  */
+static void
+mark_regno_live (int regno)
+{
+  class loop *loop;
+
+  for (loop = curr_loop;
+       loop != current_loops->tree_root;
+       loop = loop_outer (loop))
+    bitmap_set_bit (&LOOP_DATA (loop)->regs_live, regno);
+  if (!bitmap_set_bit (&curr_regs_live, regno))
+    return;
+  change_pressure (regno, true);
+}
+
+/* Mark REGNO death.  */
+static void
+mark_regno_death (int regno)
+{
+  if (! bitmap_clear_bit (&curr_regs_live, regno))
+    return;
+  change_pressure (regno, false);
+}
+
+/* Mark setting register REG.  */
+static void
+mark_reg_store (rtx reg, const_rtx setter ATTRIBUTE_UNUSED,
+		void *data ATTRIBUTE_UNUSED)
+{
+  if (GET_CODE (reg) == SUBREG)
+    reg = SUBREG_REG (reg);
+
+  if (! REG_P (reg))
+    return;
+
+  regs_set[n_regs_set++] = reg;
+
+  unsigned int end_regno = END_REGNO (reg);
+  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+    mark_regno_live (regno);
+}
+
+/* Mark clobbering register REG.  */
+static void
+mark_reg_clobber (rtx reg, const_rtx setter, void *data)
+{
+  if (GET_CODE (setter) == CLOBBER)
+    mark_reg_store (reg, setter, data);
+}
+
+/* Mark register REG death.  */
+static void
+mark_reg_death (rtx reg)
+{
+  unsigned int end_regno = END_REGNO (reg);
+  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
+    mark_regno_death (regno);
+}
+
+/* Mark occurrence of registers in X for the current loop.  */
+static void
+mark_ref_regs (rtx x)
+{
+  RTX_CODE code;
+  int i;
+  const char *fmt;
+
+  if (!x)
+    return;
+
+  code = GET_CODE (x);
+  if (code == REG)
+    {
+      class loop *loop;
+
+      for (loop = curr_loop;
+	   loop != current_loops->tree_root;
+	   loop = loop_outer (loop))
+	bitmap_set_bit (&LOOP_DATA (loop)->regs_ref, REGNO (x));
+      return;
+    }
+
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    if (fmt[i] == 'e')
+      mark_ref_regs (XEXP (x, i));
+    else if (fmt[i] == 'E')
+      {
+	int j;
+
+	for (j = 0; j < XVECLEN (x, i); j++)
+	  mark_ref_regs (XVECEXP (x, i, j));
+      }
+}
+
+/* Calculate register pressure in the loops.  */
+static void
+calculate_loop_reg_pressure (void)
+{
+  int i;
+  unsigned int j;
+  bitmap_iterator bi;
+  basic_block bb;
+  rtx_insn *insn;
+  rtx link;
+  class loop *parent;
+
+  for (auto loop : loops_list (cfun, 0))
+    if (loop->aux == NULL)
+      {
+	loop->aux = xcalloc (1, sizeof (class loop_data));
+	bitmap_initialize (&LOOP_DATA (loop)->regs_ref, &reg_obstack);
+	bitmap_initialize (&LOOP_DATA (loop)->regs_live, &reg_obstack);
+      }
+  ira_setup_eliminable_regset ();
+  bitmap_initialize (&curr_regs_live, &reg_obstack);
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      curr_loop = bb->loop_father;
+      if (curr_loop == current_loops->tree_root)
+	continue;
+
+      for (class loop *loop = curr_loop;
+	   loop != current_loops->tree_root;
+	   loop = loop_outer (loop))
+	bitmap_ior_into (&LOOP_DATA (loop)->regs_live, DF_LR_IN (bb));
+
+      bitmap_copy (&curr_regs_live, DF_LR_IN (bb));
+      for (i = 0; i < ira_pressure_classes_num; i++)
+	curr_reg_pressure[ira_pressure_classes[i]] = 0;
+      EXECUTE_IF_SET_IN_BITMAP (&curr_regs_live, 0, j, bi)
+	change_pressure (j, true);
+
+      FOR_BB_INSNS (bb, insn)
+	{
+	  if (! NONDEBUG_INSN_P (insn))
+	    continue;
+
+	  mark_ref_regs (PATTERN (insn));
+	  n_regs_set = 0;
+	  note_stores (insn, mark_reg_clobber, NULL);
+
+	  /* Mark any registers dead after INSN as dead now.  */
+
+	  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+	    if (REG_NOTE_KIND (link) == REG_DEAD)
+	      mark_reg_death (XEXP (link, 0));
+
+	  /* Mark any registers set in INSN as live,
+	     and mark them as conflicting with all other live regs.
+	     Clobbers are processed again, so they conflict with
+	     the registers that are set.  */
+
+	  note_stores (insn, mark_reg_store, NULL);
+
+	  if (AUTO_INC_DEC)
+	    for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+	      if (REG_NOTE_KIND (link) == REG_INC)
+		mark_reg_store (XEXP (link, 0), NULL_RTX, NULL);
+
+	  while (n_regs_set-- > 0)
+	    {
+	      rtx note = find_regno_note (insn, REG_UNUSED,
+					  REGNO (regs_set[n_regs_set]));
+	      if (! note)
+		continue;
+
+	      mark_reg_death (XEXP (note, 0));
+	    }
+	}
+    }
+  bitmap_release (&curr_regs_live);
+  if (dump_file == NULL)
+    return;
+  for (auto loop : loops_list (cfun, 0))
+    {
+      parent = loop_outer (loop);
+      fprintf (dump_file, "\n  Loop %d (parent %d, header bb%d, depth %d)\n",
+	       loop->num, (parent == NULL ? -1 : parent->num),
+	       loop->header->index, loop_depth (loop));
+      fprintf (dump_file, "\n    ref. regnos:");
+      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_ref, 0, j, bi)
+	fprintf (dump_file, " %d", j);
+      fprintf (dump_file, "\n    live regnos:");
+      EXECUTE_IF_SET_IN_BITMAP (&LOOP_DATA (loop)->regs_live, 0, j, bi)
+	fprintf (dump_file, " %d", j);
+      fprintf (dump_file, "\n    Pressure:");
+      for (i = 0; (int) i < ira_pressure_classes_num; i++)
+	{
+	  enum reg_class pressure_class;
+
+	  pressure_class = ira_pressure_classes[i];
+	  if (LOOP_DATA (loop)->max_reg_pressure[pressure_class] == 0)
+	    continue;
+	  fprintf (dump_file, " %s=%d", reg_class_names[pressure_class],
+		   LOOP_DATA (loop)->max_reg_pressure[pressure_class]);
+	}
+      fprintf (dump_file, "\n");
+    }
+}
+
 /* Unroll LOOPS.  */
 void
 unroll_loops (int flags)
 {
   bool changed = false;
+  df_analyze ();
+  calculate_loop_reg_pressure ();
 
   /* Now decide rest of unrolling.  */
   decide_unrolling (flags);
@@ -339,6 +627,40 @@  loop_exit_at_end_p (class loop *loop)
   return true;
 }
 
+/* Determine unroll factor for a given LOOP.  Unroll factor
+   is calculated with quotient of maximum number of available registers
+   with number of liveness.
+   If available registers is more unrolling factor is more.  And if
+   number of liveness is more unroll factor is less.  */
+int
+determine_unroll_factor (class loop  *loop)
+{
+  int nunroll = 0;
+  int avail = 0;
+  int max_pressure = 0;
+  int num_live = 0;
+  for (int i = 0; (int) i < ira_pressure_classes_num; i++)
+    {
+      enum reg_class pressure_class;
+
+      pressure_class = ira_pressure_classes[i];
+      max_pressure = LOOP_DATA (loop)->max_reg_pressure[pressure_class];
+      if (max_pressure == 0)
+	continue;
+
+      if (((ira_class_hard_regs_num[pressure_class] - max_pressure) > avail)
+	  || avail == 0)
+	avail = ira_class_hard_regs_num[pressure_class] - max_pressure;
+
+      num_live++;
+    }
+
+  if (avail > 0 && num_live > 0)
+    nunroll = avail / num_live;
+
+  return nunroll;
+}
+
 /* Decide whether to unroll LOOP iterating constant number of times
    and how much.  */
 
@@ -357,10 +679,11 @@  decide_unroll_constant_iterations (class loop *loop, int flags)
     dump_printf (MSG_NOTE,
 		 "considering unrolling loop with constant "
 		 "number of iterations\n");
-
   /* nunroll = total number of copies of the original loop body in
      unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
-  nunroll = param_max_unrolled_insns / loop->ninsns;
+  nunroll = determine_unroll_factor (loop);
+  if (nunroll <= 0)
+    nunroll = param_max_unrolled_insns / loop->ninsns;
   nunroll_by_av
     = param_max_average_unrolled_insns / loop->av_ninsns;
   if (nunroll > nunroll_by_av)
@@ -678,7 +1001,9 @@  decide_unroll_runtime_iterations (class loop *loop, int flags)
 
   /* nunroll = total number of copies of the original loop body in
      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
-  nunroll = param_max_unrolled_insns / loop->ninsns;
+  nunroll = determine_unroll_factor (loop);
+  if (nunroll <= 0)
+    nunroll = param_max_unrolled_insns / loop->ninsns;
   nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
   if (nunroll > nunroll_by_av)
     nunroll = nunroll_by_av;