diff mbox

[RFC] Preferred rename register in regrename pass

Message ID B5E67142681B53468FAF6B7C31356562442BE781@hhmail02.hh.imgtec.org
State New
Headers show

Commit Message

Robert Suchanek Nov. 9, 2015, 1:32 p.m. UTC
Hi Bernd,

Sorry for late reply.

The updated patch was bootstrapped on x86_64-unknown-linux-gnu and cross tested
on mips-img-linux-gnu using r229786.

The results below were generated for CSiBE benchmark and the numbers in
columns express bytes in format 'net (gain/loss)' to show the difference
with and without the patch when -frename-registers switch is used. 

I looked at the gains, especially for MIPS and 'teem', and it appears
that renaming registers affects the rtl_dce pass i.e. makes it less effective.
However, on average case the patch appears to reduce the code size
slightly and moves are genuinely removed.  

I haven't tested the performance extensively but the SPEC benchmarks
showed almost the same results, which could be just the noise. 


               | MIPS n64 -Os   | MIPS o32 -Os   | x86_64 -Os       |
---------------+----------------+----------------+------------------+
bzip2-1.0.2    | -32  (0/-32)   | -24  (0/-24)   | -34   (1/-35)    |
cg_compiler    | -172 (0/-172)  | -156 (0/-156)  | -46   (0/-46)    |
compiler       | -36  (0/-36)   | -24  (0/-24)   | -6    (0/-6)     |
flex-2.5.31    | -68  (0/-68)   | -80  (0/-80)   | -98   (7/-105)   |
jikespg-1.3    | -284 (0/-284)  | -204 (0/-204)  | -127  (9/-136)   |
jpeg-6b        | -52  (8/-60)   | -20  (0/-20)   | -80   (11/-91)   |
libmspack      | -136 (0/-136)  | -28  (0/-28)   | -33   (23/-56)   |
libpng-1.2.5   | -72  (0/-72)   | -64  (0/-64)   | -176  (14/-190)  |
linux-2.4.23   | -700 (20/-720) | -384 (0/-384)  | -691  (44/-735)  |
lwip-0.5.3     | -4   (0/-4)    | -4   (0/-4)    | +4    (13/-9)    |
mpeg2dec-0.3.1 | -16  (0/-16)   |                | -142  (6/-148)   |
mpgcut-1.1     | -24  (0/-24)   | -12  (4/-16)   | -2    (0/-2)     |
OpenTCP-1.0.4  | -28  (0/-28)   | -12  (0/-12)   | -1    (0/-1)     |
replaypc-0.4.0 | -32  (0/-32)   | -12  (0/-12)   | -4    (2/-6)     |
teem-1.6.0     | -88  (480/-568)| +108 (564/-456)| -1272 (117/-1389)|
ttt-0.10.1     | -24  (0/-24)   | -20  (0/-20)   | -16   (0/-16)    |
unrarlib-0.4.0 | -20  (0/-20)   | -8   (0/-8)    | -59   (9/-68)    |
zlib-1.1.4     | -12  (0/-12)   | -4   (0/-4)    | -23   (8/-31)    |


               | MIPS n64 -O2   | MIPS o32 -O2   | x86_64 -O2       |
---------------+----------------+----------------+------------------+
bzip2-1.0.2    | -104 (0/-104)  | -48  (0/-48)   | -55   (0/-55)    |
cg_compiler    | -184 (4/-188)  | -232 (0/-232)  | -31   (5/-36)    |
compiler       | -32  (0/-32)   | -12  (0/-12)   | -4    (1/-5)     |
flex-2.5.31    | -96  (0/-96)   | -112 (0/-112)  | -12   (34/-46)   |
jikespg-1.3    | -540 (20/-560) | -476 (4/-480)  | -154  (30/-184)  |
jpeg-6b        | -112 (16/-128) | -60  (0/-60)   | -136  (84/-220)  |
libmspack      | -164 (0/-164)  | -40  (0/-40)   | -87   (32/-119)  |
libpng-1.2.5   | -120 (8/-128)  | -92  (4/-96)   | -140  (53/-193)  |
linux-2.4.23   | -596 (12/-608) | -320 (8/-328)  | -794  (285/-1079)|
lwip-0.5.3     | -8   (0/-8)    | -8   (0/-8)    | +2    (4/-2)     |
mpeg2dec-0.3.1 | -44  (0/-44)   | -4   (0/-4)    | -122  (8/-130)   |
mpgcut-1.1     | -8   (0/-8)    | -8   (0/-8)    | +28   (32/-4)    |
OpenTCP-1.0.4  | -4   (0/-4)    | -4   (0/-4)    | -2    (0/-2)     |
replaypc-0.4.0 | -20  (0/-20)   | -24  (0/-24)   | -13   (0/-13)    |
teem-1.6.0     | +100 (740/-640)| +84  (736/-652)| -1998 (168/-2166)|
ttt-0.10.1     | -16  (0/-16)   |                |                  |
unrarlib-0.4.0 | -16  (0/-16)   | -8   (0/-8)    | +19   (37/-18)   |
zlib-1.1.4     | -12  (0/-12)   | -4   (0/-4)    | -15   (1/-16)    |

Regards,
Robert

> Hi Robert,
> > gcc/
> > 	* regrename.c (create_new_chain): Initialize terminated_dead,
> > 	renamed and tied_chain.
> > 	(find_best_rename_reg): Pick and check register from the tied chain.
> > 	(regrename_do_replace): Mark head as renamed.
> > 	(scan_rtx_reg): Tie chains in move insns.  Set terminate_dead flag.
> > 	* regrename.h (struct du_head): Add tied_chain, renamed and
> > 	terminated_dead members.
> 
> Thanks - this looks a lot better already. You didn't say how it was
> bootstrapped and tested; please include this information for future
> submissions. For a patch like this, some data on the improvement you got
> would also be appreciated.
> 
> I'd still like to investigate the possibility of further simplification:
> 
> > +	    {
> > +	      /* Find the input chain.  */
> > +	      for (i = c->id - 1; id_to_chain.iterate (i, &head); i--)
> > +		if (head->last && head->last->insn == insn
> > +		    && head->terminated_dead)
> > +		  {
> > +		    gcc_assert (head->regno == REGNO (recog_data.operand[1]));
> > +		    c->tied_chain = head;
> > +		    head->tied_chain = c;
> > +
> > +		    if (dump_file)
> > +		      fprintf (dump_file, "Tying chain %s (%d) with %s (%d)\n",
> > +			       reg_names[c->regno], c->id,
> > +			       reg_names[head->regno], head->id);
> > +		    /* Once tied, we're done.  */
> > +		    break;
> > +		  }
> > +	    }
> > +	}
> > +
> This looks like it's a little more complicated than necessary. Couldn't
> you add a static var "terminated_this_insn" which gets initialized to
> NULL and set when a reg dies, and then you check this here rather than
> having a loop? That would also eliminate the new "terminated_dead" field.
> 
> Other than that I'm pretty happy with this.
> 
> 
> Bernd

gcc/
	* regrename.c (create_new_chain): Initialize renamed and tied_chain.
	(build_def_use): Initialize terminated_this_insn.
	(find_best_rename_reg): Pick and check register from the tied chain.
	(regrename_do_replace): Mark head as renamed.
	(struct du_head *terminated_this_insn). New static variable.
	(scan_rtx_reg): Tie chains in move insns.  Set terminated_this_insn.
	* regrename.h (struct du_head): Add tied_chain, renamed members.
---
 gcc/regrename.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 gcc/regrename.h |  4 ++++
 2 files changed, 49 insertions(+), 1 deletion(-)

Comments

Bernd Schmidt Nov. 9, 2015, 4:30 p.m. UTC | #1
On 11/09/2015 02:32 PM, Robert Suchanek wrote:
> The results below were generated for CSiBE benchmark and the numbers in
> columns express bytes in format 'net (gain/loss)' to show the difference
> with and without the patch when -frename-registers switch is used.

I'm not entirely sure what the numbers represent. I can see how you'd 
measure at a net size change (I assume a negative net is the intended 
goal), but how did you arrive at gain/loss numbers?

In any case, assuming negative is good, the results seem pretty decent.

> +	      gcc_assert
> +		(terminated_this_insn->regno == REGNO (recog_data.operand[1]));

Maybe break the line before the == so that you can start the arguments 
on the same line as the assert.

> +  /* Nonzero if the chain is renamed.  */
> +  unsigned int renamed:1;

I'd write "has already been renamed" since that is maybe slightly less 
ambiguous.

Ok with those changes.


Bernd
Robert Suchanek Nov. 9, 2015, 5:01 p.m. UTC | #2
Hi,
 
> On 11/09/2015 02:32 PM, Robert Suchanek wrote:
> > The results below were generated for CSiBE benchmark and the numbers in
> > columns express bytes in format 'net (gain/loss)' to show the difference
> > with and without the patch when -frename-registers switch is used.
> 
> I'm not entirely sure what the numbers represent. I can see how you'd
> measure at a net size change (I assume a negative net is the intended
> goal), but how did you arrive at gain/loss numbers?
> 
> In any case, assuming negative is good, the results seem pretty decent.

The gain/loss was calculated based on per function analysis.
Each flavour e.g. MIPS n64 -Os was ran with/without the patch and compared to
the base i.e. without the patch. The patched version of each function may
show either positive (larger code size), negative or no difference to
the code size. The gain/loss in a cell is the sum of all positive/negative
numbers for a test. The negatives, as you said, are the good ones.

> 
> > +	      gcc_assert
> > +		(terminated_this_insn->regno == REGNO (recog_data.operand[1]));
> 
> Maybe break the line before the == so that you can start the arguments
> on the same line as the assert.
> 
> > +  /* Nonzero if the chain is renamed.  */
> > +  unsigned int renamed:1;
> 
> I'd write "has already been renamed" since that is maybe slightly less
> ambiguous.
> 
> Ok with those changes.
> 
> 
> Bernd

Will do the changes and apply.

Regards,
Robert
Christophe Lyon Nov. 10, 2015, 11:21 a.m. UTC | #3
On 9 November 2015 at 18:01, Robert Suchanek <Robert.Suchanek@imgtec.com> wrote:
> Hi,
>
>> On 11/09/2015 02:32 PM, Robert Suchanek wrote:
>> > The results below were generated for CSiBE benchmark and the numbers in
>> > columns express bytes in format 'net (gain/loss)' to show the difference
>> > with and without the patch when -frename-registers switch is used.
>>
>> I'm not entirely sure what the numbers represent. I can see how you'd
>> measure at a net size change (I assume a negative net is the intended
>> goal), but how did you arrive at gain/loss numbers?
>>
>> In any case, assuming negative is good, the results seem pretty decent.
>
> The gain/loss was calculated based on per function analysis.
> Each flavour e.g. MIPS n64 -Os was ran with/without the patch and compared to
> the base i.e. without the patch. The patched version of each function may
> show either positive (larger code size), negative or no difference to
> the code size. The gain/loss in a cell is the sum of all positive/negative
> numbers for a test. The negatives, as you said, are the good ones.
>
>>
>> > +         gcc_assert
>> > +           (terminated_this_insn->regno == REGNO (recog_data.operand[1]));
>>
>> Maybe break the line before the == so that you can start the arguments
>> on the same line as the assert.
>>
>> > +  /* Nonzero if the chain is renamed.  */
>> > +  unsigned int renamed:1;
>>
>> I'd write "has already been renamed" since that is maybe slightly less
>> ambiguous.
>>
>> Ok with those changes.
>>
>>
>> Bernd
>
> Will do the changes and apply.
>

Hi,

Since you committed this (r230087 if I'm correct), I can see that GCC
fails to build
ligfortran for target arm-none-linuxgnueabi --with-cpu=cortex-a9.

The backtrace is:
/tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgfortran/generated/matmul_i8.c:
In function
 'matmul_i8':
/tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libgfortran/generated/matmul_i8.c:374:1:
internal compiler error: in scan_rtx_reg, at regrename.c:1074
 }
 ^
0xa13940 scan_rtx_reg
        /tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/regrename.c:1074
0xa1451d record_out_operands
        /tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/regrename.c:1554
0xa14d12 build_def_use
        /tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/regrename.c:1802
0xa1533e regrename_analyze(bitmap_head*)
        /tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/regrename.c:726
0xa161f9 regrename_optimize
        /tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/regrename.c:1871
0xa161f9 execute
        /tmp/8079076_3.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/regrename.c:1908
Please submit a full bug report,

Can you have a look?


> Regards,
> Robert
>
Robert Suchanek Nov. 10, 2015, 11:41 a.m. UTC | #4
Hi Christophe,

> Hi,

> 

> Since you committed this (r230087 if I'm correct), I can see that GCC

> fails to build

> ligfortran for target arm-none-linuxgnueabi --with-cpu=cortex-a9.

...
> 

> Can you have a look?


Sorry for the breakage. I see that my assertion is being triggered.
I'll investigate this and check whether the assertion is correct or
something else needs to be done.

Robert
Bernd Schmidt Nov. 10, 2015, 12:10 p.m. UTC | #5
On 11/09/2015 02:32 PM, Robert Suchanek wrote:
> @@ -1707,6 +1749,8 @@ build_def_use (basic_block bb)
>   	      scan_rtx (insn, &XEXP (note, 0), ALL_REGS, mark_read,
>   			OP_INOUT);
>
> +	  terminated_this_insn = NULL;
> +
>   	  /* Step 4: Close chains for registers that die here, unless
>   	     the register is mentioned in a REG_UNUSED note.  In that
>   	     case we keep the chain open until step #7 below to ensure

I suspect you'll want to move this earlier, just before step 1. My guess 
would be that the reported failure was for an earlyclobber operand.


Bernd
Christophe Lyon Nov. 10, 2015, 4:22 p.m. UTC | #6
On 10 November 2015 at 12:41, Robert Suchanek
<Robert.Suchanek@imgtec.com> wrote:
> Hi Christophe,
>
>> Hi,
>>
>> Since you committed this (r230087 if I'm correct), I can see that GCC
>> fails to build
>> ligfortran for target arm-none-linuxgnueabi --with-cpu=cortex-a9.
> ...
>>
>> Can you have a look?
>
> Sorry for the breakage. I see that my assertion is being triggered.
> I'll investigate this and check whether the assertion is correct or
> something else needs to be done.
>

Now that 'make check' has had enough time to run, I can see several
regressions in the configurations where GCC still builds.
For more details:
http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/230087/report-build-info.html


> Robert
James Greenhalgh Nov. 10, 2015, 5:43 p.m. UTC | #7
On Tue, Nov 10, 2015 at 05:22:40PM +0100, Christophe Lyon wrote:
> On 10 November 2015 at 12:41, Robert Suchanek
> <Robert.Suchanek@imgtec.com> wrote:
> > Hi Christophe,
> >
> >> Hi,
> >>
> >> Since you committed this (r230087 if I'm correct), I can see that GCC
> >> fails to build
> >> ligfortran for target arm-none-linuxgnueabi --with-cpu=cortex-a9.
> > ...
> >>
> >> Can you have a look?
> >
> > Sorry for the breakage. I see that my assertion is being triggered.
> > I'll investigate this and check whether the assertion is correct or
> > something else needs to be done.
> >
> 
> Now that 'make check' has had enough time to run, I can see several
> regressions in the configurations where GCC still builds.
> For more details:
> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/230087/report-build-info.html
> 

This also causes failures for AArch64 -mcpu=cortex-a57 targets. This
testcase:

  void
  foo (unsigned char *out, const unsigned char *in, int a)
  {
    for (int i = 0; i < a; i++)
      {
        out[0] = in[2];
        out[1] = in[1];
        out[2] = in[0];
        in += 3;
        out += 3;
      }
  }

Fails as so:

  foo.c: In function 'void foo(unsigned char*, const unsigned char*, int)':
  foo.c:12:1: internal compiler error: in scan_rtx_reg, at regrename.c:1074
   }
   ^

  0xbe00f8 scan_rtx_reg
	..../gcc/regrename.c:1073
  0xbe0ad5 scan_rtx
	..../gcc/regrename.c:1401
  0xbe1038 record_out_operands
	..../gcc/regrename.c:1554
  0xbe1f50 build_def_use
	..../gcc/regrename.c:1802
  0xbe1f50 regrename_analyze(bitmap_head*)
	..../gcc/regrename.c:726
  0xf7a0c7 func_fma_steering::execute_fma_steering()
	..../gcc/config/aarch64/cortex-a57-fma-steering.c:1026
  0xf7a9c1 pass_fma_steering::execute(function*)
	..../gcc/config/aarch64/cortex-a57-fma-steering.c:1063
  Please submit a full bug report,
  with preprocessed source if appropriate.
  Please include the complete backtrace with any bug report.
  See <http://gcc.gnu.org/bugs.html> for instructions.

When compiled with:

  <gcc-aarch64> -O3 -mcpu=cortex-a57 foo.c

Thanks,
James
diff mbox

Patch

diff --git a/gcc/regrename.c b/gcc/regrename.c
index 5f383fc..d3f9951 100644
--- a/gcc/regrename.c
+++ b/gcc/regrename.c
@@ -130,6 +130,9 @@  static HARD_REG_SET live_hard_regs;
    record_operand_use.  */
 static operand_rr_info *cur_operand;
 
+/* Set while scanning RTL if a register dies.  Used to tie chains.  */
+static struct du_head *terminated_this_insn;
+
 /* Return the chain corresponding to id number ID.  Take into account that
    chains may have been merged.  */
 du_head_p
@@ -224,6 +227,8 @@  create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
   head->nregs = this_nregs;
   head->need_caller_save_reg = 0;
   head->cannot_rename = 0;
+  head->renamed = 0;
+  head->tied_chain = NULL;
 
   id_to_chain.safe_push (head);
   head->id = current_id++;
@@ -366,6 +371,13 @@  find_rename_reg (du_head_p this_head, enum reg_class super_class,
   preferred_class
     = (enum reg_class) targetm.preferred_rename_class (super_class);
 
+  /* Pick and check the register from the tied chain iff the tied chain
+     is not renamed.  */
+  if (this_head->tied_chain && !this_head->tied_chain->renamed
+      && check_new_reg_p (old_reg, this_head->tied_chain->regno,
+			  this_head, *unavailable))
+    return this_head->tied_chain->regno;
+
   /* If PREFERRED_CLASS is not NO_REGS, we iterate in the first pass
      over registers that belong to PREFERRED_CLASS and try to find the
      best register within the class.  If that failed, we iterate in
@@ -960,6 +972,7 @@  regrename_do_replace (struct du_head *head, int reg)
     return false;
 
   mode = GET_MODE (*head->first->loc);
+  head->renamed = 1;
   head->regno = reg;
   head->nregs = hard_regno_nregs[reg][mode];
   return true;
@@ -1043,7 +1056,34 @@  scan_rtx_reg (rtx_insn *insn, rtx *loc, enum reg_class cl, enum scan_actions act
   if (action == mark_write)
     {
       if (type == OP_OUT)
-	create_new_chain (this_regno, this_nregs, loc, insn, cl);
+	{
+	  du_head_p c;
+	  rtx pat = PATTERN (insn);
+
+	  c = create_new_chain (this_regno, this_nregs, loc, insn, cl);
+
+	  /* We try to tie chains in a move instruction for
+	     a single output.  */
+	  if (recog_data.n_operands == 2
+	      && GET_CODE (pat) == SET
+	      && GET_CODE (SET_DEST (pat)) == REG
+	      && GET_CODE (SET_SRC (pat)) == REG
+	      && terminated_this_insn)
+	    {
+	      gcc_assert
+		(terminated_this_insn->regno == REGNO (recog_data.operand[1]));
+
+	      c->tied_chain = terminated_this_insn;
+	      terminated_this_insn->tied_chain = c;
+
+	      if (dump_file)
+		fprintf (dump_file, "Tying chain %s (%d) with %s (%d)\n",
+			 reg_names[c->regno], c->id,
+			 reg_names[terminated_this_insn->regno],
+			 terminated_this_insn->id);
+	    }
+	}
+
       return;
     }
 
@@ -1151,6 +1191,8 @@  scan_rtx_reg (rtx_insn *insn, rtx *loc, enum reg_class cl, enum scan_actions act
 		SET_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
 	    }
 
+	  if (action == terminate_dead)
+	    terminated_this_insn = *p;
 	  *p = next;
 	  if (dump_file)
 	    fprintf (dump_file,
@@ -1707,6 +1749,8 @@  build_def_use (basic_block bb)
 	      scan_rtx (insn, &XEXP (note, 0), ALL_REGS, mark_read,
 			OP_INOUT);
 
+	  terminated_this_insn = NULL;
+
 	  /* Step 4: Close chains for registers that die here, unless
 	     the register is mentioned in a REG_UNUSED note.  In that
 	     case we keep the chain open until step #7 below to ensure
diff --git a/gcc/regrename.h b/gcc/regrename.h
index bbe156d..9c72181 100644
--- a/gcc/regrename.h
+++ b/gcc/regrename.h
@@ -28,6 +28,8 @@  struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
+  /* The chain that this chain is tied to.  */
+  struct du_head *tied_chain;
   /* Describes the register being tracked.  */
   unsigned regno;
   int nregs;
@@ -45,6 +47,8 @@  struct du_head
      such as the SET_DEST of a CALL_INSN or an asm operand that used
      to be a hard register.  */
   unsigned int cannot_rename:1;
+  /* Nonzero if the chain is renamed.  */
+  unsigned int renamed:1;
 };
 
 typedef struct du_head *du_head_p;