diff mbox

[AArch64] Improve TI mode address offsets

Message ID AM5PR0802MB261001A3882D2BA9904AF3BC83BB0@AM5PR0802MB2610.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra Nov. 11, 2016, 1:14 p.m. UTC
Richard Earnshaw wrote:

> Has this patch been truncated?  The last line above looks to be part-way
> through a hunk.

Oops sorry, it seems the last few lines are missing. Here is the full version:

Comments

James Greenhalgh Dec. 6, 2016, 5 p.m. UTC | #1
On Fri, Nov 11, 2016 at 01:14:15PM +0000, Wilco Dijkstra wrote:
> Richard Earnshaw wrote:
> 
> > Has this patch been truncated?  The last line above looks to be part-way
> > through a hunk.
> 
> Oops sorry, it seems the last few lines are missing. Here is the full version:

OK.

Thanks,
James

> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4066,7 +4066,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
>  	     instruction memory accesses.  */
>  	  if (mode == TImode || mode == TFmode)
>  	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> -		    && offset_9bit_signed_unscaled_p (mode, offset));
> +		    && (offset_9bit_signed_unscaled_p (mode, offset)
> +			|| offset_12bit_unsigned_scaled_p (mode, offset)));
>  
>  	  /* A 7bit offset check because OImode will emit a ldp/stp
>  	     instruction (only big endian will get here).
> @@ -4270,18 +4271,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
>  /* Split an out-of-range address displacement into a base and offset.
>     Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
>     to increase opportunities for sharing the base address of different sizes.
> -   For TI/TFmode and unaligned accesses use a 256-byte range.  */
> +   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
>  static bool
>  aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
>  {
> -  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
> +  HOST_WIDE_INT offset = INTVAL (*disp);
> +  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
>  
> -  if (mode == TImode || mode == TFmode ||
> -      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
> -    mask = 0xff;
> +  if (mode == TImode || mode == TFmode
> +      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
> +    base = (offset + 0x100) & ~0x1ff;
>  
> -  *off = GEN_INT (INTVAL (*disp) & ~mask);
> -  *disp = GEN_INT (INTVAL (*disp) & mask);
> +  *off = GEN_INT (base);
> +  *disp = GEN_INT (offset - base);
>    return true;
>  }
>  
> @@ -5148,12 +5150,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
>  	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
>  	}
>  
> -      /* Does it look like we'll need a load/store-pair operation?  */
> +      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
>        HOST_WIDE_INT base_offset;
> -      if (GET_MODE_SIZE (mode) > 16
> -	  || mode == TImode)
> -	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
> -		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
> +      if (GET_MODE_SIZE (mode) > 16)
> +	base_offset = (offset + 0x400) & ~0x7f0;
>        /* For offsets aren't a multiple of the access size, the limit is
>  	 -256...255.  */
>        else if (offset & (GET_MODE_SIZE (mode) - 1))
> @@ -5167,6 +5167,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
>        /* Small negative offsets are supported.  */
>        else if (IN_RANGE (offset, -256, 0))
>  	base_offset = 0;
> +      else if (mode == TImode || mode == TFmode)
> +	base_offset = (offset + 0x100) & ~0x1ff;
>        /* Use 12-bit offset by access size.  */
>        else
>  	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1094,9 +1094,9 @@
>  
>  (define_insn "*movti_aarch64"
>    [(set (match_operand:TI 0
> -	 "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
> +	 "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
>  	(match_operand:TI 1
> -	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
> +	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
>    "(register_operand (operands[0], TImode)
>      || aarch64_reg_or_zero (operands[1], TImode))"
>    "@
> @@ -1211,9 +1211,9 @@
>  
>  (define_insn "*movtf_aarch64"
>    [(set (match_operand:TF 0
> -	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump")
> +	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
>  	(match_operand:TF 1
> -	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))]
> +	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
>    "TARGET_FLOAT && (register_operand (operands[0], TFmode)
>      || aarch64_reg_or_fp_zero (operands[1], TFmode))"
>    "@
>
James Greenhalgh Dec. 8, 2016, 10:03 a.m. UTC | #2
On Tue, Dec 06, 2016 at 05:00:25PM +0000, James Greenhalgh wrote:
> On Fri, Nov 11, 2016 at 01:14:15PM +0000, Wilco Dijkstra wrote:
> > Richard Earnshaw wrote:
> > 
> > > Has this patch been truncated?  The last line above looks to be part-way
> > > through a hunk.
> > 
> > Oops sorry, it seems the last few lines are missing. Here is the full version:
> 
> OK.

This patch has caused around 250 new failures when using the tiny memory
model or when using -mfix-cortex-a53-843419 (causing a bootstrap failure
with --enable-fix-cortex-a53-843419)

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78733 for more details.

Thanks,
James

> > 
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -4066,7 +4066,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
> >  	     instruction memory accesses.  */
> >  	  if (mode == TImode || mode == TFmode)
> >  	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
> > -		    && offset_9bit_signed_unscaled_p (mode, offset));
> > +		    && (offset_9bit_signed_unscaled_p (mode, offset)
> > +			|| offset_12bit_unsigned_scaled_p (mode, offset)));
> >  
> >  	  /* A 7bit offset check because OImode will emit a ldp/stp
> >  	     instruction (only big endian will get here).
> > @@ -4270,18 +4271,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
> >  /* Split an out-of-range address displacement into a base and offset.
> >     Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
> >     to increase opportunities for sharing the base address of different sizes.
> > -   For TI/TFmode and unaligned accesses use a 256-byte range.  */
> > +   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
> >  static bool
> >  aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
> >  {
> > -  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
> > +  HOST_WIDE_INT offset = INTVAL (*disp);
> > +  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
> >  
> > -  if (mode == TImode || mode == TFmode ||
> > -      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
> > -    mask = 0xff;
> > +  if (mode == TImode || mode == TFmode
> > +      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
> > +    base = (offset + 0x100) & ~0x1ff;
> >  
> > -  *off = GEN_INT (INTVAL (*disp) & ~mask);
> > -  *disp = GEN_INT (INTVAL (*disp) & mask);
> > +  *off = GEN_INT (base);
> > +  *disp = GEN_INT (offset - base);
> >    return true;
> >  }
> >  
> > @@ -5148,12 +5150,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
> >  	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
> >  	}
> >  
> > -      /* Does it look like we'll need a load/store-pair operation?  */
> > +      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
> >        HOST_WIDE_INT base_offset;
> > -      if (GET_MODE_SIZE (mode) > 16
> > -	  || mode == TImode)
> > -	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
> > -		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
> > +      if (GET_MODE_SIZE (mode) > 16)
> > +	base_offset = (offset + 0x400) & ~0x7f0;
> >        /* For offsets aren't a multiple of the access size, the limit is
> >  	 -256...255.  */
> >        else if (offset & (GET_MODE_SIZE (mode) - 1))
> > @@ -5167,6 +5167,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
> >        /* Small negative offsets are supported.  */
> >        else if (IN_RANGE (offset, -256, 0))
> >  	base_offset = 0;
> > +      else if (mode == TImode || mode == TFmode)
> > +	base_offset = (offset + 0x100) & ~0x1ff;
> >        /* Use 12-bit offset by access size.  */
> >        else
> >  	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
> > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> > index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -1094,9 +1094,9 @@
> >  
> >  (define_insn "*movti_aarch64"
> >    [(set (match_operand:TI 0
> > -	 "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
> > +	 "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
> >  	(match_operand:TI 1
> > -	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
> > +	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
> >    "(register_operand (operands[0], TImode)
> >      || aarch64_reg_or_zero (operands[1], TImode))"
> >    "@
> > @@ -1211,9 +1211,9 @@
> >  
> >  (define_insn "*movtf_aarch64"
> >    [(set (match_operand:TF 0
> > -	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump")
> > +	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
> >  	(match_operand:TF 1
> > -	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))]
> > +	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
> >    "TARGET_FLOAT && (register_operand (operands[0], TFmode)
> >      || aarch64_reg_or_fp_zero (operands[1], TFmode))"
> >    "@
> >
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4066,7 +4066,8 @@  aarch64_classify_address (struct aarch64_address_info *info,
 	     instruction memory accesses.  */
 	  if (mode == TImode || mode == TFmode)
 	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
-		    && offset_9bit_signed_unscaled_p (mode, offset));
+		    && (offset_9bit_signed_unscaled_p (mode, offset)
+			|| offset_12bit_unsigned_scaled_p (mode, offset)));
 
 	  /* A 7bit offset check because OImode will emit a ldp/stp
 	     instruction (only big endian will get here).
@@ -4270,18 +4271,19 @@  aarch64_legitimate_address_p (machine_mode mode, rtx x,
 /* Split an out-of-range address displacement into a base and offset.
    Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
    to increase opportunities for sharing the base address of different sizes.
-   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
 static bool
 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
 {
-  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+  HOST_WIDE_INT offset = INTVAL (*disp);
+  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
 
-  if (mode == TImode || mode == TFmode ||
-      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
-    mask = 0xff;
+  if (mode == TImode || mode == TFmode
+      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
+    base = (offset + 0x100) & ~0x1ff;
 
-  *off = GEN_INT (INTVAL (*disp) & ~mask);
-  *disp = GEN_INT (INTVAL (*disp) & mask);
+  *off = GEN_INT (base);
+  *disp = GEN_INT (offset - base);
   return true;
 }
 
@@ -5148,12 +5150,10 @@  aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
 	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
-      /* Does it look like we'll need a load/store-pair operation?  */
+      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
       HOST_WIDE_INT base_offset;
-      if (GET_MODE_SIZE (mode) > 16
-	  || mode == TImode)
-	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
-		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
+      if (GET_MODE_SIZE (mode) > 16)
+	base_offset = (offset + 0x400) & ~0x7f0;
       /* For offsets aren't a multiple of the access size, the limit is
 	 -256...255.  */
       else if (offset & (GET_MODE_SIZE (mode) - 1))
@@ -5167,6 +5167,8 @@  aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
       /* Small negative offsets are supported.  */
       else if (IN_RANGE (offset, -256, 0))
 	base_offset = 0;
+      else if (mode == TImode || mode == TFmode)
+	base_offset = (offset + 0x100) & ~0x1ff;
       /* Use 12-bit offset by access size.  */
       else
 	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1094,9 +1094,9 @@ 
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
+	 "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
 	(match_operand:TI 1
-	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
+	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
@@ -1211,9 +1211,9 @@ 
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump")
+	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
 	(match_operand:TF 1
-	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))]
+	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
   "@