diff mbox

[AArch64] optimize integer immediate moves with partial masks.

Message ID VI1PR0801MB203123C1CA3BF5826A3DD1BDFFC80@VI1PR0801MB2031.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Tamar Christina June 7, 2017, 11:38 a.m. UTC
Hi All, 

This patch optimizes integer moves for cases where the move could be done
more efficiently using a smaller mode.

For example:

long long f1(void)
{
  return 0xffff6666;
}

long f2(void)
{
  return 0x11110000ffff6666;
}

generates:


f1:
	mov	w0, 4294927974
	ret

f2:
	mov	w0, 4294927974
	movk	x0, 0x1111, lsl 48
	ret

instead of:

f1:
	mov	x0, 26214
	movk	x0, 0xffff, lsl 16
	ret

f2:
	mov	x0, 26214
	movk	x0, 0xffff, lsl 16
	movk	x0, 0x1111, lsl 48

This works because, when the low 32 bits are either 0xffffXXXX or 0xXXXXffff (with XXXX non-zero),
a 32-bit MOVN instruction can be used as if the type was int rather than long (due to zero-extension to 64 bits).

Regression tested on aarch64-none-linux-gnu with no regressions.

OK for trunk?

Thanks,
Tamar


gcc/
2017-06-07  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/aarch64.c
	(aarch64_internal_mov_immediate): Add new special pattern.
	* config/aarch64/aarch64.md (*movdi_aarch64):
	Add reg/32bit const mov case.

gcc/testsuite/
2017-06-07  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/int_mov_immediate_1.c: New.

Comments

James Greenhalgh June 9, 2017, 9:50 a.m. UTC | #1
On Wed, Jun 07, 2017 at 12:38:22PM +0100, Tamar Christina wrote:
> Hi All, 
> 
> This patch optimizes integer moves for cases where where the move could be done
> move efficiently using a smaller mode.
> 
> For example:
> 
> long long f1(void)
> {
>   return 0xffff6666;
> }
> 
> long f2(void)
> {
>   return 0x11110000ffff6666;
> }
> 
> generates:
> 
> 
> f1:
> 	mov	w0, 4294927974
> 	ret
> 
> f2:
> 	mov	w0, 4294927974
> 	movk	x0, 0x1111, lsl 48
> 	ret
> 
> instead of:
> 
> f1:
> 	mov	x0, 26214
> 	movk	x0, 0xffff, lsl 16
> 	ret
> 
> f2:
> 	mov	x0, 26214
> 	movk	x0, 0xffff, lsl 16
> 	movk	x0, 0x1111, lsl 48
> 
> This works when the low 32 bits are either 0xffffXXXX or 0xXXXXffff (with XXXX non-zero), 
> a 32-bit MOVN instruction can be used as if the type was int rather than long (due to zero-extend to 64 bits).
> 
> Regression tested on aarch64-none-linux-gnu and no regressions.
> 
> OK for trunk?

I have a couple of comments below asking for comments on the new code
you're adding, and on an unrelated whitespace change, but otherwise this
patch is OK.

> gcc/
> 2017-06-07  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* config/aarch64/aarch64.c
> 	(aarch64_internal_mov_immediate): Add new special pattern.
> 	* config/aarch64/aarch64.md (*movdi_aarch64):
> 	Add reg/32bit const mov case.
> 
> gcc/testsuite/
> 2017-06-07  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* gcc.target/aarch64/int_mov_immediate_1.c: New.

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a99a13460c2314ca9b40f82a466b6d492c49db97..e91586fa03c64b22c4c8efdf7b98d48c0086126d 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1794,6 +1794,27 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>        return 1;
>      }

A comment similar to that in your cover letter describing what pattern
you are catching would be useful for future reference.

> +  val2 = val & 0xffffffff;
> +  if (mode == DImode
> +      && aarch64_move_imm (val2, SImode)
> +      && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
> +    {
> +      if (generate)
> +	emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> +
> +      /* Check if we have to emit a second instruction.  */

Likewise here: please say which conditions would require a second instruction.

> +      if (val == val2)
> +	return 1;
> +
> +      i = (val >> 48) ? 48 : 32;
> +
> +      if (generate)
> +	 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> +				    GEN_INT ((val >> i) & 0xffff)));
> +
> +      return 2;
> +    }
> +
>    if ((val >> 32) == 0 || mode == SImode)
>      {
>        if (generate)
> @@ -1810,7 +1831,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
>      }
>  
>    /* Remaining cases are all for DImode.  */
> -

Unrelated change?

>    mask = 0xffff;
>    zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
>      ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);

Thanks,
James
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a99a13460c2314ca9b40f82a466b6d492c49db97..e91586fa03c64b22c4c8efdf7b98d48c0086126d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1794,6 +1794,27 @@  aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
       return 1;
     }
 
+  val2 = val & 0xffffffff;
+  if (mode == DImode
+      && aarch64_move_imm (val2, SImode)
+      && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
+    {
+      if (generate)
+	emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+
+      /* Check if we have to emit a second instruction.  */
+      if (val == val2)
+	return 1;
+
+      i = (val >> 48) ? 48 : 32;
+
+      if (generate)
+	 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+				    GEN_INT ((val >> i) & 0xffff)));
+
+      return 2;
+    }
+
   if ((val >> 32) == 0 || mode == SImode)
     {
       if (generate)
@@ -1810,7 +1831,6 @@  aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
     }
 
   /* Remaining cases are all for DImode.  */
-
   mask = 0xffff;
   zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
     ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index fdba2d0adde2ef9e8519f6321f7456517c5e916a..5fcf809ae47552395667647e7299dcfe4ebdf7dd 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1060,8 +1060,8 @@ 
 )
 
 (define_insn_and_split "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w, m, m,r,r  ,*w, r,*w,w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w, m, m,r,r  ,*w, r,*w,w")
+	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,N,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -1069,6 +1069,7 @@ 
    mov\\t%0, %x1
    mov\\t%x0, %1
    mov\\t%x0, %1
+   mov\\t%w0, %1
    #
    ldr\\t%x0, %1
    ldr\\t%d0, %1
@@ -1087,10 +1088,10 @@ 
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
     }"
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
-		    adr,adr,f_mcr,f_mrc,fmov,neon_move")
-   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm, mov_imm,mov_imm,load1,\
+		    load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+   (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
 )
 
 (define_insn "insv_imm<mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c b/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..6ac9065037f881c96ca81661a7d717133c6cc83d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/int_mov_immediate_1.c
@@ -0,0 +1,59 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3" } */
+
+long long f1(void)
+{
+  return 0xffff6666;
+}
+
+int f3(void)
+{
+  return 0xffff6666;
+}
+
+
+long f2(void)
+{
+  return 0x11110000ffff6666;
+}
+
+long f4(void)
+{
+  return 0x11110001ffff6666;
+}
+
+long f5(void)
+{
+  return 0x111100001ff6666;
+}
+
+long f6(void)
+{
+  return 0x00001111ffff6666;
+}
+
+long f7(void)
+{
+  return 0x000011116666ffff;
+}
+
+long f8(void)
+{
+  return 0x0f0011116666ffff;
+}
+
+/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, -39322"      1 } } */
+/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 4294927974"  3 } } */
+/* { dg-final { scan-assembler-times "mov\tw\[0-9\]+, 1718026239"  1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -2576941057" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -39322"      1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 26214"       1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0xf00, lsl 48" 1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 48" 2 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1000, lsl 32" 1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1111, lsl 32" 3 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x111, lsl 48"  1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1ff, lsl 16"  1 } } */
+/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x1, lsl 32"    1 } } */
+