diff mbox

[AArch64] optimize float immediate moves (2/4) - HF/DF/SF mode.

Message ID VI1PR0801MB20316A7D3165E0EDD14CD43DFFCD0@VI1PR0801MB2031.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Tamar Christina June 12, 2017, 7:31 a.m. UTC
Hi All,

Updating this patch with the feedback I've received from patch 1/4.

Thanks,
Tamar

Comments

Richard Sandiford June 14, 2017, 10:06 a.m. UTC | #1
In addition to James's comments:

Tamar Christina <Tamar.Christina@arm.com> writes:
> +  [(const_int 0)]
> +  "{

"{ ... }" isn't necessary, we can just use { ... }

> +    unsigned HOST_WIDE_INT ival;
> +    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> +      FAIL;
> +
> +    rtx tmp = gen_reg_rtx (SImode);
> +    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
> +    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);

This looks wrong for big-endian, and...

> +    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));

...either it should be OK to go directly from tmp to the HFmode lowpart,
or we should move the HImode temporary into a fresh REG.  Current
validate_subreg seems to suggest that we need the latter.

Isn't it possible to use a HImode move immediate instead of an SImode one?

Thanks,
Richard
Tamar Christina June 15, 2017, 1:25 p.m. UTC | #2
Hi Richard,

> > +    rtx tmp = gen_reg_rtx (SImode);
> > +    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
> > +    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> 
> This looks wrong for big-endian, and...
> 
> > +    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> 
> ...either it should be OK to go directly from tmp to the HFmode lowpart, or
> we should move the HImode temporary into a fresh REG.  Current
> validate_subreg seems to suggest that we need the latter.
> 
> Isn't it possible to use a HImode move immediate instead of an SImode one?

We don't really have a movhi pattern; currently a movhi would end up in the general
mov<mode>_aarch64 pattern, which would then end up using a w register as well.

I'll take a look at big-endian. 

> 
> Thanks,
> Richard
Tamar Christina June 15, 2017, 2:28 p.m. UTC | #3
Hi Richard,
 
> > > +    rtx tmp = gen_reg_rtx (SImode);
> > > +    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival,
> SImode));
> > > +    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> >
> > This looks wrong for big-endian, and...
> >
> > > +    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> >
> > ...either it should be OK to go directly from tmp to the HFmode
> > lowpart, or we should move the HImode temporary into a fresh REG.
> > Current validate_subreg seems to suggest that we need the latter.
> >
> > Isn't it possible to use a HImode move immediate instead of an SImode
> one?
> 
> We don't really have a movehi pattern, currently a movhi would end up in
> the general
> mov<mode>_aarch64 pattern which would then use end up using a w
> register as well.

Also aarch64_expand_mov_immediate doesn't allow HImode moves, only SI and DI.

> 
> I'll take a look at big-endian.
> 
> >
> > Thanks,
> > Richard
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..62ad76731d2c5b4b3d02def8c2b1457c713c2d8e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1167,66 +1167,120 @@ 
   }
 )
 
-(define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
+(define_insn_and_split "*movhf_aarch64"
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r")
+	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
-    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+    || aarch64_reg_or_fp_float (operands[1], HFmode))"
   "@
    movi\\t%0.4h, #0
-   mov\\t%0.h[0], %w1
+   fmov\\t%s0, %w1
    umov\\t%w0, %1.h[0]
    mov\\t%0.h[0], %1.h[0]
+   fmov\\t%s0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
    ldr\\t%h0, %1
    str\\t%h1, %0
    ldrh\\t%w0, %1
    strh\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
-                     f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+  "&& can_create_pseudo_p ()
+   && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
+   && !aarch64_float_const_representable_p (operands[1])
+   &&  aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (SImode);
+    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
+    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
+    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+		     neon_move,f_loads,f_stores,load1,store1,mov_reg")
+   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
 )
 
-(define_insn "*movsf_aarch64"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
-	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movsf_aarch64"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
+	(match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
-    || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+    || aarch64_reg_or_fp_float (operands[1], SFmode))"
   "@
    movi\\t%0.2s, #0
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1
    fmov\\t%s0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
    ldr\\t%s0, %1
    str\\t%s1, %0
    ldr\\t%w0, %1
    str\\t%w1, %0
-   mov\\t%w0, %w1"
-  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
-                     f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+   mov\\t%w0, %w1
+   mov\\t%w0, %1"
+  "&& can_create_pseudo_p ()
+   && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
+   && !aarch64_float_const_representable_p (operands[1])
+   &&  aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (SImode);
+    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, SImode));
+    emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+		     f_loads,f_stores,load1,store1,mov_reg,\
+		     fconsts")
+   (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
 
-(define_insn "*movdf_aarch64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w  ,?r,w,w  ,w,m,r,m ,r")
-	(match_operand:DF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movdf_aarch64"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w  ,?r,w,w  ,w  ,w,m,r,m ,r,r")
+	(match_operand:DF 1 "general_operand"      "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
   "TARGET_FLOAT && (register_operand (operands[0], DFmode)
-    || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+    || aarch64_reg_or_fp_float (operands[1], DFmode))"
   "@
    movi\\t%d0, #0
    fmov\\t%d0, %x1
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    fmov\\t%d0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
    ldr\\t%d0, %1
    str\\t%d1, %0
    ldr\\t%x0, %1
    str\\t%x1, %0
-   mov\\t%x0, %x1"
-  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
-                     f_loadd,f_stored,load1,store1,mov_reg")
-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+   mov\\t%x0, %x1
+   mov\\t%x0, %1"
+  "&& can_create_pseudo_p ()
+   && !aarch64_can_const_movi_rtx_p (operands[1], DFmode)
+   && !aarch64_float_const_representable_p (operands[1])
+   &&  aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (DImode);
+    aarch64_expand_mov_immediate (tmp, gen_int_mode (ival, DImode));
+    emit_move_insn (operands[0], gen_lowpart (DFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
+		     f_loadd,f_stored,load1,store1,mov_reg,\
+		     fconstd")
+   (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
 
 (define_insn "*movtf_aarch64"