Message ID: VI1PR0801MB203117D353E8A26AA9909037FFC80@VI1PR0801MB2031.eurprd08.prod.outlook.com
State: New
On Wed, Jun 07, 2017 at 12:38:37PM +0100, Tamar Christina wrote:
> Hi All,
>
> This patch adds support for creating floating point constants
> using mov immediate instructions.  The movi SIMD instruction can
> be used for HFmode and SFmode constants, eg. for -0.0f we generate:
>
>   movi  v0.2s, 0x80, lsl 24
>
> More complex constants can be generated using an integer MOV or
> MOV+MOVK:
>
>   mov   w0, 48128
>   movk  w0, 0x47f0, lsl 16
>   fmov  s0, w0
>
> We allow up to 3 instructions as this allows all HF, SF and most DF
> constants to be generated without a literal load, and is overall best
> for codesize.
>
> Regression tested on aarch64-none-linux-gnu and no regressions.
>
> OK for trunk?
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..7f107672882b13809be01355ffafbc2807cc5adb 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1167,66 +1167,120 @@
>  }
>  )
>
> -(define_insn "*movhf_aarch64"
> -  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
> -        (match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
> +(define_insn_and_split "*movhf_aarch64"
> +  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
> +        (match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
>    "TARGET_FLOAT && (register_operand (operands[0], HFmode)
> -    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
> +    || aarch64_reg_or_fp_float (operands[1], HFmode))"
>    "@
>     movi\\t%0.4h, #0
> -   mov\\t%0.h[0], %w1
> +   fmov\\t%s0, %w1

Should this not be %h0?

>    umov\\t%w0, %1.h[0]
>    mov\\t%0.h[0], %1.h[0]
> +   fmov\\t%s0, %1

Likewise, and much more important for correctness as it changes the way the
bit pattern ends up in the register (see table C2-1 in release B.a of the ARM
Architecture Reference Manual for ARMv8-A), here.

> +   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
>     ldr\\t%h0, %1
>     str\\t%h1, %0
>     ldrh\\t%w0, %1
>     strh\\t%w1, %0
>     mov\\t%w0, %w1"
> -  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> -                     f_loads,f_stores,load1,store1,mov_reg")
> -   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> +  "&& can_create_pseudo_p ()
> +     && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> +     && !aarch64_float_const_representable_p (operands[1])
> +     && aarch64_float_const_rtx_p (operands[1])"
> +  [(const_int 0)]
> +  "{
> +    unsigned HOST_WIDE_INT ival;
> +    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> +      FAIL;
> +
> +    rtx tmp = gen_reg_rtx (SImode);
> +    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> +    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> +    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> +    DONE;
> +  }"
> +  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
> +                     neon_move,f_loads,f_stores,load1,store1,mov_reg")
> +   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
>  )

Thanks,
James
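(For reference, a minimal standalone sketch, not GCC code, of the MOV/MOVK idea the
cover letter describes: the constant's IEEE bit pattern is built in a general-purpose
register in 16-bit chunks and then transferred to the FP register with FMOV.  The
helper name and the 123456.0f constant below are illustrative only; in the patch the
equivalent work is done in RTL by aarch64_reinterpret_float_as_int and
aarch64_expand_mov_immediate.)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative only: split a float's bit pattern into the 16-bit chunks
   a MOV/MOVK pair would build in a W register before FMOV moves it to
   an S register.  This is not the GCC implementation.  */
static void
show_mov_movk (float f)
{
  uint32_t bits;
  memcpy (&bits, &f, sizeof bits);      /* reinterpret, no value conversion */

  unsigned lo = bits & 0xffff;          /* mov  w0, #lo                     */
  unsigned hi = bits >> 16;             /* movk w0, #hi, lsl 16             */

  printf ("%g -> 0x%08x: mov w0, %u; movk w0, 0x%x, lsl 16; fmov s0, w0\n",
          (double) f, bits, lo, hi);
}

int
main (void)
{
  show_mov_movk (123456.0f);    /* hypothetical example constant */
  return 0;
}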
> >    movi\\t%0.4h, #0
> > -   mov\\t%0.h[0], %w1
> > +   fmov\\t%s0, %w1
>
> Should this not be %h0?

The problem is that H registers are only available in ARMv8.2+; I'm not
sure what to do about ARMv8.1, given your other feedback pointing out
that the bit patterns differ depending on whether the value is stored
in an S or an H register.

> >    umov\\t%w0, %1.h[0]
> >    mov\\t%0.h[0], %1.h[0]
> > +   fmov\\t%s0, %1
>
> Likewise, and much more important for correctness as it changes the way the
> bit pattern ends up in the register (see table C2-1 in release B.a of the ARM
> Architecture Reference Manual for ARMv8-A), here.
>
> > +   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
> >    ldr\\t%h0, %1
> >    str\\t%h1, %0
> >    ldrh\\t%w0, %1
> >    strh\\t%w1, %0
> >    mov\\t%w0, %w1"
> > -  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> > -                     f_loads,f_stores,load1,store1,mov_reg")
> > -   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> > +  "&& can_create_pseudo_p ()
> > +     && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> > +     && !aarch64_float_const_representable_p (operands[1])
> > +     && aarch64_float_const_rtx_p (operands[1])"
> > +  [(const_int 0)]
> > +  "{
> > +    unsigned HOST_WIDE_INT ival;
> > +    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> > +      FAIL;
> > +
> > +    rtx tmp = gen_reg_rtx (SImode);
> > +    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> > +    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> > +    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> > +    DONE;
> > +  }"
> > +  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
> > +                     neon_move,f_loads,f_stores,load1,store1,mov_reg")
> > +   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> > )
>
> Thanks,
> James
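(To make the %s0-versus-%h0 bit-pattern point concrete: half-precision -0.0 is encoded
as 0x8000 while single-precision -0.0 is 0x80000000, so a value materialised through
the S view of the register does not leave the HF pattern in the low 16 bits that the
H view reads.  A small illustrative check follows, assuming GCC's __fp16 ACLE storage
type on an AArch64 target; it is not part of the patch.)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  __fp16 h = -0.0;      /* ACLE half-precision storage type (assumed available) */
  float  s = -0.0f;

  uint16_t hbits;
  uint32_t sbits;
  memcpy (&hbits, &h, sizeof hbits);
  memcpy (&sbits, &s, sizeof sbits);

  /* Prints: half 0x8000, single 0x80000000, low16(single) 0x0000.
     Writing the SF encoding through the S view and reading the H view
     therefore does not yield half-precision -0.0.  */
  printf ("half -0.0: 0x%04x  single -0.0: 0x%08x  low16(single): 0x%04x\n",
          hbits, sbits, (unsigned) (sbits & 0xffff));
  return 0;
}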
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..7f107672882b13809be01355ffafbc2807cc5adb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1167,66 +1167,120 @@
 }
 )
 
-(define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
-        (match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,m,w,m,rY,r"))]
+(define_insn_and_split "*movhf_aarch64"
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
+        (match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
   "TARGET_FLOAT && (register_operand (operands[0], HFmode)
-    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+    || aarch64_reg_or_fp_float (operands[1], HFmode))"
   "@
    movi\\t%0.4h, #0
-   mov\\t%0.h[0], %w1
+   fmov\\t%s0, %w1
    umov\\t%w0, %1.h[0]
    mov\\t%0.h[0], %1.h[0]
+   fmov\\t%s0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
    ldr\\t%h0, %1
    str\\t%h1, %0
    ldrh\\t%w0, %1
    strh\\t%w1, %0
    mov\\t%w0, %w1"
-  [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
-                     f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+  "&& can_create_pseudo_p ()
+     && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
+     && !aarch64_float_const_representable_p (operands[1])
+     && aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (SImode);
+    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
+    tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
+    emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+                     neon_move,f_loads,f_stores,load1,store1,mov_reg")
+   (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
 )
 
-(define_insn "*movsf_aarch64"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
-        (match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+(define_insn_and_split "*movsf_aarch64"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+        (match_operand:SF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
   "TARGET_FLOAT && (register_operand (operands[0], SFmode)
-    || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+    || aarch64_reg_or_fp_float (operands[1], SFmode))"
   "@
    movi\\t%0.2s, #0
    fmov\\t%s0, %w1
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1
    fmov\\t%s0, %1
+   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
    ldr\\t%s0, %1
    str\\t%s1, %0
    ldr\\t%w0, %1
    str\\t%w1, %0
-   mov\\t%w0, %w1"
-  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
-                     f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+   mov\\t%w0, %w1
+   mov\\t%w0, %1"
+  "&& can_create_pseudo_p ()
+     && !aarch64_can_const_movi_rtx_p (operands[1], SFmode)
+     && !aarch64_float_const_representable_p (operands[1])
+     && aarch64_float_const_rtx_p (operands[1])"
+  [(const_int 0)]
+  "{
+    unsigned HOST_WIDE_INT ival;
+    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+      FAIL;
+
+    rtx tmp = gen_reg_rtx (SImode);
+    aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
+    emit_move_insn (operands[0], gen_lowpart (SFmode, tmp));
+    DONE;
+  }"
+  [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+                     f_loads,f_stores,load1,store1,mov_reg,\
+                     fconsts")
+   (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
 )
-(define_insn "*movdf_aarch64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") - (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] +(define_insn_and_split "*movdf_aarch64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") + (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))] "TARGET_FLOAT && (register_operand (operands[0], DFmode) - || aarch64_reg_or_fp_zero (operands[1], DFmode))" + || aarch64_reg_or_fp_float (operands[1], DFmode))" "@ movi\\t%d0, #0 fmov\\t%d0, %x1 fmov\\t%x0, %d1 fmov\\t%d0, %d1 fmov\\t%d0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); ldr\\t%d0, %1 str\\t%d1, %0 ldr\\t%x0, %1 str\\t%x1, %0 - mov\\t%x0, %x1" - [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\ - f_loadd,f_stored,load1,store1,mov_reg") - (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] + mov\\t%x0, %x1 + mov\\t%x0, %1" + "&& can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (operands[1], DFmode) + && !aarch64_float_const_representable_p (operands[1]) + && aarch64_float_const_rtx_p (operands[1])" + [(const_int 0)] + "{ + unsigned HOST_WIDE_INT ival; + if (!aarch64_reinterpret_float_as_int (operands[1], &ival)) + FAIL; + + rtx tmp = gen_reg_rtx (DImode); + aarch64_expand_mov_immediate (tmp, GEN_INT (ival)); + emit_move_insn (operands[0], gen_lowpart (DFmode, tmp)); + DONE; + }" + [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ + f_loadd,f_stored,load1,store1,mov_reg,\ + fconstd") + (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] ) (define_insn "*movtf_aarch64"