Message ID | VI1PR0801MB2031A4A46EED69A0B9BA06A4FF7E0@VI1PR0801MB2031.eurprd08.prod.outlook.com |
---|---|
State | New |
On Thu, Jan 19, 2017 at 06:05:52PM +0000, Tamar Christina wrote:
> Hi James,
>
> I have corrected the testsuite changes, and attached are the new file
> and changelog.
>
> Ok for trunk?
>
> Tamar
>
> Hi All,
>
> This patch vectorizes the copysign builtin for AArch64, similar to how
> it is done for Arm.
>
> AArch64 now generates:
>
> ...
> .L4:
>         ldr     q1, [x6, x3]
>         add     w4, w4, 1
>         ldr     q0, [x5, x3]
>         cmp     w4, w7
>         bif     v1.16b, v2.16b, v3.16b
>         fmul    v0.2d, v0.2d, v1.2d
>         str     q0, [x5, x3]
>
> for the input:
>
> x * copysign(1.0, y)
>
> On 481.wrf in SPEC2006 on AArch64 this gives us a speedup of 9.1%.
>
> Regtested on aarch64-none-linux-gnu and arm-none-eabi with no
> regressions.
>
> Ok for trunk?

OK. I think this is now suitably minimal (and safe) for the last day of
Stage 3.

Thanks,
James

> gcc/
> 2017-01-19  Tamar Christina  <tamar.christina@arm.com>
>
>         * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup):
>         Change int to HOST_WIDE_INT.
>         * config/aarch64/aarch64-protos.h
>         (aarch64_simd_gen_const_vector_dup): Likewise.
>         * config/aarch64/aarch64-simd.md: Add copysign<mode>3.
>
> gcc/testsuite/
> 2017-01-19  Tamar Christina  <tamar.christina@arm.com>
>
>         * gcc/testsuite/lib/target-supports.exp
>         (check_effective_target_vect_call_copysignf): Enable for AArch64.
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..e75ba29f93e9e749791803ca3fa8d716ca261064 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -362,7 +362,7 @@ rtx aarch64_final_eh_return_addr (void);
>  rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>  const char *aarch64_output_move_struct (rtx *operands);
>  rtx aarch64_return_addr (int, rtx);
> -rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
> +rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
>  bool aarch64_simd_mem_operand_p (rtx);
>  rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
>  rtx aarch64_tls_get_addr (void);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index a12e2268ef9b023112f8d05db0a86957fee83273..b61f79a09462b8cecca7dd2cc4ac0eb4be2dbc79 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -338,6 +338,24 @@
>  }
>  )
>
> +(define_expand "copysign<mode>3"
> +  [(match_operand:VHSDF 0 "register_operand")
> +   (match_operand:VHSDF 1 "register_operand")
> +   (match_operand:VHSDF 2 "register_operand")]
> +  "TARGET_FLOAT && TARGET_SIMD"
> +{
> +  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
> +  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
> +
> +  emit_move_insn (v_bitmask,
> +                  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
> +                                                     HOST_WIDE_INT_M1U << bits));
> +  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
> +                                         operands[2], operands[1]));
> +  DONE;
> +}
> +)
> +
>  (define_insn "*aarch64_mul3_elt<mode>"
>    [(set (match_operand:VMUL 0 "register_operand" "=w")
>        (mult:VMUL
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 0cf7d12186af3e05ba8742af5a03425f61f51754..1a69605db5d2a4a0efb8c9f97a019de9dded40eb 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11244,14 +11244,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
>
>  /* Return a const_int vector of VAL.  */
>  rtx
> -aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
> +aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
>  {
>    int nunits = GET_MODE_NUNITS (mode);
>    rtvec v = rtvec_alloc (nunits);
>    int i;
>
> +  rtx cache = GEN_INT (val);
> +
>    for (i=0; i < nunits; i++)
> -    RTVEC_ELT (v, i) = GEN_INT (val);
> +    RTVEC_ELT (v, i) = cache;
>
>    return gen_rtx_CONST_VECTOR (mode, v);
>  }
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index b88d13c13f277e8cdb88b5dc8545ffa01408a0fa..12dbf475e31933cff781c2f9e9c1cfbe2ce108bb 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -6158,7 +6158,8 @@ proc check_effective_target_vect_call_copysignf { } {
>      } else {
>          set et_vect_call_copysignf_saved($et_index) 0
>          if { [istarget i?86-*-*] || [istarget x86_64-*-*]
> -             || [istarget powerpc*-*-*] } {
> +             || [istarget powerpc*-*-*]
> +             || [istarget aarch64*-*-*] } {
>              set et_vect_call_copysignf_saved($et_index) 1
>          }
>      }
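The expander needs no floating-point arithmetic at all: BSL/BIF with a mask whose only set bit per lane is the sign bit copies the sign bit from the second source and the magnitude bits from the first, which is exactly copysign. A minimal scalar sketch of the same identity in plain C (illustrative only; `copysign_via_bits` is an invented name, not GCC code):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Scalar model of what the vector BSL/BIF does per lane: take the sign
   bit from Y and the magnitude bits from X, i.e. copysign (x, y),
   entirely in the integer domain.  */
static double
copysign_via_bits (double x, double y)
{
  uint64_t xb, yb, rb;
  memcpy (&xb, &x, sizeof xb);                  /* bit-cast without aliasing traps */
  memcpy (&yb, &y, sizeof yb);
  const uint64_t sign = UINT64_C (1) << 63;     /* sign-bit-only mask */
  rb = (yb & sign) | (xb & ~sign);              /* sign from y, rest from x */
  double r;
  memcpy (&r, &rb, sizeof r);
  return r;
}

int
main (void)
{
  printf ("%g\n", copysign_via_bits (4.0, -0.5));   /* prints -4 */
  return 0;
}
```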
```diff
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..e75ba29f93e9e749791803ca3fa8d716ca261064 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -362,7 +362,7 @@ rtx aarch64_final_eh_return_addr (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
-rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
+rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
 bool aarch64_simd_mem_operand_p (rtx);
 rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
 rtx aarch64_tls_get_addr (void);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a12e2268ef9b023112f8d05db0a86957fee83273..b61f79a09462b8cecca7dd2cc4ac0eb4be2dbc79 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -338,6 +338,24 @@
 }
 )

+(define_expand "copysign<mode>3"
+  [(match_operand:VHSDF 0 "register_operand")
+   (match_operand:VHSDF 1 "register_operand")
+   (match_operand:VHSDF 2 "register_operand")]
+  "TARGET_FLOAT && TARGET_SIMD"
+{
+  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
+  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+  emit_move_insn (v_bitmask,
+                  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+                                                     HOST_WIDE_INT_M1U << bits));
+  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
+                                         operands[2], operands[1]));
+  DONE;
+}
+)
+
 (define_insn "*aarch64_mul3_elt<mode>"
   [(set (match_operand:VMUL 0 "register_operand" "=w")
       (mult:VMUL
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0cf7d12186af3e05ba8742af5a03425f61f51754..1a69605db5d2a4a0efb8c9f97a019de9dded40eb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11244,14 +11244,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)

 /* Return a const_int vector of VAL.  */
 rtx
-aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
+aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
 {
   int nunits = GET_MODE_NUNITS (mode);
   rtvec v = rtvec_alloc (nunits);
   int i;

+  rtx cache = GEN_INT (val);
+
   for (i=0; i < nunits; i++)
-    RTVEC_ELT (v, i) = GEN_INT (val);
+    RTVEC_ELT (v, i) = cache;

   return gen_rtx_CONST_VECTOR (mode, v);
 }
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index b88d13c13f277e8cdb88b5dc8545ffa01408a0fa..12dbf475e31933cff781c2f9e9c1cfbe2ce108bb 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6158,7 +6158,8 @@ proc check_effective_target_vect_call_copysignf { } {
     } else {
         set et_vect_call_copysignf_saved($et_index) 0
         if { [istarget i?86-*-*] || [istarget x86_64-*-*]
-             || [istarget powerpc*-*-*] } {
+             || [istarget powerpc*-*-*]
+             || [istarget aarch64*-*-*] } {
             set et_vect_call_copysignf_saved($et_index) 1
         }
     }
```
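A note on the prototype change: the mask the expander passes down, `HOST_WIDE_INT_M1U << (lane_bits - 1)`, only fits the old `int` parameter for lanes up to 32 bits; the 64-bit lane mask (0x8000000000000000) needs the full `HOST_WIDE_INT`. A standalone sketch (plain C, not GCC internals) printing that constant for each lane width the VHSDF iterator covers:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* All-ones shifted left by lane_bits - 1 leaves just the per-lane
     sign bit set, sign-extended through the upper bits of the 64-bit
     constant.  VHSDF lanes are 16, 32 or 64 bits wide.  */
  for (int lane_bits = 16; lane_bits <= 64; lane_bits *= 2)
    {
      uint64_t mask = UINT64_MAX << (lane_bits - 1);
      printf ("%2d-bit lanes: 0x%016" PRIx64 "\n", lane_bits, mask);
    }
  return 0;
}
/* Output:
   16-bit lanes: 0xffffffffffff8000
   32-bit lanes: 0xffffffff80000000
   64-bit lanes: 0x8000000000000000  */
```

The `GEN_INT` hoisting in the same hunk is a small cleanup along the way: every element of the dup vector is the same constant, so one shared CONST_INT rtx can be reused for all `nunits` slots instead of requesting it once per element.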