[AArch64] Add vectorization pattern for copysign.

Message ID VI1PR0801MB2031A4A46EED69A0B9BA06A4FF7E0@VI1PR0801MB2031.eurprd08.prod.outlook.com
State: New

Commit Message

Tamar Christina Jan. 19, 2017, 6:05 p.m. UTC
Hi James,

I have corrected the testsuite changes; the new file and changelog are attached.

Ok for trunk?

Tamar

Hi All,

This patch vectorizes the copysign builtin for AArch64,
similarly to how it is done for ARM.
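
The expansion builds a vector constant with only the sign bit set in
each element (for V2DF the unit is 64 bits, so the per-lane mask is
HOST_WIDE_INT_M1U << 63, i.e. 0x8000000000000000) and then uses the
NEON bit-select to take that bit from y and everything else from x.
A scalar C sketch of the same trick, for illustration only (the
function name is made up):

    #include <stdint.h>
    #include <string.h>

    static double
    copysign_by_bsl (double x, double y)
    {
      /* Per-lane mask with only the sign bit set.  */
      uint64_t mask = (uint64_t) -1 << 63;
      uint64_t xb, yb, rb;
      double r;

      memcpy (&xb, &x, sizeof xb);
      memcpy (&yb, &y, sizeof yb);
      /* BSL semantics: where the mask bit is 1 take the bit from y,
         elsewhere take it from x.  */
      rb = (yb & mask) | (xb & ~mask);
      memcpy (&r, &rb, sizeof r);
      return r;
    }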

AArch64 now generates:

...
.L4:
        ldr     q1, [x6, x3]
        add     w4, w4, 1
        ldr     q0, [x5, x3]
        cmp     w4, w7
        bif     v1.16b, v2.16b, v3.16b
        fmul    v0.2d, v0.2d, v1.2d
        str     q0, [x5, x3]

for the input:

     x * copysign(1.0, y)
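
A loop of roughly this shape (an illustrative sketch, not the actual
benchmark source) is the kind of input that now picks up the pattern:

    void
    scale_by_sign (double *restrict x, double *restrict y, int n)
    {
      for (int i = 0; i < n; i++)
        x[i] = x[i] * __builtin_copysign (1.0, y[i]);
    }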

On 481.wrf from SPEC CPU2006 this gives us a speedup of 9.1% on AArch64.
Regtested on aarch64-none-linux-gnu and arm-none-eabi with no regressions.

Ok for trunk?

gcc/
2017-01-19  Tamar Christina  <tamar.christina@arm.com>

        * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup):
        Change int to HOST_WIDE_INT.
        * config/aarch64/aarch64-protos.h
        (aarch64_simd_gen_const_vector_dup): Likewise.
        * config/aarch64/aarch64-simd.md: Add copysign<mode>3.

gcc/testsuite/
2017-01-19  Tamar Christina  <tamar.christina@arm.com>

        * lib/target-supports.exp
        (check_effective_target_vect_call_copysignf): Enable for AArch64.

Comments

James Greenhalgh Jan. 19, 2017, 6:17 p.m. UTC | #1
On Thu, Jan 19, 2017 at 06:05:52PM +0000, Tamar Christina wrote:
> Hi James,
> 
> I have corrected the testsuite changes; the new file and changelog are attached.
> 
> Ok for trunk?
> 
> Tamar
> 
> Hi All,
> 
> This patch vectorizes the copysign builtin for AArch64,
> similarly to how it is done for ARM.
> 
> AArch64 now generates:
> 
> ...
> .L4:
>         ldr     q1, [x6, x3]
>         add     w4, w4, 1
>         ldr     q0, [x5, x3]
>         cmp     w4, w7
>         bif     v1.16b, v2.16b, v3.16b
>         fmul    v0.2d, v0.2d, v1.2d
>         str     q0, [x5, x3]
> 
> for the input:
> 
>      x * copysign(1.0, y)
> 
> On 481.wrf from SPEC CPU2006 this gives us a speedup of 9.1% on AArch64.
> Regtested on aarch64-none-linux-gnu and arm-none-eabi with no regressions.
> 
> Ok for trunk?

OK. I think this is now suitably minimal (and safe) for the last
day of Stage 3.

Thanks,
James

> gcc/
> 2017-01-19  Tamar Christina  <tamar.christina@arm.com>
> 
>         * config/aarch64/aarch64.c (aarch64_simd_gen_const_vector_dup):
>         Change int to HOST_WIDE_INT.
>         * config/aarch64/aarch64-protos.h
>         (aarch64_simd_gen_const_vector_dup): Likewise.
>         * config/aarch64/aarch64-simd.md: Add copysign<mode>3.
> 
> gcc/testsuite/
> 2017-01-19  Tamar Christina  <tamar.christina@arm.com>
> 
>         * lib/target-supports.exp
>         (check_effective_target_vect_call_copysignf): Enable for AArch64.


Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 29a3bd71151aa4fb7c6728f0fb52e2f3f233f41d..e75ba29f93e9e749791803ca3fa8d716ca261064 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -362,7 +362,7 @@ rtx aarch64_final_eh_return_addr (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
-rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
+rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
 bool aarch64_simd_mem_operand_p (rtx);
 rtx aarch64_simd_vect_par_cnst_half (machine_mode, bool);
 rtx aarch64_tls_get_addr (void);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a12e2268ef9b023112f8d05db0a86957fee83273..b61f79a09462b8cecca7dd2cc4ac0eb4be2dbc79 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -338,6 +338,24 @@ 
   }
 )
 
+(define_expand "copysign<mode>3"
+  [(match_operand:VHSDF 0 "register_operand")
+   (match_operand:VHSDF 1 "register_operand")
+   (match_operand:VHSDF 2 "register_operand")]
+  "TARGET_FLOAT && TARGET_SIMD"
+{
+  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
+  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+  emit_move_insn (v_bitmask,
+		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+						     HOST_WIDE_INT_M1U << bits));
+  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
+					 operands[2], operands[1]));
+  DONE;
+}
+)
+
 (define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
     (mult:VMUL
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0cf7d12186af3e05ba8742af5a03425f61f51754..1a69605db5d2a4a0efb8c9f97a019de9dded40eb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11244,14 +11244,16 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
 
 /* Return a const_int vector of VAL.  */
 rtx
-aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
+aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
 {
   int nunits = GET_MODE_NUNITS (mode);
   rtvec v = rtvec_alloc (nunits);
   int i;
 
+  rtx cache = GEN_INT (val);
+
   for (i=0; i < nunits; i++)
-    RTVEC_ELT (v, i) = GEN_INT (val);
+    RTVEC_ELT (v, i) = cache;
 
   return gen_rtx_CONST_VECTOR (mode, v);
 }
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index b88d13c13f277e8cdb88b5dc8545ffa01408a0fa..12dbf475e31933cff781c2f9e9c1cfbe2ce108bb 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6158,7 +6158,8 @@ proc check_effective_target_vect_call_copysignf { } {
     } else {
 	set et_vect_call_copysignf_saved($et_index) 0
 	if { [istarget i?86-*-*] || [istarget x86_64-*-*]
-	     || [istarget powerpc*-*-*] } {
+	     || [istarget powerpc*-*-*]
+	     || [istarget aarch64*-*-*] } {
 	   set et_vect_call_copysignf_saved($et_index) 1
 	}
     }