diff mbox series

[v3] AArch64: Fix copysign patterns

Message ID PAWPR08MB89826AA6F8C0807EAEE00CCD83472@PAWPR08MB8982.eurprd08.prod.outlook.com
State New
Headers show
Series [v3] AArch64: Fix copysign patterns | expand

Commit Message

Wilco Dijkstra Oct. 17, 2024, 1:21 p.m. UTC
The current copysign pattern has a mismatch in the predicates and constraints -
operand[2] is a register_operand but also has an alternative X which allows any
operand.  Since it is a floating point operation, having an integer alternative
makes no sense.  Change the expander to always use vector immediates which results
in better code and sharing of immediates between copysign and xorsign.

Passes bootstrap and regress, OK for commit?

gcc/Changelog:
        * config/aarch64/aarch64.md (copysign<GPF:mode>3): Widen immediate to vector.
        (copysign<GPF:mode>3_insn): Use VQ_INT_EQUIV in operand 3.
        * config/aarch64/iterators.md (VQ_INT_EQUIV): New iterator.
        (vq_int_equiv): Likewise.

testsuite/Changelog:
        * gcc.target/aarch64/copysign_3.c: New test.
        * gcc.target/aarch64/copysign_4.c: New test.
        * gcc.target/aarch64/fneg-abs_2.c: Fixup test.
        * gcc.target/aarch64/sve/fneg-abs_2.c: Likewise.

---
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c54b29cd64b9e0dc6c6d12735049386ccedc5408..71f9743df671b70e6a2d189f49de58995398abee 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7218,20 +7218,11 @@  (define_expand "lrint<GPF:mode><GPI:mode>2"
 }
 )
 
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
 ;;
-;;   LDR d2, #(1 << 63)
-;;   BSL v2.8b, [y], [x]
+;;	movi    v31.4s, 0x80, lsl 24
+;;	bit     v0.16b, v1.16b, v31.16b
 ;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF.  Because
-;; we expect these operations to nearly always operate on
-;; floating-point values, we do not want the operation to be
-;; simplified into a bit-field insert operation that operates on the
-;; integer side, since typically that would involve three inter-bank
-;; register copies.  As we do not expect copysign to be followed by
-;; other logical operations on the result, it seems preferable to keep
-;; this as an unspec operation, rather than exposing the underlying
-;; logic to the compiler.
 
 (define_expand "copysign<GPF:mode>3"
   [(match_operand:GPF 0 "register_operand")
@@ -7239,32 +7230,25 @@  (define_expand "copysign<GPF:mode>3"
    (match_operand:GPF 2 "nonmemory_operand")]
   "TARGET_SIMD"
 {
-  rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
-			       << (GET_MODE_BITSIZE (<MODE>mode) - 1));
-  /* copysign (x, -1) should instead be expanded as orr with the sign
-     bit.  */
+  rtx sign = GEN_INT (HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (<MODE>mode) - 1));
+  rtx v_bitmask = gen_const_vec_duplicate (<VQ_INT_EQUIV>mode, sign);
+  v_bitmask = force_reg (<VQ_INT_EQUIV>mode, v_bitmask);
+
+  /* copysign (x, -1) should instead be expanded as orr with the signbit.  */
   rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+
   if (GET_CODE (op2_elt) == CONST_DOUBLE
       && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
     {
-      rtx v_bitmask
-	= force_reg (V2<V_INT_EQUIV>mode,
-		     gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
-					      signbit_const));
-
-      emit_insn (gen_iorv2<v_int_equiv>3 (
-	lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
-	lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
+      emit_insn (gen_ior<vq_int_equiv>3 (
+	lowpart_subreg (<VQ_INT_EQUIV>mode, operands[0], <MODE>mode),
+	lowpart_subreg (<VQ_INT_EQUIV>mode, operands[1], <MODE>mode),
 	v_bitmask));
       DONE;
     }
-
-  machine_mode int_mode = <V_INT_EQUIV>mode;
-  rtx bitmask = gen_reg_rtx (int_mode);
-  emit_move_insn (bitmask, signbit_const);
   operands[2] = force_reg (<MODE>mode, operands[2]);
   emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
-				       bitmask));
+				       v_bitmask));
   DONE;
 }
 )
@@ -7273,23 +7257,21 @@  (define_insn "copysign<GPF:mode>3_insn"
   [(set (match_operand:GPF 0 "register_operand")
 	(unspec:GPF [(match_operand:GPF 1 "register_operand")
 		     (match_operand:GPF 2 "register_operand")
-		     (match_operand:<V_INT_EQUIV> 3 "register_operand")]
+		     (match_operand:<VQ_INT_EQUIV> 3 "register_operand")]
 	 UNSPEC_COPYSIGN))]
   "TARGET_SIMD"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type  ]
      [ w        , w , w , 0 ; neon_bsl<q>  ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
      [ w        , 0 , w , w ; neon_bsl<q>  ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
      [ w        , w , 0 , w ; neon_bsl<q>  ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
-     [ r        , r , 0 , X ; bfm          ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
   }
 )
 
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
 ;;
-;; LDR   d2, #1<<63
-;; AND   v3.8B, v1.8B, v2.8B
-;; EOR   v0.8B, v0.8B, v3.8B
+;;	movi    v31.4s, 0x80, lsl 24
+;;	and     v31.16b, v31.16b, v1.16b
+;;	eor     v0.16b, v31.16b, v0.16b
 ;;
 
 (define_expand "@xorsign<mode>3"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1322193b027c9ad1d45d5b5ebbeea0e8537615d3..a1ea66048b2a5066381194779ff4d2998228d8f7 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1889,6 +1889,14 @@  (define_mode_attr v_int_equiv [(V8QI "v8qi") (V16QI "v16qi")
 			       (VNx8SF  "vnx8si") (VNx16SF "vnx16si")
 ])
 
+;; Mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr VQ_INT_EQUIV [(DF   "V2DI")   (SF   "V4SI")
+])
+
+;; Lower case mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr vq_int_equiv [(DF   "v2di")   (SF   "v4si")
+])
+
 ;; Floating-point equivalent of selected modes.
 (define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
 			      (VNx8BF "VNx8HF")
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..be48682420f1ff84e80af9efd9d11f64bd6e8052
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..f3cec2fc9c21a4eaa3b6556479aeb15c04358a1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+
+float f1 (float x, float y)
+{
+  return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+  return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
index 18d10ee834d5d9b4361d890447060e78f09d3a73..9fe8e9bde6965875816e2aa722c36028ac233198 100644
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -9,7 +9,7 @@ 
 
 /*
 ** f1:
-**	orr	v[0-9]+.2s, #?128, lsl #?24
+**	orr	v[0-9]+.4s, #?128, lsl #?24
 **	ret
 */
 float32_t f1 (float32_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
index fe08fe31fe87aab4a7ce8497d05488a42fe9ae21..cc97c95d1521be6693f3182b485bab2aa4b1daa0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,7 +7,7 @@ 
 
 /*
 ** f1:
-**	orr	v0.2s, #?128, lsl #?24
+**	orr	v0.4s, #?128, lsl #?24
 **	ret
 */
 float32_t f1 (float32_t a)