@@ -7218,20 +7218,11 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
}
)
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
;;
-;; LDR d2, #(1 << 63)
-;; BSL v2.8b, [y], [x]
+;; movi v31.4s, 0x80, lsl 24
+;; bit v0.16b, v1.16b, v31.16b
;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because
-;; we expect these operations to nearly always operate on
-;; floating-point values, we do not want the operation to be
-;; simplified into a bit-field insert operation that operates on the
-;; integer side, since typically that would involve three inter-bank
-;; register copies. As we do not expect copysign to be followed by
-;; other logical operations on the result, it seems preferable to keep
-;; this as an unspec operation, rather than exposing the underlying
-;; logic to the compiler.
(define_expand "copysign<GPF:mode>3"
[(match_operand:GPF 0 "register_operand")
@@ -7239,32 +7230,25 @@ (define_expand "copysign<GPF:mode>3"
(match_operand:GPF 2 "nonmemory_operand")]
"TARGET_SIMD"
{
- rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
- << (GET_MODE_BITSIZE (<MODE>mode) - 1));
- /* copysign (x, -1) should instead be expanded as orr with the sign
- bit. */
+ rtx sign = GEN_INT (HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (<MODE>mode) - 1));
+ rtx v_bitmask = gen_const_vec_duplicate (<VQ_INT_EQUIV>mode, sign);
+ v_bitmask = force_reg (<VQ_INT_EQUIV>mode, v_bitmask);
+
+ /* copysign (x, negative constant) is expanded as ORR with the sign mask. */
rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+
if (GET_CODE (op2_elt) == CONST_DOUBLE
&& real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
{
- rtx v_bitmask
- = force_reg (V2<V_INT_EQUIV>mode,
- gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
- signbit_const));
-
- emit_insn (gen_iorv2<v_int_equiv>3 (
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
+ emit_insn (gen_ior<vq_int_equiv>3 (
+ lowpart_subreg (<VQ_INT_EQUIV>mode, operands[0], <MODE>mode),
+ lowpart_subreg (<VQ_INT_EQUIV>mode, operands[1], <MODE>mode),
v_bitmask));
DONE;
}
-
- machine_mode int_mode = <V_INT_EQUIV>mode;
- rtx bitmask = gen_reg_rtx (int_mode);
- emit_move_insn (bitmask, signbit_const);
operands[2] = force_reg (<MODE>mode, operands[2]);
emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
- bitmask));
+ v_bitmask));
DONE;
}
)
@@ -7273,23 +7257,21 @@ (define_insn "copysign<GPF:mode>3_insn"
[(set (match_operand:GPF 0 "register_operand")
(unspec:GPF [(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3 "register_operand")]
+ (match_operand:<VQ_INT_EQUIV> 3 "register_operand")]
UNSPEC_COPYSIGN))]
"TARGET_SIMD"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ]
[ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
[ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
[ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
- [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
}
)
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
;;
-;; LDR d2, #1<<63
-;; AND v3.8B, v1.8B, v2.8B
-;; EOR v0.8B, v0.8B, v3.8B
+;; movi v31.4s, 0x80, lsl 24
+;; and v31.16b, v31.16b, v1.16b
+;; eor v0.16b, v31.16b, v0.16b
;;
(define_expand "@xorsign<mode>3"
@@ -1889,6 +1889,14 @@ (define_mode_attr v_int_equiv [(V8QI "v8qi") (V16QI "v16qi")
(VNx8SF "vnx8si") (VNx16SF "vnx16si")
])
+;; 128-bit integer vector mode with elements as wide as the scalar FP mode.
+(define_mode_attr VQ_INT_EQUIV [(DF "V2DI") (SF "V4SI")
+])
+
+;; Lower-case VQ_INT_EQUIV, for building names such as ior<vq_int_equiv>3.
+(define_mode_attr vq_int_equiv [(DF "v2di") (SF "v4si")
+])
+
;; Floating-point equivalent of selected modes.
(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
(VNx8BF "VNx8HF")
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* Each function multiplies two copysign results whose magnitude is 1.0.  */
+float f1 (float x, float y)
+{
+ return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+ return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+/* With SVE enabled: two copysign calls on the constant 1.0 per function.  */
+float f1 (float x, float y)
+{
+ return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+ return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
@@ -9,7 +9,7 @@
/*
** f1:
-** orr v[0-9]+.2s, #?128, lsl #?24
+** orr v[0-9]+.4s, #?128, lsl #?24
** ret
*/
float32_t f1 (float32_t a)
@@ -7,7 +7,7 @@
/*
** f1:
-** orr v0.2s, #?128, lsl #?24
+** orr v0.4s, #?128, lsl #?24
** ret
*/
float32_t f1 (float32_t a)