@@ -161,6 +161,7 @@ (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
[?w, r ; f_mcr , * , *] fmov\t%d0, %1
[?r, r ; mov_reg , * , *] mov\t%0, %1
[w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_immediate (operands[1], 64);
+ [w , vsl; * , sve , *] mov\t%Z0.<Vetype>, %1
[w , Dz; f_mcr , * , *] fmov\t%d0, xzr
[w , Dx; neon_move , simd , 8] #
}
@@ -190,6 +191,7 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
[?w , r ; multiple , * , 8] #
[?r , r ; multiple , * , 8] #
[w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
+ [w , vsl; * , sve, 4] mov\t%Z0.<Vetype>, %1
[w , Dz; fmov , * , 4] fmov\t%d0, xzr
[w , Dx; neon_move , simd, 8] #
}
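For illustration (not part of the patch): the new `vsl` alternatives matter because the DImode-element sign mask used by double copysign, { 0x8000000000000000, 0x8000000000000000 }, has no Advanced SIMD MOVI encoding (64-bit MOVI only builds per-byte 0x00/0xff masks), but it is a valid SVE bitmask immediate, so with SVE enabled it can be materialised by the single `mov\t%Z0.<Vetype>, %1` template above. A hypothetical example in the style of the new testcases further down, assuming -O2 -march=armv8-a+sve:

    double set_sign (double x, double y)
    {
      /* The sign-bit mask is expected to come from an SVE bitmask move
         (the "mov\tz" the new scan-assembler tests look for) rather than
         a literal-pool load, since MOVI cannot encode the constant.  */
      return __builtin_copysign (x, y);
    }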
@@ -208,7 +210,6 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
else
{
if (FP_REGNUM_P (REGNO (operands[0]))
- && <MODE>mode == V2DImode
&& aarch64_maybe_generate_simd_constant (operands[0], operands[1],
<MODE>mode))
;
@@ -648,7 +649,7 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><
[(set_attr "type" "neon_dot<VS:q>")]
)
-(define_expand "copysign<mode>3"
+(define_expand "@copysign<mode>3"
[(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "nonmemory_operand")]
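For illustration: the added `@` makes this a parameterized name, so GCC generates mode-taking wrappers for the expander, roughly of the shape below (the real prototypes are auto-generated into insn-opinit.h; the exact return type is an assumption). This is what lets the scalar copysign<GPF:mode>3 expander later in the patch forward its operands to the vector pattern.

    /* Generated because the expander is named "@copysign<mode>3";
       the machine_mode argument selects the VHSDF mode.  */
    extern rtx gen_copysign3 (machine_mode, rtx, rtx, rtx);
    extern rtx maybe_gen_copysign3 (machine_mode, rtx, rtx, rtx);

    /* As used further down, with <VCONQ>mode standing for e.g. V4SFmode:  */
    emit_insn (gen_copysign3 (V4SFmode, tmp, op1, op2));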
@@ -1138,10 +1139,8 @@ (define_insn "ior<mode>3<vczle><vczbe>"
"TARGET_SIMD"
{@ [ cons: =0 , 1 , 2; attrs: arch ]
[ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
- [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
- [ w , 0 , Do ; simd ] \
- << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
- AARCH64_CHECK_ORR);
+ [ w , 0 , Do ; simd ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR);
+ [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, %2
}
[(set_attr "type" "neon_logic<q>")]
)
@@ -7218,20 +7218,11 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
}
)
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
;;
-;; LDR d2, #(1 << 63)
-;; BSL v2.8b, [y], [x]
+;; movi v31.4s, 0x80, lsl 24
+;; bit v0.16b, v1.16b, v31.16b
;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because
-;; we expect these operations to nearly always operate on
-;; floating-point values, we do not want the operation to be
-;; simplified into a bit-field insert operation that operates on the
-;; integer side, since typically that would involve three inter-bank
-;; register copies. As we do not expect copysign to be followed by
-;; other logical operations on the result, it seems preferable to keep
-;; this as an unspec operation, rather than exposing the underlying
-;; logic to the compiler.
(define_expand "copysign<GPF:mode>3"
[(match_operand:GPF 0 "register_operand")
@@ -7239,57 +7230,22 @@ (define_expand "copysign<GPF:mode>3"
(match_operand:GPF 2 "nonmemory_operand")]
"TARGET_SIMD"
{
- rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
- << (GET_MODE_BITSIZE (<MODE>mode) - 1));
- /* copysign (x, -1) should instead be expanded as orr with the sign
- bit. */
- rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
- if (GET_CODE (op2_elt) == CONST_DOUBLE
- && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
- {
- rtx v_bitmask
- = force_reg (V2<V_INT_EQUIV>mode,
- gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
- signbit_const));
-
- emit_insn (gen_iorv2<v_int_equiv>3 (
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
- v_bitmask));
- DONE;
- }
-
- machine_mode int_mode = <V_INT_EQUIV>mode;
- rtx bitmask = gen_reg_rtx (int_mode);
- emit_move_insn (bitmask, signbit_const);
- operands[2] = force_reg (<MODE>mode, operands[2]);
- emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
- bitmask));
+ rtx tmp = gen_reg_rtx (<VCONQ>mode);
+ rtx op1 = lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode);
+ rtx op2 = REG_P (operands[2])
+ ? lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode)
+ : gen_const_vec_duplicate (<VCONQ>mode, operands[2]);
+ emit_insn (gen_copysign3 (<VCONQ>mode, tmp, op1, op2));
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode));
DONE;
}
)
-(define_insn "copysign<GPF:mode>3_insn"
- [(set (match_operand:GPF 0 "register_operand")
- (unspec:GPF [(match_operand:GPF 1 "register_operand")
- (match_operand:GPF 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3 "register_operand")]
- UNSPEC_COPYSIGN))]
- "TARGET_SIMD"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ]
- [ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
- [ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
- [ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
- [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
- }
-)
-
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
;;
-;; LDR d2, #1<<63
-;; AND v3.8B, v1.8B, v2.8B
-;; EOR v0.8B, v0.8B, v3.8B
+;; movi v31.4s, 0x80, lsl 24
+;; and v31.16b, v31.16b, v1.16b
+;; eor v0.16b, v31.16b, v0.16b
;;
(define_expand "@xorsign<mode>3"
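A minimal sketch of the intended scalar lowering (illustrative only, mirroring the comment above; register numbers depend on allocation):

    float set_signf (float mag, float sgn)
    {
      /* With the reworked expander this should lower to
             movi  v31.4s, 0x80, lsl 24
             bit   v0.16b, v1.16b, v31.16b
         instead of the old UNSPEC_COPYSIGN bsl/bit/bif alternatives or a
         GP-side bfxil with inter-bank register copies.  */
      return __builtin_copysignf (mag, sgn);
    }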
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+float f1 (float x, float y)
+{
+ return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+ return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+
+float f1 (float x, float y)
+{
+ return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+ return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
@@ -9,7 +9,7 @@
/*
** f1:
-** orr v[0-9]+.2s, #?128, lsl #?24
+** orr v[0-9]+.[24]s, #?128, lsl #?24
** ret
*/
float32_t f1 (float32_t a)