Message ID | PAWPR08MB8982A492455DBF862BFE973A83622@PAWPR08MB8982.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | [v2] AArch64: Fix copysign patterns | expand |
Hi Wilco, Thanks for the patch. This mostly looks good. Just added a couple of clarifications. On 9/18/2024 8:17 PM, Wilco Dijkstra wrote: > v2: Add more testcase fixes. > > The current copysign pattern has a mismatch in the predicates and constraints - > operand[2] is a register_operand but also has an alternative X which allows any > operand. Since it is a floating point operation, having an integer alternative > makes no sense. Change the expander to always use the vector variant of copysign > which results in better code. Add a SVE bitmask move immediate alternative to > the aarch64_simd_mov patterns so we emit a single move when SVE is available. > > Passes bootstrap and regress, OK for commit? > > gcc: > * config/aarch64/aarch64.md (copysign<GPF:mode>3): Defer to AdvSIMD copysign. Should the things after "(copysign..") be on a newline? I have mostly seen GCC ChangeLogs have file name and individual elements separated by newlines. > (copysign<GPF:mode>3_insn): Remove pattern. > * config/aarch64/aarch64-simd.md (aarch64_simd_mov<VDMOV:mode>): Add SVE movimm > alternative. Similar comment about file name and the instruction pattern being on separate lines. > (aarch64_simd_mov<VQMOV:mode>): Likewise. Remove redundant V2DI check. > (copysign<mode>3): Make global. > (ior<mode>3<vczle><vczbe>): Move Neon immediate alternative before the SVE one. > > testsuite: > * gcc.target/aarch64/copysign_3.c: New test. > * gcc.target/aarch64/copysign_4.c: New test. > * gcc.target/aarch64/fneg-abs_2.c: Allow .2s and .4s. > * gcc.target/aarch64/sve/fneg-abs_1.c: Fixup test. > * gcc.target/aarch64/sve/fneg-abs_2.c: Likewise. 
> > --- > > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index e70d59380ed295577721f15277c28829d42a0189..3077e920ce623c92d21193124747ff7ad010d006 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -161,6 +161,7 @@ (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>" > [?w, r ; f_mcr , * , *] fmov\t%d0, %1 > [?r, r ; mov_reg , * , *] mov\t%0, %1 > [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_immediate (operands[1], 64); > + [w , vsl; * , sve , *] mov\t%Z0.<Vetype>, %1 > [w , Dz; f_mcr , * , *] fmov\t%d0, xzr > [w , Dx; neon_move , simd , 8] # > } > @@ -190,6 +191,7 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" > [?w , r ; multiple , * , 8] # > [?r , r ; multiple , * , 8] # > [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128); > + [w , vsl; * , sve, 4] mov\t%Z0.<Vetype>, %1 > [w , Dz; fmov , * , 4] fmov\t%d0, xzr > [w , Dx; neon_move , simd, 8] # > } > @@ -208,7 +210,6 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" > else > { > if (FP_REGNUM_P (REGNO (operands[0])) > - && <MODE>mode == V2DImode > && aarch64_maybe_generate_simd_constant (operands[0], operands[1], > <MODE>mode)) > ; > @@ -648,7 +649,7 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle>< > [(set_attr "type" "neon_dot<VS:q>")] > ) > > -(define_expand "copysign<mode>3" > +(define_expand "@copysign<mode>3" > [(match_operand:VHSDF 0 "register_operand") > (match_operand:VHSDF 1 "register_operand") > (match_operand:VHSDF 2 "nonmemory_operand")] > @@ -1138,10 +1139,8 @@ (define_insn "ior<mode>3<vczle><vczbe>" > "TARGET_SIMD" > {@ [ cons: =0 , 1 , 2; attrs: arch ] > [ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> > - [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2 > - [ w , 0 , Do ; simd ] \ > - << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \ > - AARCH64_CHECK_ORR); > + [ w 
, 0 , Do ; simd ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR); > + [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, %2 > } > [(set_attr "type" "neon_logic<q>")] > ) > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index c54b29cd64b9e0dc6c6d12735049386ccedc5408..e9b148e59abf81cee53cb0dd846af9a62bbad294 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -7218,20 +7218,11 @@ (define_expand "lrint<GPF:mode><GPI:mode>2" > } > ) > > -;; For copysign (x, y), we want to generate: > +;; For copysignf (x, y), we want to generate: > ;; > -;; LDR d2, #(1 << 63) > -;; BSL v2.8b, [y], [x] > +;; movi v31.4s, 0x80, lsl 24 > +;; bit v0.16b, v1.16b, v31.16b > ;; > -;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because > -;; we expect these operations to nearly always operate on > -;; floating-point values, we do not want the operation to be > -;; simplified into a bit-field insert operation that operates on the > -;; integer side, since typically that would involve three inter-bank > -;; register copies. As we do not expect copysign to be followed by > -;; other logical operations on the result, it seems preferable to keep > -;; this as an unspec operation, rather than exposing the underlying > -;; logic to the compiler. > > (define_expand "copysign<GPF:mode>3" > [(match_operand:GPF 0 "register_operand") > @@ -7239,57 +7230,22 @@ (define_expand "copysign<GPF:mode>3" > (match_operand:GPF 2 "nonmemory_operand")] > "TARGET_SIMD" > { > - rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U > - << (GET_MODE_BITSIZE (<MODE>mode) - 1)); > - /* copysign (x, -1) should instead be expanded as orr with the sign > - bit. 
*/ > - rtx op2_elt = unwrap_const_vec_duplicate (operands[2]); > - if (GET_CODE (op2_elt) == CONST_DOUBLE > - && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt))) > - { > - rtx v_bitmask > - = force_reg (V2<V_INT_EQUIV>mode, > - gen_const_vec_duplicate (V2<V_INT_EQUIV>mode, > - signbit_const)); > - > - emit_insn (gen_iorv2<v_int_equiv>3 ( > - lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode), > - lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode), > - v_bitmask)); > - DONE; > - } > - > - machine_mode int_mode = <V_INT_EQUIV>mode; > - rtx bitmask = gen_reg_rtx (int_mode); > - emit_move_insn (bitmask, signbit_const); > - operands[2] = force_reg (<MODE>mode, operands[2]); > - emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2], > - bitmask)); > + rtx tmp = gen_reg_rtx (<VCONQ>mode); > + rtx op1 = lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode); > + rtx op2 = REG_P (operands[2]) > + ? lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode) > + : gen_const_vec_duplicate (<VCONQ>mode, operands[2]); > + emit_insn (gen_copysign3 (<VCONQ>mode, tmp, op1, op2)); > + emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode)); > DONE; > } > ) > > -(define_insn "copysign<GPF:mode>3_insn" > - [(set (match_operand:GPF 0 "register_operand") > - (unspec:GPF [(match_operand:GPF 1 "register_operand") > - (match_operand:GPF 2 "register_operand") > - (match_operand:<V_INT_EQUIV> 3 "register_operand")] > - UNSPEC_COPYSIGN))] > - "TARGET_SIMD" > - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ] > - [ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> > - [ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> > - [ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype> > - [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1> > - } > -) > - > - > -;; For xorsign (x, y), we want to generate: > +;; For xorsignf (x, y), we want to generate: > ;; > -;; LDR d2, #1<<63 > 
-;; AND v3.8B, v1.8B, v2.8B > -;; EOR v0.8B, v0.8B, v3.8B > +;; movi v31.4s, 0x80, lsl 24 > +;; and v31.16b, v31.16b, v1.16b > +;; eor v0.16b, v31.16b, v0.16b > ;; > > (define_expand "@xorsign<mode>3" > diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c > new file mode 100644 > index 0000000000000000000000000000000000000000..be48682420f1ff84e80af9efd9d11f64bd6e8052 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +float f1 (float x, float y) > +{ > + return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y); > +} > + > +double f2 (double x, double y) > +{ > + return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y); > +} > + > +/* { dg-final { scan-assembler-times "movi\t" 2 } } */ > +/* { dg-final { scan-assembler-not "copysign\tw" } } */ > +/* { dg-final { scan-assembler-not "dup\tw" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c > new file mode 100644 > index 0000000000000000000000000000000000000000..f3cec2fc9c21a4eaa3b6556479aeb15c04358a1c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=armv8-a+sve" } */ > + > +float f1 (float x, float y) > +{ > + return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y); > +} > + > +double f2 (double x, double y) > +{ > + return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y); > +} > + > +/* { dg-final { scan-assembler-times "movi\t" 1 } } */ > +/* { dg-final { scan-assembler-times "mov\tz" 1 } } */ > +/* { dg-final { scan-assembler-not "copysign\tw" } } */ > +/* { dg-final { scan-assembler-not "dup\tw" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c > index 
18d10ee834d5d9b4361d890447060e78f09d3a73..1544bc5f1a736e95dd8bd2c608405aebb54ded1f 100644 > --- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c > @@ -9,7 +9,7 @@ > > /* > ** f1: > -** orr v[0-9]+.2s, #?128, lsl #?24 > +** orr v[0-9]+.[24]s, #?128, lsl #?24 > ** ret > */ > float32_t f1 (float32_t a) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c > index a8b27199ff83d0eebadfc7dcf03f94e1229d76b8..1ebdc6aaeb102da25ad561b24641e72a652175fa 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c > @@ -6,7 +6,7 @@ > > /* > ** t1: > -** orr z[0-9]+.s, z[0-9]+.s, #-2147483648 > +** orr v0.2s, #?128, lsl #?24 > ** ret > */ > float32x2_t t1 (float32x2_t a) > @@ -16,7 +16,7 @@ float32x2_t t1 (float32x2_t a) > > /* > ** t2: > -** orr z[0-9]+.s, z[0-9]+.s, #-2147483648 > +** orr v0.4s, #?128, lsl #?24 > ** ret > */ > float32x4_t t2 (float32x4_t a) > @@ -26,7 +26,7 @@ float32x4_t t2 (float32x4_t a) > > /* > ** t3: > -** orr z[0-9]+.d, z[0-9]+.d, #-9223372036854775808 > +** orr z[0-9]+.d, z[0-9]+.d, -9223372036854775808 > ** ret > */ > float64x2_t t3 (float64x2_t a) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c > index 19a7695e605bc8aced486a9c450d1cdc6be4691a..122152c0ebe4ea6840e418e75a2cadbfc9b0aba4 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c > @@ -7,7 +7,7 @@ > > /* > ** f1: > -** orr z0.s, z0.s, #-2147483648 > +** orr v0.4s, #?128, lsl #?24 > ** ret > */ > float32_t f1 (float32_t a) > @@ -17,7 +17,7 @@ float32_t f1 (float32_t a) > > /* > ** f2: > -** orr z0.d, z0.d, #-9223372036854775808 > +** orr z0.d, z0.d, -9223372036854775808 > ** ret > */ > float64_t f2 (float64_t a) >
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e70d59380ed295577721f15277c28829d42a0189..3077e920ce623c92d21193124747ff7ad010d006 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -161,6 +161,7 @@ (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>" [?w, r ; f_mcr , * , *] fmov\t%d0, %1 [?r, r ; mov_reg , * , *] mov\t%0, %1 [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_immediate (operands[1], 64); + [w , vsl; * , sve , *] mov\t%Z0.<Vetype>, %1 [w , Dz; f_mcr , * , *] fmov\t%d0, xzr [w , Dx; neon_move , simd , 8] # } @@ -190,6 +191,7 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" [?w , r ; multiple , * , 8] # [?r , r ; multiple , * , 8] # [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128); + [w , vsl; * , sve, 4] mov\t%Z0.<Vetype>, %1 [w , Dz; fmov , * , 4] fmov\t%d0, xzr [w , Dx; neon_move , simd, 8] # } @@ -208,7 +210,6 @@ (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>" else { if (FP_REGNUM_P (REGNO (operands[0])) - && <MODE>mode == V2DImode && aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode)) ; @@ -648,7 +649,7 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle>< [(set_attr "type" "neon_dot<VS:q>")] ) -(define_expand "copysign<mode>3" +(define_expand "@copysign<mode>3" [(match_operand:VHSDF 0 "register_operand") (match_operand:VHSDF 1 "register_operand") (match_operand:VHSDF 2 "nonmemory_operand")] @@ -1138,10 +1139,8 @@ (define_insn "ior<mode>3<vczle><vczbe>" "TARGET_SIMD" {@ [ cons: =0 , 1 , 2; attrs: arch ] [ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> - [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2 - [ w , 0 , Do ; simd ] \ - << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \ - AARCH64_CHECK_ORR); + [ w , 0 , Do ; simd ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR); + 
[ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, %2 } [(set_attr "type" "neon_logic<q>")] ) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c54b29cd64b9e0dc6c6d12735049386ccedc5408..e9b148e59abf81cee53cb0dd846af9a62bbad294 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -7218,20 +7218,11 @@ (define_expand "lrint<GPF:mode><GPI:mode>2" } ) -;; For copysign (x, y), we want to generate: +;; For copysignf (x, y), we want to generate: ;; -;; LDR d2, #(1 << 63) -;; BSL v2.8b, [y], [x] +;; movi v31.4s, 0x80, lsl 24 +;; bit v0.16b, v1.16b, v31.16b ;; -;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because -;; we expect these operations to nearly always operate on -;; floating-point values, we do not want the operation to be -;; simplified into a bit-field insert operation that operates on the -;; integer side, since typically that would involve three inter-bank -;; register copies. As we do not expect copysign to be followed by -;; other logical operations on the result, it seems preferable to keep -;; this as an unspec operation, rather than exposing the underlying -;; logic to the compiler. (define_expand "copysign<GPF:mode>3" [(match_operand:GPF 0 "register_operand") @@ -7239,57 +7230,22 @@ (define_expand "copysign<GPF:mode>3" (match_operand:GPF 2 "nonmemory_operand")] "TARGET_SIMD" { - rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U - << (GET_MODE_BITSIZE (<MODE>mode) - 1)); - /* copysign (x, -1) should instead be expanded as orr with the sign - bit. 
*/ - rtx op2_elt = unwrap_const_vec_duplicate (operands[2]); - if (GET_CODE (op2_elt) == CONST_DOUBLE - && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt))) - { - rtx v_bitmask - = force_reg (V2<V_INT_EQUIV>mode, - gen_const_vec_duplicate (V2<V_INT_EQUIV>mode, - signbit_const)); - - emit_insn (gen_iorv2<v_int_equiv>3 ( - lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode), - lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode), - v_bitmask)); - DONE; - } - - machine_mode int_mode = <V_INT_EQUIV>mode; - rtx bitmask = gen_reg_rtx (int_mode); - emit_move_insn (bitmask, signbit_const); - operands[2] = force_reg (<MODE>mode, operands[2]); - emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2], - bitmask)); + rtx tmp = gen_reg_rtx (<VCONQ>mode); + rtx op1 = lowpart_subreg (<VCONQ>mode, operands[1], <MODE>mode); + rtx op2 = REG_P (operands[2]) + ? lowpart_subreg (<VCONQ>mode, operands[2], <MODE>mode) + : gen_const_vec_duplicate (<VCONQ>mode, operands[2]); + emit_insn (gen_copysign3 (<VCONQ>mode, tmp, op1, op2)); + emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, tmp, <VCONQ>mode)); DONE; } ) -(define_insn "copysign<GPF:mode>3_insn" - [(set (match_operand:GPF 0 "register_operand") - (unspec:GPF [(match_operand:GPF 1 "register_operand") - (match_operand:GPF 2 "register_operand") - (match_operand:<V_INT_EQUIV> 3 "register_operand")] - UNSPEC_COPYSIGN))] - "TARGET_SIMD" - {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ] - [ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> - [ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> - [ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype> - [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1> - } -) - - -;; For xorsign (x, y), we want to generate: +;; For xorsignf (x, y), we want to generate: ;; -;; LDR d2, #1<<63 -;; AND v3.8B, v1.8B, v2.8B -;; EOR v0.8B, v0.8B, v3.8B +;; movi v31.4s, 0x80, lsl 24 +;; and v31.16b, 
v31.16b, v1.16b +;; eor v0.16b, v31.16b, v0.16b ;; (define_expand "@xorsign<mode>3" diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c new file mode 100644 index 0000000000000000000000000000000000000000..be48682420f1ff84e80af9efd9d11f64bd6e8052 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +float f1 (float x, float y) +{ + return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y); +} + +double f2 (double x, double y) +{ + return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y); +} + +/* { dg-final { scan-assembler-times "movi\t" 2 } } */ +/* { dg-final { scan-assembler-not "copysign\tw" } } */ +/* { dg-final { scan-assembler-not "dup\tw" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c new file mode 100644 index 0000000000000000000000000000000000000000..f3cec2fc9c21a4eaa3b6556479aeb15c04358a1c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8-a+sve" } */ + +float f1 (float x, float y) +{ + return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y); +} + +double f2 (double x, double y) +{ + return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y); +} + +/* { dg-final { scan-assembler-times "movi\t" 1 } } */ +/* { dg-final { scan-assembler-times "mov\tz" 1 } } */ +/* { dg-final { scan-assembler-not "copysign\tw" } } */ +/* { dg-final { scan-assembler-not "dup\tw" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c index 18d10ee834d5d9b4361d890447060e78f09d3a73..1544bc5f1a736e95dd8bd2c608405aebb54ded1f 100644 --- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c @@ -9,7 +9,7 @@ /* ** f1: -** orr v[0-9]+.2s, #?128, lsl #?24 
+** orr v[0-9]+.[24]s, #?128, lsl #?24 ** ret */ float32_t f1 (float32_t a) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c index a8b27199ff83d0eebadfc7dcf03f94e1229d76b8..1ebdc6aaeb102da25ad561b24641e72a652175fa 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c @@ -6,7 +6,7 @@ /* ** t1: -** orr z[0-9]+.s, z[0-9]+.s, #-2147483648 +** orr v0.2s, #?128, lsl #?24 ** ret */ float32x2_t t1 (float32x2_t a) @@ -16,7 +16,7 @@ float32x2_t t1 (float32x2_t a) /* ** t2: -** orr z[0-9]+.s, z[0-9]+.s, #-2147483648 +** orr v0.4s, #?128, lsl #?24 ** ret */ float32x4_t t2 (float32x4_t a) @@ -26,7 +26,7 @@ float32x4_t t2 (float32x4_t a) /* ** t3: -** orr z[0-9]+.d, z[0-9]+.d, #-9223372036854775808 +** orr z[0-9]+.d, z[0-9]+.d, -9223372036854775808 ** ret */ float64x2_t t3 (float64x2_t a) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c index 19a7695e605bc8aced486a9c450d1cdc6be4691a..122152c0ebe4ea6840e418e75a2cadbfc9b0aba4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c @@ -7,7 +7,7 @@ /* ** f1: -** orr z0.s, z0.s, #-2147483648 +** orr v0.4s, #?128, lsl #?24 ** ret */ float32_t f1 (float32_t a) @@ -17,7 +17,7 @@ float32_t f1 (float32_t a) /* ** f2: -** orr z0.d, z0.d, #-9223372036854775808 +** orr z0.d, z0.d, -9223372036854775808 ** ret */ float64_t f2 (float64_t a)