Message ID | patch-17724-tamar@arm.com
State      | New
Series     | AArch64: Use SVE unpredicated LOGICAL expressions when Advanced SIMD inefficient [PR109154]
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> SVE has a much bigger immediate encoding range for bitmasks than
> Advanced SIMD, so on an SVE-capable system, if an Advanced SIMD
> Inclusive-OR by immediate would require a reload, use an unpredicated
> SVE ORR instead.
>
> This has both speed and size improvements.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/109154
> 	* config/aarch64/aarch64.md (<optab><mode>3): Convert to new syntax and
> 	SVE split case.
> 	* config/aarch64/iterators.md (VCONV, vconv): New.
>
> gcc/testsuite/ChangeLog:
>
> 	PR tree-optimization/109154
> 	* gcc.target/aarch64/sve/fneg-abs_2.c: Updated.
> 	* gcc.target/aarch64/sve/fneg-abs_4.c: Updated.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 60c92213c75a2a4c18a6b59ae52fe45d1e872718..377c5cafedd43d8d1320489a36267cc6e5f15239 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -4551,17 +4551,27 @@ (define_insn_and_split "*aarch64_and<mode>_imm2"
>    }
>  )
>
> -(define_insn "<optab><mode>3"
> -  [(set (match_operand:GPI 0 "register_operand" "=r,rk,w")
> -	(LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r,w")
> -		     (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>,w")))]
> -  ""
> -  "@
> -  <logical>\\t%<w>0, %<w>1, %<w>2
> -  <logical>\\t%<w>0, %<w>1, %2
> -  <logical>\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
> -  [(set_attr "type" "logic_reg,logic_imm,neon_logic")
> -   (set_attr "arch" "*,*,simd")]
> +(define_insn_and_split "<optab><mode>3"
> +  [(set (match_operand:GPI 0 "register_operand")
> +	(LOGICAL:GPI (match_operand:GPI 1 "register_operand")
> +		     (match_operand:GPI 2 "aarch64_logical_operand")))]
> +  ""
> +  {@ [cons: =0, 1, 2; attrs: type, arch]
> +     [r , %r, r       ; logic_reg , *   ] <logical>\t%<w>0, %<w>1, %<w>2
> +     [rk, r , <lconst>; logic_imm , *   ] <logical>\t%<w>0, %<w>1, %2
> +     [w , 0 , <lconst>; *         , sve ] #
> +     [w , w , w       ; neon_logic, simd] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
> +  }
> +  "&& TARGET_SVE && rtx_equal_p (operands[0], operands[1])
> +   && satisfies_constraint_<lconst> (operands[2])
> +   && FP_REGNUM_P (REGNO (operands[0]))"
> +  [(const_int 0)]
> +  {
> +    rtx op1 = lowpart_subreg (<VCONV>mode, operands[1], <MODE>mode);
> +    rtx op2 = gen_const_vec_duplicate (<VCONV>mode, operands[2]);
> +    emit_insn (gen_<optab><vconv>3 (op1, op1, op2));
> +    DONE;
> +  }
>  )

The WIP SME patches add a %Z modifier for 'z' register prefixes,
similarly to b/h/s/d for scalar FP.  With that I think the alternative
can be:

     [w , 0 , <lconst>; *         , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2

although it would be nice to keep the hex constant.

Will try to post the patches up to that part soon.

Thanks,
Richard
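[For context: a self-contained sketch of the case the patch targets,
adapted from the fneg-abs testcases it updates.  The function body is a
guess at the testcase's intent, and the register numbers in the comment
are illustrative; the before/after instruction sequences follow the test
expectations in the patch.]

    /* Before the patch there is no Advanced SIMD ORR (vector, immediate)
       encoding for a 64-bit element, so the constant is materialized
       through a general-purpose register and moved across:

	 mov	x0, -9223372036854775808
	 fmov	d31, x0
	 orr	v0.8b, v0.8b, v31.8b

       After the patch, on an SVE-capable target, the same OR uses the much
       wider SVE bitmask-immediate encoding and becomes one instruction:

	 orr	z0.d, z0.d, #0x8000000000000000  */

    #include <stdint.h>
    #include <string.h>

    double negabs (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);	/* Reinterpret the double's bits.  */
      bits |= UINT64_C (1) << 63;	/* Set the IEEE sign bit: -|x|.  */
      memcpy (&x, &bits, sizeof x);
      return x;
    }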
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Wednesday, September 27, 2023 9:50 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Subject: Re: [PATCH]AArch64: Use SVE unpredicated LOGICAL expressions
> when Advanced SIMD inefficient [PR109154]
>
> [...]
>
> The WIP SME patches add a %Z modifier for 'z' register prefixes,
> similarly to b/h/s/d for scalar FP.  With that I think the alternative
> can be:
>
>      [w , 0 , <lconst>; *         , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
>
> although it would be nice to keep the hex constant.

My original patch added a %u for (undecorated) which just prints the
register number, and changed %C to also accept a single constant instead
of only a uniform vector.  But I figured you wouldn't like that? 😊

Cheers,
Tamar
Tamar Christina <Tamar.Christina@arm.com> writes:
>> The WIP SME patches add a %Z modifier for 'z' register prefixes,
>> similarly to b/h/s/d for scalar FP.  With that I think the alternative
>> can be:
>>
>>      [w , 0 , <lconst>; *         , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
>>
>> although it would be nice to keep the hex constant.
>
> My original patch added a %u for (undecorated) which just prints the
> register number, and changed %C to also accept a single constant instead
> of only a uniform vector.
> But I figured you wouldn't like that? 😊

Not saying no to %u in future, but %Z seems more consistent with the
current approach.  And yeah, I'd also wondered about extending %C.
The problem is guessing whether to print a 32-bit, 64-bit or 128-bit
constant for negative immediates.

Thanks,
Richard
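[To make the width problem concrete — an illustrative sketch, not code
from the patch: an RTL CONST_INT is stored sign-extended and carries no
mode, so the same stored value yields different hex spellings depending
on the element width assumed:]

    #include <stdio.h>
    #include <inttypes.h>

    int main (void)
    {
      /* -2147483648 as held in a mode-less, sign-extended CONST_INT.  */
      long long imm = -2147483648LL;
      printf ("#0x%" PRIx32 "\n", (uint32_t) imm);	/* 32-bit: 0x80000000 */
      printf ("#0x%" PRIx64 "\n", (uint64_t) imm);	/* 64-bit: 0xffffffff80000000 */
      return 0;
    }

Neither spelling is recoverable from the constant alone, which is why a
%C-style modifier cannot know whether to print the 32-bit, 64-bit or
128-bit form.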
> >> The WIP SME patches add a %Z modifier for 'z' register prefixes,
> >> similarly to b/h/s/d for scalar FP.  With that I think the alternative
> >> can be:
> >>
> >>      [w , 0 , <lconst>; *         , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
> >>
> >> although it would be nice to keep the hex constant.
> >
> > My original patch added a %u for (undecorated) which just prints the
> > register number, and changed %C to also accept a single constant instead
> > of only a uniform vector.
>
> Not saying no to %u in future, but %Z seems more consistent with the
> current approach.  And yeah, I'd also wondered about extending %C.
> The problem is guessing whether to print a 32-bit, 64-bit or 128-bit
> constant for negative immediates.

I know we're waiting for the %Z, but I've updated the remainder of the
series, and for completeness and CI purposes I'm sending the updated
patch before the change to use %Z.

--

SVE has a much bigger immediate encoding range for bitmasks than
Advanced SIMD, so on an SVE-capable system, if an Advanced SIMD
Inclusive-OR by immediate would require a reload, use an unpredicated
SVE ORR instead.

This has both speed and size improvements.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/109154
	* config/aarch64/aarch64.md (<optab><mode>3): Add SVE split case.
	* config/aarch64/aarch64-simd.md (ior<mode>3<vczle><vczbe>): Likewise.
	* config/aarch64/iterators.md (VCONV, vconv): New.
	* config/aarch64/predicates.md (aarch64_orr_imm_sve_advsimd): New.

gcc/testsuite/ChangeLog:

	PR tree-optimization/109154
	* gcc.target/aarch64/sve/fneg-abs_1.c: Updated.
	* gcc.target/aarch64/sve/fneg-abs_2.c: Updated.
	* gcc.target/aarch64/sve/fneg-abs_4.c: Updated.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 33eceb436584ff73c7271f93639f2246d1af19e0..25a1e4e8ecf767636c0ff3cdab6cad6e1482f73e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1216,14 +1216,29 @@ (define_insn "and<mode>3<vczle><vczbe>"
 )
 
 ;; For ORR (vector, register) and ORR (vector, immediate)
-(define_insn "ior<mode>3<vczle><vczbe>"
+(define_insn_and_split "ior<mode>3<vczle><vczbe>"
   [(set (match_operand:VDQ_I 0 "register_operand")
 	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
-		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm")))]
+		   (match_operand:VDQ_I 2 "aarch64_orr_imm_sve_advsimd")))]
   "TARGET_SIMD"
-  {@ [ cons: =0 , 1 , 2  ]
-     [ w        , w , w  ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
-     [ w        , 0 , Do ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR);
+  {@ [ cons: =0 , 1 , 2  ; attrs: arch ]
+     [ w        , w , w  ; simd        ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+     [ w        , 0 , vsl; sve         ] #
+     [ w        , 0 , Do ; simd        ] \
+       << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
+					     AARCH64_CHECK_ORR);
+  }
+  "&& TARGET_SVE && rtx_equal_p (operands[0], operands[1])
+   && satisfies_constraint_vsl (operands[2])
+   && FP_REGNUM_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  {
+    rtx op1 = lowpart_subreg (<VCONV>mode, operands[1], <MODE>mode);
+    rtx op2 =
+      gen_const_vec_duplicate (<VCONV>mode,
+			       unwrap_const_vec_duplicate (operands[2]));
+    emit_insn (gen_ior<vconv>3 (op1, op1, op2));
+    DONE;
   }
   [(set_attr "type" "neon_logic<q>")]
 )
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 064d68ceb22533434468b22c4e5848e85a8c6eff..24349ecdbbab875f21975f116732a9e53762d4c1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4545,7 +4545,7 @@ (define_insn_and_split "*aarch64_and<mode>_imm2"
   }
 )
 
-(define_insn "<optab><mode>3"
+(define_insn_and_split "<optab><mode>3"
   [(set (match_operand:GPI 0 "register_operand")
 	(LOGICAL:GPI (match_operand:GPI 1 "register_operand")
 		     (match_operand:GPI 2 "aarch64_logical_operand")))]
@@ -4553,8 +4553,19 @@ (define_insn "<optab><mode>3"
   {@ [ cons: =0 , 1  , 2        ; attrs: type , arch ]
      [ r        , %r , r        ; logic_reg   , *    ] <logical>\t%<w>0, %<w>1, %<w>2
      [ rk       , r  , <lconst> ; logic_imm   , *    ] <logical>\t%<w>0, %<w>1, %2
+     [ w        , 0  , <lconst> ; *           , sve  ] #
      [ w        , w  , w        ; neon_logic  , simd ] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
   }
+  "&& TARGET_SVE && rtx_equal_p (operands[0], operands[1])
+   && satisfies_constraint_<lconst> (operands[2])
+   && FP_REGNUM_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  {
+    rtx op1 = lowpart_subreg (<VCONV>mode, operands[1], <MODE>mode);
+    rtx op2 = gen_const_vec_duplicate (<VCONV>mode, operands[2]);
+    emit_insn (gen_<optab><vconv>3 (op1, op1, op2));
+    DONE;
+  }
 )
 
 ;; zero_extend version of above
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index d17becc37e230684beaee3c69e2a0f0ce612eda5..5ec854a364e41b9827271ca6e870c8027336c7cd 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1432,6 +1432,19 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 			 (HI   "V8HI") (QI   "V16QI")
 			 (SF   "V4SF") (DF   "V2DF")])
 
+;; 128-bit container modes for the lower part of an SVE vector to the inner or
+;; neon source mode.
+(define_mode_attr VCONV [(SI   "VNx4SI")  (DI    "VNx2DI")
+			 (V8QI "VNx16QI") (V16QI "VNx16QI")
+			 (V4HI "VNx8HI")  (V8HI  "VNx8HI")
+			 (V2SI "VNx4SI")  (V4SI  "VNx4SI")
+			 (V2DI "VNx2DI")])
+(define_mode_attr vconv [(SI   "vnx4si")  (DI    "vnx2di")
+			 (V8QI "vnx16qi") (V16QI "vnx16qi")
+			 (V4HI "vnx8hi")  (V8HI  "vnx8hi")
+			 (V2SI "vnx4si")  (V4SI  "vnx4si")
+			 (V2DI "vnx2di")])
+
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
 			 (V4HI "V2HI")  (V8HI  "V4HI")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 01de47439744acb3708c645b98eaa607294a1f1f..a73724a7fc05636d4c0643a291f40f2609564778 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -871,6 +871,11 @@ (define_predicate "aarch64_sve_logical_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_sve_logical_immediate")))
 
+(define_predicate "aarch64_orr_imm_sve_advsimd"
+  (ior (match_operand 0 "aarch64_reg_or_orr_imm")
+       (and (match_test "TARGET_SVE")
+	    (match_operand 0 "aarch64_sve_logical_operand"))))
+
 (define_predicate "aarch64_sve_gather_offset_b"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_sve_gather_immediate_b")))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
index 0c7664e6de77a497682952653ffd417453854d52..68e6ef0bdb234f26d0c3b055e777a9e1fb214c6d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -6,7 +6,7 @@
 
 /*
 ** t1:
-**	orr	v[0-9]+.2s, #128, lsl #24
+**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
 **	ret
 */
 float32x2_t t1 (float32x2_t a)
@@ -16,7 +16,7 @@ float32x2_t t1 (float32x2_t a)
 
 /*
 ** t2:
-**	orr	v[0-9]+.4s, #128, lsl #24
+**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
 **	ret
 */
 float32x4_t t2 (float32x4_t a)
@@ -26,9 +26,7 @@ float32x4_t t2 (float32x4_t a)
 
 /*
 ** t3:
-**	adrp	x0, .LC[0-9]+
-**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
-**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
 **	ret
 */
 float64x2_t t3 (float64x2_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
index a60cd31b9294af2dac69eed1c93f899bd5c78fca..fe9f27bf91b8fb18205a5891a5d5e847a5d88e4b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,8 +7,7 @@
 
 /*
 ** f1:
-**	movi	v[0-9]+.2s, 0x80, lsl 24
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.s, z0.s, #0x80000000
 **	ret
 */
 float32_t f1 (float32_t a)
@@ -18,9 +17,7 @@ float32_t f1 (float32_t a)
 
 /*
 ** f2:
-**	mov	x0, -9223372036854775808
-**	fmov	d[0-9]+, x0
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.d, z0.d, #0x8000000000000000
 **	ret
 */
 float64_t f2 (float64_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
index 21f2a8da2a5d44e3d01f6604ca7be87e3744d494..707bcb0b6c53e212b55a255f500e9e548e9ccd80 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -6,9 +6,7 @@
 
 /*
 ** negabs:
-**	mov	x0, -9223372036854775808
-**	fmov	d[0-9]+, x0
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.d, z0.d, #0x8000000000000000
 **	ret
 */
 double negabs (double x)
@@ -22,8 +20,7 @@ double negabs (double x)
 
 /*
 ** negabsf:
-**	movi	v[0-9]+.2s, 0x80, lsl 24
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.s, z0.s, #0x80000000
 **	ret
 */
 float negabsf (float x)
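[Schematic view of what the new splits emit — hand-written RTL, not a
compiler dump, assuming the DImode ORR case with the value in v0/z0
(hard register 32): the scalar operation in a SIMD register is rewritten
as a full SVE vector operation on the containing Z register, which is
safe because only lane 0 holds a live value.]

    ;; Before the split: scalar DI inclusive-OR in an FP/SIMD register.
    (set (reg:DI 32 v0)
	 (ior:DI (reg:DI 32 v0)
		 (const_int -9223372036854775808)))

    ;; After the split, via lowpart_subreg and gen_const_vec_duplicate:
    ;; the immediate is broadcast to every lane of the SVE mode.
    (set (reg:VNx2DI 32 v0)
	 (ior:VNx2DI (reg:VNx2DI 32 v0)
		     (const_vector:VNx2DI repeat [
			(const_int -9223372036854775808)])))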
> >> > +  "&& TARGET_SVE && rtx_equal_p (operands[0], operands[1])
> >> > +   && satisfies_constraint_<lconst> (operands[2])
> >> > +   && FP_REGNUM_P (REGNO (operands[0]))"
> >> > +  [(const_int 0)]
> >> > +  {
> >> > +    rtx op1 = lowpart_subreg (<VCONV>mode, operands[1], <MODE>mode);
> >> > +    rtx op2 = gen_const_vec_duplicate (<VCONV>mode, operands[2]);
> >> > +    emit_insn (gen_<optab><vconv>3 (op1, op1, op2));
> >> > +    DONE;
> >> > +  }
> >> > )
> >>
> >> The WIP SME patches add a %Z modifier for 'z' register prefixes,
> >> similarly to b/h/s/d for scalar FP.  With that I think the alternative
> >> can be:
> >>
> >>      [w , 0 , <lconst>; *         , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
> >>
> >> although it would be nice to keep the hex constant.
> >
> > My original patch added a %u for (undecorated) which just prints the
> > register number, and changed %C to also accept a single constant instead
> > of only a uniform vector.
>
> Not saying no to %u in future, but %Z seems more consistent with the
> current approach.  And yeah, I'd also wondered about extending %C.
> The problem is guessing whether to print a 32-bit, 64-bit or 128-bit
> constant for negative immediates.

Rebased patch.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/109154
	* config/aarch64/aarch64.md (<optab><mode>3): Add SVE case.
	* config/aarch64/aarch64-simd.md (ior<mode>3<vczle><vczbe>): Likewise.
	* config/aarch64/iterators.md (VCONV, vconv): New.
	* config/aarch64/predicates.md (aarch64_orr_imm_sve_advsimd): New.

gcc/testsuite/ChangeLog:

	PR tree-optimization/109154
	* gcc.target/aarch64/sve/fneg-abs_1.c: Updated.
	* gcc.target/aarch64/sve/fneg-abs_2.c: Updated.
	* gcc.target/aarch64/sve/fneg-abs_4.c: Updated.

--- inline copy of patch --

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 33eceb436584ff73c7271f93639f2246d1af19e0..98c418c54a82a348c597310caa23916f9c16f9b6 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1219,11 +1219,14 @@ (define_insn "and<mode>3<vczle><vczbe>"
 (define_insn "ior<mode>3<vczle><vczbe>"
   [(set (match_operand:VDQ_I 0 "register_operand")
 	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
-		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm")))]
-  "TARGET_SIMD"
-  {@ [ cons: =0 , 1 , 2  ]
-     [ w        , w , w  ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
-     [ w        , 0 , Do ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_ORR);
+		   (match_operand:VDQ_I 2 "aarch64_orr_imm_sve_advsimd")))]
+  "TARGET_SIMD"
+  {@ [ cons: =0 , 1 , 2  ; attrs: arch ]
+     [ w        , w , w  ; simd        ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+     [ w        , 0 , vsl; sve         ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
+     [ w        , 0 , Do ; simd        ] \
+       << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
+					     AARCH64_CHECK_ORR);
   }
   [(set_attr "type" "neon_logic<q>")]
 )
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4fcd71a2e9d1e8c35f35593255c4f66a68856a79..c6b1506fe7b47dd40741f26ef0cc92692008a631 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4599,7 +4599,8 @@ (define_insn "<optab><mode>3"
   ""
   {@ [ cons: =0 , 1  , 2        ; attrs: type , arch ]
      [ r        , %r , r        ; logic_reg   , *    ] <logical>\t%<w>0, %<w>1, %<w>2
-     [ rk       , r  , <lconst> ; logic_imm   , *    ] <logical>\t%<w>0, %<w>1, %2
+     [ rk       , ^r , <lconst> ; logic_imm   , *    ] <logical>\t%<w>0, %<w>1, %2
+     [ w        , 0  , <lconst> ; *           , sve  ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
      [ w        , w  , w        ; neon_logic  , simd ] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
   }
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1593a8fd04f91259295d0e393cbc7973daf7bf73..d24109b4fe6a867125b9474d34d616155bc36b3f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1435,6 +1435,19 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 			 (HI   "V8HI") (QI   "V16QI")
 			 (SF   "V4SF") (DF   "V2DF")])
 
+;; 128-bit container modes for the lower part of an SVE vector to the inner or
+;; neon source mode.
+(define_mode_attr VCONV [(SI   "VNx4SI")  (DI    "VNx2DI")
+			 (V8QI "VNx16QI") (V16QI "VNx16QI")
+			 (V4HI "VNx8HI")  (V8HI  "VNx8HI")
+			 (V2SI "VNx4SI")  (V4SI  "VNx4SI")
+			 (V2DI "VNx2DI")])
+(define_mode_attr vconv [(SI   "vnx4si")  (DI    "vnx2di")
+			 (V8QI "vnx16qi") (V16QI "vnx16qi")
+			 (V4HI "vnx8hi")  (V8HI  "vnx8hi")
+			 (V2SI "vnx4si")  (V4SI  "vnx4si")
+			 (V2DI "vnx2di")])
+
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
 			 (V4HI "V2HI")  (V8HI  "V4HI")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 01de47439744acb3708c645b98eaa607294a1f1f..a73724a7fc05636d4c0643a291f40f2609564778 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -871,6 +871,11 @@ (define_predicate "aarch64_sve_logical_operand"
   (ior (match_operand 0 "register_operand")
       (match_operand 0 "aarch64_sve_logical_immediate")))
 
+(define_predicate "aarch64_orr_imm_sve_advsimd"
+  (ior (match_operand 0 "aarch64_reg_or_orr_imm")
+       (and (match_test "TARGET_SVE")
+	    (match_operand 0 "aarch64_sve_logical_operand"))))
+
 (define_predicate "aarch64_sve_gather_offset_b"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_sve_gather_immediate_b")))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
index 0c7664e6de77a497682952653ffd417453854d52..a8b27199ff83d0eebadfc7dcf03f94e1229d76b8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -6,7 +6,7 @@
 
 /*
 ** t1:
-**	orr	v[0-9]+.2s, #128, lsl #24
+**	orr	z[0-9]+.s, z[0-9]+.s, #-2147483648
 **	ret
 */
 float32x2_t t1 (float32x2_t a)
@@ -16,7 +16,7 @@ float32x2_t t1 (float32x2_t a)
 
 /*
 ** t2:
-**	orr	v[0-9]+.4s, #128, lsl #24
+**	orr	z[0-9]+.s, z[0-9]+.s, #-2147483648
 **	ret
 */
 float32x4_t t2 (float32x4_t a)
@@ -26,9 +26,7 @@ float32x4_t t2 (float32x4_t a)
 
 /*
 ** t3:
-**	adrp	x0, .LC[0-9]+
-**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
-**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	orr	z[0-9]+.d, z[0-9]+.d, #-9223372036854775808
 **	ret
 */
 float64x2_t t3 (float64x2_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
index a60cd31b9294af2dac69eed1c93f899bd5c78fca..19a7695e605bc8aced486a9c450d1cdc6be4691a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,8 +7,7 @@
 
 /*
 ** f1:
-**	movi	v[0-9]+.2s, 0x80, lsl 24
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.s, z0.s, #-2147483648
 **	ret
 */
 float32_t f1 (float32_t a)
@@ -18,9 +17,7 @@ float32_t f1 (float32_t a)
 
 /*
 ** f2:
-**	mov	x0, -9223372036854775808
-**	fmov	d[0-9]+, x0
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.d, z0.d, #-9223372036854775808
 **	ret
 */
 float64_t f2 (float64_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
index 21f2a8da2a5d44e3d01f6604ca7be87e3744d494..663d5fe17e091d128313b6b8b8dc918a01a96c4f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -6,9 +6,7 @@
 
 /*
 ** negabs:
-**	mov	x0, -9223372036854775808
-**	fmov	d[0-9]+, x0
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.d, z0.d, #-9223372036854775808
 **	ret
 */
 double negabs (double x)
@@ -22,8 +20,7 @@ double negabs (double x)
 
 /*
 ** negabsf:
-**	movi	v[0-9]+.2s, 0x80, lsl 24
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.s, z0.s, #-2147483648
 **	ret
 */
 float negabsf (float x)
Tamar Christina <Tamar.Christina@arm.com> writes:
> Rebased patch.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/109154
> 	* config/aarch64/aarch64.md (<optab><mode>3): Add SVE case.
> 	* config/aarch64/aarch64-simd.md (ior<mode>3<vczle><vczbe>): Likewise.
> 	* config/aarch64/iterators.md (VCONV, vconv): New.
> 	* config/aarch64/predicates.md (aarch64_orr_imm_sve_advsimd): New.
>
> gcc/testsuite/ChangeLog:
>
> 	PR tree-optimization/109154
> 	* gcc.target/aarch64/sve/fneg-abs_1.c: Updated.
> 	* gcc.target/aarch64/sve/fneg-abs_2.c: Updated.
> 	* gcc.target/aarch64/sve/fneg-abs_4.c: Updated.
>
> [...]
>
> +;; 128-bit container modes for the lower part of an SVE vector to the inner or
> +;; neon source mode.
> +(define_mode_attr VCONV [(SI   "VNx4SI")  (DI    "VNx2DI")
> +			 (V8QI "VNx16QI") (V16QI "VNx16QI")
> +			 (V4HI "VNx8HI")  (V8HI  "VNx8HI")
> +			 (V2SI "VNx4SI")  (V4SI  "VNx4SI")
> +			 (V2DI "VNx2DI")])
> +(define_mode_attr vconv [(SI   "vnx4si")  (DI    "vnx2di")
> +			 (V8QI "vnx16qi") (V16QI "vnx16qi")
> +			 (V4HI "vnx8hi")  (V8HI  "vnx8hi")
> +			 (V2SI "vnx4si")  (V4SI  "vnx4si")
> +			 (V2DI "vnx2di")])

These attributes aren't needed any more (at least, not by this patch).

OK for trunk with those removed.

Thanks,
Richard
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4551,17 +4551,27 @@ (define_insn_and_split "*aarch64_and<mode>_imm2"
   }
 )
 
-(define_insn "<optab><mode>3"
-  [(set (match_operand:GPI 0 "register_operand" "=r,rk,w")
-	(LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r,w")
-		     (match_operand:GPI 2 "aarch64_logical_operand" "r,<lconst>,w")))]
-  ""
-  "@
-  <logical>\\t%<w>0, %<w>1, %<w>2
-  <logical>\\t%<w>0, %<w>1, %2
-  <logical>\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
-  [(set_attr "type" "logic_reg,logic_imm,neon_logic")
-   (set_attr "arch" "*,*,simd")]
+(define_insn_and_split "<optab><mode>3"
+  [(set (match_operand:GPI 0 "register_operand")
+	(LOGICAL:GPI (match_operand:GPI 1 "register_operand")
+		     (match_operand:GPI 2 "aarch64_logical_operand")))]
+  ""
+  {@ [cons: =0, 1, 2; attrs: type, arch]
+     [r , %r, r       ; logic_reg , *   ] <logical>\t%<w>0, %<w>1, %<w>2
+     [rk, r , <lconst>; logic_imm , *   ] <logical>\t%<w>0, %<w>1, %2
+     [w , 0 , <lconst>; *         , sve ] #
+     [w , w , w       ; neon_logic, simd] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+  }
+  "&& TARGET_SVE && rtx_equal_p (operands[0], operands[1])
+   && satisfies_constraint_<lconst> (operands[2])
+   && FP_REGNUM_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  {
+    rtx op1 = lowpart_subreg (<VCONV>mode, operands[1], <MODE>mode);
+    rtx op2 = gen_const_vec_duplicate (<VCONV>mode, operands[2]);
+    emit_insn (gen_<optab><vconv>3 (op1, op1, op2));
+    DONE;
+  }
 )
 
 ;; zero_extend version of above
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index d17becc37e230684beaee3c69e2a0f0ce612eda5..568cd5d1a3a9e00475376177ad13de72609df3d8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1432,6 +1432,11 @@ (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
 			 (HI   "V8HI") (QI   "V16QI")
 			 (SF   "V4SF") (DF   "V2DF")])
 
+;; 128-bit container modes for the lower part of an SVE vector to the inner or
+;; scalar source mode.
+(define_mode_attr VCONV [(SI "VNx4SI") (DI "VNx2DI")])
+(define_mode_attr vconv [(SI "vnx4si") (DI "vnx2di")])
+
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI")  (V16QI "V8QI")
 			 (V4HI "V2HI")  (V8HI  "V4HI")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
index a60cd31b9294af2dac69eed1c93f899bd5c78fca..fe9f27bf91b8fb18205a5891a5d5e847a5d88e4b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,8 +7,7 @@
 
 /*
 ** f1:
-**	movi	v[0-9]+.2s, 0x80, lsl 24
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.s, z0.s, #0x80000000
 **	ret
 */
 float32_t f1 (float32_t a)
@@ -18,9 +17,7 @@ float32_t f1 (float32_t a)
 
 /*
 ** f2:
-**	mov	x0, -9223372036854775808
-**	fmov	d[0-9]+, x0
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.d, z0.d, #0x8000000000000000
 **	ret
 */
 float64_t f2 (float64_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
index 21f2a8da2a5d44e3d01f6604ca7be87e3744d494..707bcb0b6c53e212b55a255f500e9e548e9ccd80 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -6,9 +6,7 @@
 
 /*
 ** negabs:
-**	mov	x0, -9223372036854775808
-**	fmov	d[0-9]+, x0
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.d, z0.d, #0x8000000000000000
 **	ret
 */
 double negabs (double x)
@@ -22,8 +20,7 @@ double negabs (double x)
 
 /*
 ** negabsf:
-**	movi	v[0-9]+.2s, 0x80, lsl 24
-**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	orr	z0.s, z0.s, #0x80000000
 **	ret
 */
 float negabsf (float x)
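[A quick way to check the final codegen locally, assuming a GCC with the
approved %Z-based version of this patch, built with SVE enabled.  The
file name, function body and exact -march string below are illustrative;
the expected orr line follows the updated test expectations above:]

    $ cat sign.c
    #include <arm_neon.h>
    float32_t f1 (float32_t a) { return -__builtin_fabsf (a); }
    $ gcc -O2 -march=armv8.2-a+sve -S -o - sign.c | grep orr
	orr	z0.s, z0.s, #-2147483648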