Message ID | 20211217154251.GA4694@arm.com |
---|---|
State | New |
Headers | show |
Series | [1/3] middle-end vect: Simplify and extend the complex numbers validation routines. | expand |
Tamar Christina <tamar.christina@arm.com> writes: > Hi All, > > After the first patch in the series this updates the optabs to expect the > canonical sequence. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? and backport along with the first patch? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/102819 > PR tree-optimization/103169 > * config/aarch64/aarch64-simd.md (cml<fcmac1><conj_op><mode>4, > cmul<conj_op><mode>3): Use canonical order. > * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, > cmul<conj_op><mode>3): Likewise. > > --- inline copy of patch -- > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9cfb5649f9b0e73 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" > ;; remainder. Because of this, expand early. > (define_expand "cml<fcmac1><conj_op><mode>4" > [(set (match_operand:VHSDF 0 "register_operand") > - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") > - (match_operand:VHSDF 3 "register_operand")] > - FCMLA_OP)))] > + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") > + (match_operand:VHSDF 2 "register_operand")] > + FCMLA_OP) > + (match_operand:VHSDF 3 "register_operand")))] > "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > { > rtx tmp = gen_reg_rtx (<MODE>mode); > - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > - operands[3], operands[2])); > + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > + operands[1], operands[2])); > emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > - operands[3], operands[2])); > + operands[1], operands[2])); > DONE; > }) > > @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" > rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); > rtx res1 = gen_reg_rtx (<MODE>mode); > emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, > - operands[2], operands[1])); > + operands[1], operands[2])); > emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, > - operands[2], operands[1])); > + operands[1], operands[2])); This doesn't look right. Going from the documentation, patch 1 isn't changing the operand order for CMUL: the conjugated operand (if there is one) is still operand 2. The FCMLA sequences use the opposite order, where the conjugated operand (if there is one) is operand 1. So I think the reversal here is still needed. Same for the multiplication operands in CML* above. Thanks, Richard
Richard Sandiford <richard.sandiford@arm.com> writes: > Tamar Christina <tamar.christina@arm.com> writes: >> Hi All, >> >> After the first patch in the series this updates the optabs to expect the >> canonical sequence. >> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. >> >> Ok for master? and backport along with the first patch? >> >> Thanks, >> Tamar >> >> gcc/ChangeLog: >> >> PR tree-optimization/102819 >> PR tree-optimization/103169 >> * config/aarch64/aarch64-simd.md (cml<fcmac1><conj_op><mode>4, >> cmul<conj_op><mode>3): Use canonical order. >> * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, >> cmul<conj_op><mode>3): Likewise. >> >> --- inline copy of patch -- >> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md >> index f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9cfb5649f9b0e73 100644 >> --- a/gcc/config/aarch64/aarch64-simd.md >> +++ b/gcc/config/aarch64/aarch64-simd.md >> @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" >> ;; remainder. Because of this, expand early. >> (define_expand "cml<fcmac1><conj_op><mode>4" >> [(set (match_operand:VHSDF 0 "register_operand") >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") >> - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") >> - (match_operand:VHSDF 3 "register_operand")] >> - FCMLA_OP)))] >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") >> + (match_operand:VHSDF 2 "register_operand")] >> + FCMLA_OP) >> + (match_operand:VHSDF 3 "register_operand")))] >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" >> { >> rtx tmp = gen_reg_rtx (<MODE>mode); >> - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], >> - operands[3], operands[2])); >> + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], >> + operands[1], operands[2])); >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, >> - operands[3], operands[2])); >> + operands[1], operands[2])); >> DONE; >> }) >> >> @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" >> rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); >> rtx res1 = gen_reg_rtx (<MODE>mode); >> emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, >> - operands[2], operands[1])); >> + operands[1], operands[2])); >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, >> - operands[2], operands[1])); >> + operands[1], operands[2])); > > This doesn't look right. Going from the documentation, patch 1 isn't > changing the operand order for CMUL: the conjugated operand (if there > is one) is still operand 2. The FCMLA sequences use the opposite order, > where the conjugated operand (if there is one) is operand 1. So I think I meant “the first multiplication operand” rather than “operand 1” here. > the reversal here is still needed. > > Same for the multiplication operands in CML* above. > > Thanks, > Richard
> -----Original Message----- > From: Richard Sandiford <richard.sandiford@arm.com> > Sent: Friday, December 17, 2021 4:49 PM > To: Tamar Christina <Tamar.Christina@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex mul, > fma and fms > > Richard Sandiford <richard.sandiford@arm.com> writes: > > Tamar Christina <tamar.christina@arm.com> writes: > >> Hi All, > >> > >> After the first patch in the series this updates the optabs to expect > >> the canonical sequence. > >> > >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > >> > >> Ok for master? and backport along with the first patch? > >> > >> Thanks, > >> Tamar > >> > >> gcc/ChangeLog: > >> > >> PR tree-optimization/102819 > >> PR tree-optimization/103169 > >> * config/aarch64/aarch64-simd.md > (cml<fcmac1><conj_op><mode>4, > >> cmul<conj_op><mode>3): Use canonical order. > >> * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, > >> cmul<conj_op><mode>3): Likewise. > >> > >> --- inline copy of patch -- > >> diff --git a/gcc/config/aarch64/aarch64-simd.md > >> b/gcc/config/aarch64/aarch64-simd.md > >> index > >> > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9 > c > >> fb5649f9b0e73 100644 > >> --- a/gcc/config/aarch64/aarch64-simd.md > >> +++ b/gcc/config/aarch64/aarch64-simd.md > >> @@ -556,17 +556,17 @@ (define_insn > "aarch64_fcmlaq_lane<rot><mode>" > >> ;; remainder. Because of this, expand early. > >> (define_expand "cml<fcmac1><conj_op><mode>4" > >> [(set (match_operand:VHSDF 0 "register_operand") > >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > >> - (unspec:VHSDF [(match_operand:VHSDF 2 > "register_operand") > >> - (match_operand:VHSDF 3 > "register_operand")] > >> - FCMLA_OP)))] > >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > "register_operand") > >> + (match_operand:VHSDF 2 > "register_operand")] > >> + FCMLA_OP) > >> + (match_operand:VHSDF 3 "register_operand")))] > >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > >> { > >> rtx tmp = gen_reg_rtx (<MODE>mode); > >> - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > >> - operands[3], operands[2])); > >> + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > >> + operands[1], operands[2])); > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > >> - operands[3], operands[2])); > >> + operands[1], operands[2])); > >> DONE; > >> }) > >> > >> @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" > >> rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); > >> rtx res1 = gen_reg_rtx (<MODE>mode); > >> emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, > >> - operands[2], operands[1])); > >> + operands[1], operands[2])); > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, > >> - operands[2], operands[1])); > >> + operands[1], operands[2])); > > > > This doesn't look right. Going from the documentation, patch 1 isn't > > changing the operand order for CMUL: the conjugated operand (if there > > is one) is still operand 2. The FCMLA sequences use the opposite > > order, where the conjugated operand (if there is one) is operand 1. > > So I think > > I meant “the first multiplication operand” rather than “operand 1” here. > > > the reversal here is still needed. > > > > Same for the multiplication operands in CML* above. I did actually change the order in patch 1, but didn't update the docs.. That was done because I followed the SLP order again, but now I've updated them to do what the docs say. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? and backport along with the first patch? Thanks, Tamar gcc/ChangeLog: PR tree-optimization/102819 PR tree-optimization/103169 * config/aarch64/aarch64-simd.md (cml<fcmac1><conj_op><mode>4): Use canonical order. * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4): Likewise. --- inline copy of patch --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1e5731b14cab59ce 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" ;; remainder. Because of this, expand early. (define_expand "cml<fcmac1><conj_op><mode>4" [(set (match_operand:VHSDF 0 "register_operand") - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") - (match_operand:VHSDF 3 "register_operand")] - FCMLA_OP)))] + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCMLA_OP) + (match_operand:VHSDF 3 "register_operand")))] "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], - operands[3], operands[2])); + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], + operands[2], operands[1])); emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, - operands[3], operands[2])); + operands[2], operands[1])); DONE; }) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 9ef968840c20a3049901b3f8a919cf27ded1da3e..9ed19017c480b88779e9e3b08c0e031be60a8c12 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7278,11 +7278,11 @@ (define_expand "cml<fcmac1><conj_op><mode>4" rtx tmp = gen_reg_rtx (<MODE>mode); emit_insn (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], - operands[3], operands[2], - operands[1], operands[5])); + operands[2], operands[1], + operands[3], operands[5])); emit_insn (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], - operands[3], operands[2], + operands[2], operands[1], tmp, operands[5])); DONE; })
ping > -----Original Message----- > From: Tamar Christina > Sent: Monday, December 20, 2021 4:21 PM > To: Richard Sandiford <richard.sandiford@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Subject: RE: [2/3 PATCH]AArch64 use canonical ordering for complex > mul, fma and fms > > > > > -----Original Message----- > > From: Richard Sandiford <richard.sandiford@arm.com> > > Sent: Friday, December 17, 2021 4:49 PM > > To: Tamar Christina <Tamar.Christina@arm.com> > > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov > <Kyrylo.Tkachov@arm.com> > > Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex > > mul, fma and fms > > > > Richard Sandiford <richard.sandiford@arm.com> writes: > > > Tamar Christina <tamar.christina@arm.com> writes: > > >> Hi All, > > >> > > >> After the first patch in the series this updates the optabs to > > >> expect the canonical sequence. > > >> > > >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > >> > > >> Ok for master? and backport along with the first patch? > > >> > > >> Thanks, > > >> Tamar > > >> > > >> gcc/ChangeLog: > > >> > > >> PR tree-optimization/102819 > > >> PR tree-optimization/103169 > > >> * config/aarch64/aarch64-simd.md > > (cml<fcmac1><conj_op><mode>4, > > >> cmul<conj_op><mode>3): Use canonical order. > > >> * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, > > >> cmul<conj_op><mode>3): Likewise. > > >> > > >> --- inline copy of patch -- > > >> diff --git a/gcc/config/aarch64/aarch64-simd.md > > >> b/gcc/config/aarch64/aarch64-simd.md > > >> index > > >> > > > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9 > > c > > >> fb5649f9b0e73 100644 > > >> --- a/gcc/config/aarch64/aarch64-simd.md > > >> +++ b/gcc/config/aarch64/aarch64-simd.md > > >> @@ -556,17 +556,17 @@ (define_insn > > "aarch64_fcmlaq_lane<rot><mode>" > > >> ;; remainder. Because of this, expand early. > > >> (define_expand "cml<fcmac1><conj_op><mode>4" > > >> [(set (match_operand:VHSDF 0 "register_operand") > > >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > > >> - (unspec:VHSDF [(match_operand:VHSDF 2 > > "register_operand") > > >> - (match_operand:VHSDF 3 > > "register_operand")] > > >> - FCMLA_OP)))] > > >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > > "register_operand") > > >> + (match_operand:VHSDF 2 > > "register_operand")] > > >> + FCMLA_OP) > > >> + (match_operand:VHSDF 3 "register_operand")))] > > >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > > >> { > > >> rtx tmp = gen_reg_rtx (<MODE>mode); > > >> - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > > >> - operands[3], operands[2])); > > >> + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > > >> + operands[1], operands[2])); > > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > > >> - operands[3], operands[2])); > > >> + operands[1], operands[2])); > > >> DONE; > > >> }) > > >> > > >> @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" > > >> rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); > > >> rtx res1 = gen_reg_rtx (<MODE>mode); > > >> emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, > > >> - operands[2], operands[1])); > > >> + operands[1], operands[2])); > > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, > > >> - operands[2], operands[1])); > > >> + operands[1], operands[2])); > > > > > > This doesn't look right. Going from the documentation, patch 1 > > > isn't changing the operand order for CMUL: the conjugated operand > > > (if there is one) is still operand 2. The FCMLA sequences use the > > > opposite order, where the conjugated operand (if there is one) is > operand 1. > > > So I think > > > > I meant “the first multiplication operand” rather than “operand 1” here. > > > > > the reversal here is still needed. > > > > > > Same for the multiplication operands in CML* above. > > I did actually change the order in patch 1, but didn't update the docs.. > That was done because I followed the SLP order again, but now I've > updated them to do what the docs say. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? and backport along with the first patch? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/102819 > PR tree-optimization/103169 > * config/aarch64/aarch64-simd.md > (cml<fcmac1><conj_op><mode>4): Use > canonical order. > * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4): > Likewise. > > --- inline copy of patch --- > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1 > e5731b14cab59ce 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" > ;; remainder. Because of this, expand early. > (define_expand "cml<fcmac1><conj_op><mode>4" > [(set (match_operand:VHSDF 0 "register_operand") > - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > - (unspec:VHSDF [(match_operand:VHSDF 2 > "register_operand") > - (match_operand:VHSDF 3 > "register_operand")] > - FCMLA_OP)))] > + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > "register_operand") > + (match_operand:VHSDF 2 > "register_operand")] > + FCMLA_OP) > + (match_operand:VHSDF 3 "register_operand")))] > "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > { > rtx tmp = gen_reg_rtx (<MODE>mode); > - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > - operands[3], operands[2])); > + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > + operands[2], operands[1])); > emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > - operands[3], operands[2])); > + operands[2], operands[1])); > DONE; > }) > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index > 9ef968840c20a3049901b3f8a919cf27ded1da3e..9ed19017c480b88779e9e3b08 > c0e031be60a8c12 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -7278,11 +7278,11 @@ (define_expand "cml<fcmac1><conj_op><mode>4" > rtx tmp = gen_reg_rtx (<MODE>mode); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], > - operands[3], operands[2], > - operands[1], operands[5])); > + operands[2], operands[1], > + operands[3], operands[5])); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], > - operands[3], operands[2], > + operands[2], operands[1], > tmp, operands[5])); > DONE; > })
Ping x3. > -----Original Message----- > From: Tamar Christina > Sent: Tuesday, January 11, 2022 7:11 AM > To: Richard Sandiford <richard.sandiford@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Subject: RE: [2/3 PATCH]AArch64 use canonical ordering for complex mul, > fma and fms > > ping > > > -----Original Message----- > > From: Tamar Christina > > Sent: Monday, December 20, 2021 4:21 PM > > To: Richard Sandiford <richard.sandiford@arm.com> > > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov > <Kyrylo.Tkachov@arm.com> > > Subject: RE: [2/3 PATCH]AArch64 use canonical ordering for complex > > mul, fma and fms > > > > > > > > > -----Original Message----- > > > From: Richard Sandiford <richard.sandiford@arm.com> > > > Sent: Friday, December 17, 2021 4:49 PM > > > To: Tamar Christina <Tamar.Christina@arm.com> > > > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > > > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > > > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov > > <Kyrylo.Tkachov@arm.com> > > > Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex > > > mul, fma and fms > > > > > > Richard Sandiford <richard.sandiford@arm.com> writes: > > > > Tamar Christina <tamar.christina@arm.com> writes: > > > >> Hi All, > > > >> > > > >> After the first patch in the series this updates the optabs to > > > >> expect the canonical sequence. > > > >> > > > >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > >> > > > >> Ok for master? and backport along with the first patch? > > > >> > > > >> Thanks, > > > >> Tamar > > > >> > > > >> gcc/ChangeLog: > > > >> > > > >> PR tree-optimization/102819 > > > >> PR tree-optimization/103169 > > > >> * config/aarch64/aarch64-simd.md > > > (cml<fcmac1><conj_op><mode>4, > > > >> cmul<conj_op><mode>3): Use canonical order. > > > >> * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, > > > >> cmul<conj_op><mode>3): Likewise. > > > >> > > > >> --- inline copy of patch -- > > > >> diff --git a/gcc/config/aarch64/aarch64-simd.md > > > >> b/gcc/config/aarch64/aarch64-simd.md > > > >> index > > > >> > > > > > > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9 > > > c > > > >> fb5649f9b0e73 100644 > > > >> --- a/gcc/config/aarch64/aarch64-simd.md > > > >> +++ b/gcc/config/aarch64/aarch64-simd.md > > > >> @@ -556,17 +556,17 @@ (define_insn > > > "aarch64_fcmlaq_lane<rot><mode>" > > > >> ;; remainder. Because of this, expand early. > > > >> (define_expand "cml<fcmac1><conj_op><mode>4" > > > >> [(set (match_operand:VHSDF 0 "register_operand") > > > >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > > > >> - (unspec:VHSDF [(match_operand:VHSDF 2 > > > "register_operand") > > > >> - (match_operand:VHSDF 3 > > > "register_operand")] > > > >> - FCMLA_OP)))] > > > >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > > > "register_operand") > > > >> + (match_operand:VHSDF 2 > > > "register_operand")] > > > >> + FCMLA_OP) > > > >> + (match_operand:VHSDF 3 "register_operand")))] > > > >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > > > >> { > > > >> rtx tmp = gen_reg_rtx (<MODE>mode); > > > >> - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, > operands[1], > > > >> - operands[3], operands[2])); > > > >> + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, > operands[3], > > > >> + operands[1], > operands[2])); > > > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], > tmp, > > > >> - operands[3], operands[2])); > > > >> + operands[1], > operands[2])); > > > >> DONE; > > > >> }) > > > >> > > > >> @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" > > > >> rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); > > > >> rtx res1 = gen_reg_rtx (<MODE>mode); > > > >> emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, > > > >> - operands[2], operands[1])); > > > >> + operands[1], > operands[2])); > > > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], > res1, > > > >> - operands[2], operands[1])); > > > >> + operands[1], > operands[2])); > > > > > > > > This doesn't look right. Going from the documentation, patch 1 > > > > isn't changing the operand order for CMUL: the conjugated operand > > > > (if there is one) is still operand 2. The FCMLA sequences use the > > > > opposite order, where the conjugated operand (if there is one) is > > operand 1. > > > > So I think > > > > > > I meant “the first multiplication operand” rather than “operand 1” here. > > > > > > > the reversal here is still needed. > > > > > > > > Same for the multiplication operands in CML* above. > > > > I did actually change the order in patch 1, but didn't update the docs.. > > That was done because I followed the SLP order again, but now I've > > updated them to do what the docs say. > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > > Ok for master? and backport along with the first patch? > > > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > PR tree-optimization/102819 > > PR tree-optimization/103169 > > * config/aarch64/aarch64-simd.md > > (cml<fcmac1><conj_op><mode>4): Use > > canonical order. > > * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4): > > Likewise. > > > > --- inline copy of patch --- > > > > diff --git a/gcc/config/aarch64/aarch64-simd.md > > b/gcc/config/aarch64/aarch64-simd.md > > index > > > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1 > > e5731b14cab59ce 100644 > > --- a/gcc/config/aarch64/aarch64-simd.md > > +++ b/gcc/config/aarch64/aarch64-simd.md > > @@ -556,17 +556,17 @@ (define_insn > "aarch64_fcmlaq_lane<rot><mode>" > > ;; remainder. Because of this, expand early. > > (define_expand "cml<fcmac1><conj_op><mode>4" > > [(set (match_operand:VHSDF 0 "register_operand") > > - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > > - (unspec:VHSDF [(match_operand:VHSDF 2 > > "register_operand") > > - (match_operand:VHSDF 3 > > "register_operand")] > > - FCMLA_OP)))] > > + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > > "register_operand") > > + (match_operand:VHSDF 2 > > "register_operand")] > > + FCMLA_OP) > > + (match_operand:VHSDF 3 "register_operand")))] > > "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > > { > > rtx tmp = gen_reg_rtx (<MODE>mode); > > - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > > - operands[3], operands[2])); > > + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > > + operands[2], operands[1])); > > emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > > - operands[3], operands[2])); > > + operands[2], operands[1])); > > DONE; > > }) > > > > diff --git a/gcc/config/aarch64/aarch64-sve.md > > b/gcc/config/aarch64/aarch64-sve.md > > index > > > 9ef968840c20a3049901b3f8a919cf27ded1da3e..9ed19017c480b88779e9e3b08 > > c0e031be60a8c12 100644 > > --- a/gcc/config/aarch64/aarch64-sve.md > > +++ b/gcc/config/aarch64/aarch64-sve.md > > @@ -7278,11 +7278,11 @@ (define_expand > "cml<fcmac1><conj_op><mode>4" > > rtx tmp = gen_reg_rtx (<MODE>mode); > > emit_insn > > (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], > > - operands[3], operands[2], > > - operands[1], operands[5])); > > + operands[2], operands[1], > > + operands[3], operands[5])); > > emit_insn > > (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], > operands[4], > > - operands[3], operands[2], > > + operands[2], operands[1], > > tmp, operands[5])); > > DONE; > > })
Tamar Christina <Tamar.Christina@arm.com> writes: >> -----Original Message----- >> From: Richard Sandiford <richard.sandiford@arm.com> >> Sent: Friday, December 17, 2021 4:49 PM >> To: Tamar Christina <Tamar.Christina@arm.com> >> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw >> <Richard.Earnshaw@arm.com>; Marcus Shawcroft >> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> >> Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex mul, >> fma and fms >> >> Richard Sandiford <richard.sandiford@arm.com> writes: >> > Tamar Christina <tamar.christina@arm.com> writes: >> >> Hi All, >> >> >> >> After the first patch in the series this updates the optabs to expect >> >> the canonical sequence. >> >> >> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. >> >> >> >> Ok for master? and backport along with the first patch? >> >> >> >> Thanks, >> >> Tamar >> >> >> >> gcc/ChangeLog: >> >> >> >> PR tree-optimization/102819 >> >> PR tree-optimization/103169 >> >> * config/aarch64/aarch64-simd.md >> (cml<fcmac1><conj_op><mode>4, >> >> cmul<conj_op><mode>3): Use canonical order. >> >> * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, >> >> cmul<conj_op><mode>3): Likewise. >> >> >> >> --- inline copy of patch -- >> >> diff --git a/gcc/config/aarch64/aarch64-simd.md >> >> b/gcc/config/aarch64/aarch64-simd.md >> >> index >> >> >> f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9 >> c >> >> fb5649f9b0e73 100644 >> >> --- a/gcc/config/aarch64/aarch64-simd.md >> >> +++ b/gcc/config/aarch64/aarch64-simd.md >> >> @@ -556,17 +556,17 @@ (define_insn >> "aarch64_fcmlaq_lane<rot><mode>" >> >> ;; remainder. Because of this, expand early. >> >> (define_expand "cml<fcmac1><conj_op><mode>4" >> >> [(set (match_operand:VHSDF 0 "register_operand") >> >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") >> >> - (unspec:VHSDF [(match_operand:VHSDF 2 >> "register_operand") >> >> - (match_operand:VHSDF 3 >> "register_operand")] >> >> - FCMLA_OP)))] >> >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 >> "register_operand") >> >> + (match_operand:VHSDF 2 >> "register_operand")] >> >> + FCMLA_OP) >> >> + (match_operand:VHSDF 3 "register_operand")))] >> >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" >> >> { >> >> rtx tmp = gen_reg_rtx (<MODE>mode); >> >> - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], >> >> - operands[3], operands[2])); >> >> + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], >> >> + operands[1], operands[2])); >> >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, >> >> - operands[3], operands[2])); >> >> + operands[1], operands[2])); >> >> DONE; >> >> }) >> >> >> >> @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" >> >> rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); >> >> rtx res1 = gen_reg_rtx (<MODE>mode); >> >> emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, >> >> - operands[2], operands[1])); >> >> + operands[1], operands[2])); >> >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, >> >> - operands[2], operands[1])); >> >> + operands[1], operands[2])); >> > >> > This doesn't look right. Going from the documentation, patch 1 isn't >> > changing the operand order for CMUL: the conjugated operand (if there >> > is one) is still operand 2. The FCMLA sequences use the opposite >> > order, where the conjugated operand (if there is one) is operand 1. >> > So I think >> >> I meant “the first multiplication operand” rather than “operand 1” here. >> >> > the reversal here is still needed. >> > >> > Same for the multiplication operands in CML* above. > > I did actually change the order in patch 1, but didn't update the docs.. > That was done because I followed the SLP order again, but now I've updated > them to do what the docs say. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? and backport along with the first patch? OK, thanks. Richard > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/102819 > PR tree-optimization/103169 > * config/aarch64/aarch64-simd.md (cml<fcmac1><conj_op><mode>4): Use > canonical order. > * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4): Likewise. > > --- inline copy of patch --- > > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1e5731b14cab59ce 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" > ;; remainder. Because of this, expand early. > (define_expand "cml<fcmac1><conj_op><mode>4" > [(set (match_operand:VHSDF 0 "register_operand") > - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") > - (match_operand:VHSDF 3 "register_operand")] > - FCMLA_OP)))] > + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") > + (match_operand:VHSDF 2 "register_operand")] > + FCMLA_OP) > + (match_operand:VHSDF 3 "register_operand")))] > "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > { > rtx tmp = gen_reg_rtx (<MODE>mode); > - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > - operands[3], operands[2])); > + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > + operands[2], operands[1])); > emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > - operands[3], operands[2])); > + operands[2], operands[1])); > DONE; > }) > > diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md > index 9ef968840c20a3049901b3f8a919cf27ded1da3e..9ed19017c480b88779e9e3b08c0e031be60a8c12 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -7278,11 +7278,11 @@ (define_expand "cml<fcmac1><conj_op><mode>4" > rtx tmp = gen_reg_rtx (<MODE>mode); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], > - operands[3], operands[2], > - operands[1], operands[5])); > + operands[2], operands[1], > + operands[3], operands[5])); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], > - operands[3], operands[2], > + operands[2], operands[1], > tmp, operands[5])); > DONE; > })
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9cfb5649f9b0e73 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" ;; remainder. Because of this, expand early. (define_expand "cml<fcmac1><conj_op><mode>4" [(set (match_operand:VHSDF 0 "register_operand") - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") - (match_operand:VHSDF 3 "register_operand")] - FCMLA_OP)))] + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCMLA_OP) + (match_operand:VHSDF 3 "register_operand")))] "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], - operands[3], operands[2])); + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], + operands[1], operands[2])); emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, - operands[3], operands[2])); + operands[1], operands[2])); DONE; }) @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); rtx res1 = gen_reg_rtx (<MODE>mode); emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, - operands[2], operands[1])); + operands[1], operands[2])); emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, - operands[2], operands[1])); + operands[1], operands[2])); DONE; }) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 9ef968840c20a3049901b3f8a919cf27ded1da3e..96a57442c7eb5f1080c8014a2f0311b2350de852 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7278,11 +7278,11 @@ (define_expand "cml<fcmac1><conj_op><mode>4" rtx tmp = gen_reg_rtx (<MODE>mode); emit_insn (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], - operands[3], operands[2], - operands[1], operands[5])); + operands[1], operands[2], + operands[3], operands[5])); emit_insn (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], - operands[3], operands[2], + operands[1], operands[2], tmp, operands[5])); DONE; }) @@ -7305,11 +7305,11 @@ (define_expand "cmul<conj_op><mode>3" rtx tmp = gen_reg_rtx (<MODE>mode); emit_insn (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg, - operands[2], operands[1], + operands[1], operands[2], accum, gp_mode)); emit_insn (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg, - operands[2], operands[1], + operands[1], operands[2], tmp, gp_mode)); DONE; })