Message ID | mpt34rzyl2m.fsf@arm.com |
---|---|
State | New |
Headers | show |
Series | [pushed] aarch64: Fix bogus cnot optimisation [PR114603] | expand |
On Fri, Apr 5, 2024 at 3:52 PM Richard Sandiford <richard.sandiford@arm.com> wrote: > > aarch64-sve.md had a pattern that combined: > > cmpeq pb.T, pa/z, zc.T, #0 > mov zd.T, pb/z, #1 > > into: > > cnot zd.T, pa/m, zc.T > > But this is only valid if pa.T is a ptrue. In other cases, the > original would set inactive elements of zd.T to 0, whereas the > combined form would copy elements from zc.T. > > This isn't a regression on a known testcase. However, it's a nasty > wrong code bug that could conceivably trigger for autovec code (although > I've not been able to construct a reproducer so far). That fix is also > quite localised to the buggy operation. I'd therefore prefer to push > the fix now rather than wait for GCC 15. wrong-code bugs (and also rejects-valid or ice-on-valid) are always exempt from the regression-only fixing. In practice every such bug will be a regression, in this case to when the combining pattern was introduced (unless that was with the version with the initial introduction of the port of course). Richard. > Tested on aarch64-linux-gnu & pushed. I'll backport to branches if > there is no fallout. > > Richard > > gcc/ > PR target/114603 > * config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace > with... > (@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be > a ptrue. > (*cnot<mode>): Require operand 1 to be a ptrue. > * config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand): > Use aarch64_ptrue_cnot<mode> for _x operations that are predicated > with a ptrue. Represent other _x operations as fully-defined _m > operations. > > gcc/testsuite/ > PR target/114603 > * gcc.target/aarch64/sve/acle/general/cnot_1.c: New test. > --- > .../aarch64/aarch64-sve-builtins-base.cc | 25 ++++++++++++------- > gcc/config/aarch64/aarch64-sve.md | 22 ++++++++-------- > .../aarch64/sve/acle/general/cnot_1.c | 23 +++++++++++++++++ > 3 files changed, 50 insertions(+), 20 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c > > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > index 257ca5bf6ad..5be2315a3c6 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > @@ -517,15 +517,22 @@ public: > expand (function_expander &e) const override > { > machine_mode mode = e.vector_mode (0); > - if (e.pred == PRED_x) > - { > - /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs > - a ptrue hint. */ > - e.add_ptrue_hint (0, e.gp_mode (0)); > - return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); > - } > - > - return e.use_cond_insn (code_for_cond_cnot (mode), 0); > + machine_mode pred_mode = e.gp_mode (0); > + /* The underlying _x pattern is effectively: > + > + dst = src == 0 ? 1 : 0 > + > + rather than an UNSPEC_PRED_X. Using this form allows autovec > + constructs to be matched by combine, but it means that the > + predicate on the src == 0 comparison must be all-true. > + > + For simplicity, represent other _x operations as fully-defined _m > + operations rather than using a separate bespoke pattern. */ > + if (e.pred == PRED_x > + && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode)) > + return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode)); > + return e.use_cond_insn (code_for_cond_cnot (mode), > + e.pred == PRED_x ? 1 : 0); > } > }; > > diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md > index eca8623e587..0434358122d 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -3363,24 +3363,24 @@ (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2" > ;; - CNOT > ;; ------------------------------------------------------------------------- > > -;; Predicated logical inverse. > -(define_expand "@aarch64_pred_cnot<mode>" > +;; Logical inverse, predicated with a ptrue. > +(define_expand "@aarch64_ptrue_cnot<mode>" > [(set (match_operand:SVE_FULL_I 0 "register_operand") > (unspec:SVE_FULL_I > [(unspec:<VPRED> > [(match_operand:<VPRED> 1 "register_operand") > - (match_operand:SI 2 "aarch64_sve_ptrue_flag") > + (const_int SVE_KNOWN_PTRUE) > (eq:<VPRED> > - (match_operand:SVE_FULL_I 3 "register_operand") > - (match_dup 4))] > + (match_operand:SVE_FULL_I 2 "register_operand") > + (match_dup 3))] > UNSPEC_PRED_Z) > - (match_dup 5) > - (match_dup 4)] > + (match_dup 4) > + (match_dup 3)] > UNSPEC_SEL))] > "TARGET_SVE" > { > - operands[4] = CONST0_RTX (<MODE>mode); > - operands[5] = CONST1_RTX (<MODE>mode); > + operands[3] = CONST0_RTX (<MODE>mode); > + operands[4] = CONST1_RTX (<MODE>mode); > } > ) > > @@ -3389,7 +3389,7 @@ (define_insn "*cnot<mode>" > (unspec:SVE_I > [(unspec:<VPRED> > [(match_operand:<VPRED> 1 "register_operand") > - (match_operand:SI 5 "aarch64_sve_ptrue_flag") > + (const_int SVE_KNOWN_PTRUE) > (eq:<VPRED> > (match_operand:SVE_I 2 "register_operand") > (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] > @@ -11001,4 +11001,4 @@ (define_insn "@aarch64_sve_set_neonq_<mode>" > GET_MODE (operands[2])); > return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>"; > } > -) > \ No newline at end of file > +) > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c > new file mode 100644 > index 00000000000..b1a489f0cf0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c > @@ -0,0 +1,23 @@ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include <arm_sve.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +/* > +** foo: > +** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0 > +** mov z0\.s, \1/z, #1 > +** ret > +*/ > +svint32_t foo(svbool_t pg, svint32_t y) > +{ > + return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0)); > +} > + > +#ifdef __cplusplus > +} > +#endif > -- > 2.25.1 >
Richard Biener <richard.guenther@gmail.com> writes: > On Fri, Apr 5, 2024 at 3:52 PM Richard Sandiford >> This isn't a regression on a known testcase. However, it's a nasty >> wrong code bug that could conceivably trigger for autovec code (although >> I've not been able to construct a reproducer so far). That fix is also >> quite localised to the buggy operation. I'd therefore prefer to push >> the fix now rather than wait for GCC 15. > > wrong-code bugs (and also rejects-valid or ice-on-valid) are always exempt > from the regression-only fixing. In practice every such bug will be a > regression, > in this case to when the combining pattern was introduced (unless that was > with the version with the initial introduction of the port of course). Ah, thanks, hadn't realised that. Makes sense though. It's good news of a sort since unfortunately I've another SVE wrong-code fix in the works... Richard
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 257ca5bf6ad..5be2315a3c6 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -517,15 +517,22 @@ public: expand (function_expander &e) const override { machine_mode mode = e.vector_mode (0); - if (e.pred == PRED_x) - { - /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs - a ptrue hint. */ - e.add_ptrue_hint (0, e.gp_mode (0)); - return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); - } - - return e.use_cond_insn (code_for_cond_cnot (mode), 0); + machine_mode pred_mode = e.gp_mode (0); + /* The underlying _x pattern is effectively: + + dst = src == 0 ? 1 : 0 + + rather than an UNSPEC_PRED_X. Using this form allows autovec + constructs to be matched by combine, but it means that the + predicate on the src == 0 comparison must be all-true. + + For simplicity, represent other _x operations as fully-defined _m + operations rather than using a separate bespoke pattern. */ + if (e.pred == PRED_x + && gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode)) + return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode)); + return e.use_cond_insn (code_for_cond_cnot (mode), + e.pred == PRED_x ? 1 : 0); } }; diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index eca8623e587..0434358122d 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3363,24 +3363,24 @@ (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2" ;; - CNOT ;; ------------------------------------------------------------------------- -;; Predicated logical inverse. -(define_expand "@aarch64_pred_cnot<mode>" +;; Logical inverse, predicated with a ptrue. +(define_expand "@aarch64_ptrue_cnot<mode>" [(set (match_operand:SVE_FULL_I 0 "register_operand") (unspec:SVE_FULL_I [(unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand") - (match_operand:SI 2 "aarch64_sve_ptrue_flag") + (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> - (match_operand:SVE_FULL_I 3 "register_operand") - (match_dup 4))] + (match_operand:SVE_FULL_I 2 "register_operand") + (match_dup 3))] UNSPEC_PRED_Z) - (match_dup 5) - (match_dup 4)] + (match_dup 4) + (match_dup 3)] UNSPEC_SEL))] "TARGET_SVE" { - operands[4] = CONST0_RTX (<MODE>mode); - operands[5] = CONST1_RTX (<MODE>mode); + operands[3] = CONST0_RTX (<MODE>mode); + operands[4] = CONST1_RTX (<MODE>mode); } ) @@ -3389,7 +3389,7 @@ (define_insn "*cnot<mode>" (unspec:SVE_I [(unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand") - (match_operand:SI 5 "aarch64_sve_ptrue_flag") + (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> (match_operand:SVE_I 2 "register_operand") (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] @@ -11001,4 +11001,4 @@ (define_insn "@aarch64_sve_set_neonq_<mode>" GET_MODE (operands[2])); return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>"; } -) \ No newline at end of file +) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c new file mode 100644 index 00000000000..b1a489f0cf0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c @@ -0,0 +1,23 @@ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** foo: +** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0 +** mov z0\.s, \1/z, #1 +** ret +*/ +svint32_t foo(svbool_t pg, svint32_t y) +{ + return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0)); +} + +#ifdef __cplusplus +} +#endif