Message ID | 18778C11-2B63-45AD-B9C4-AE61EBEDC61A@nvidia.com |
---|---|
State | New |
Series | SVE intrinsics: Fold svaba with op1 all zeros to svabd. |
Jennifer Schmitz <jschmitz@nvidia.com> writes:
> Similar to
> https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665780.html,
> this patch implements folding of svaba to svabd if op1 is all zeros,
> resulting in the use of UABD/SABD instructions instead of UABA/SABA.
> Tests were added to check the produced assembly for use of UABD/SABD,
> also for the _n case.
>
> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
> OK for mainline?
>
> Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
>
> gcc/
> 	* config/aarch64/aarch64-sve-builtins-sve2.cc
> 	(svaba_impl::fold): Fold svaba to svabd if op1 is all zeros.
>
> gcc/testsuite/
> 	* gcc.target/aarch64/sve2/acle/asm/aba_s32.c: New tests.
> 	* gcc.target/aarch64/sve2/acle/asm/aba_s64.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/aba_u32.c: Likewise.
> 	* gcc.target/aarch64/sve2/acle/asm/aba_u64.c: Likewise.

OK, thanks.

Richard
> On 24 Oct 2024, at 21:55, Richard Sandiford <richard.sandiford@arm.com> wrote:
>
> Jennifer Schmitz <jschmitz@nvidia.com> writes:
>> Similar to
>> https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665780.html,
>> this patch implements folding of svaba to svabd if op1 is all zeros,
>> resulting in the use of UABD/SABD instructions instead of UABA/SABA.
>> [...]
>> OK for mainline?
>
> OK, thanks.

Thanks, committed to trunk: 0b22f0585348335369298c7d39afd171758eebe9
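The fold rests on a simple element-wise identity: ABA computes op1 + |op2 - op3|, so a zero accumulator reduces it to the plain absolute difference that ABD computes. A minimal scalar model of that reasoning (illustrative only; the helper names are hypothetical and this is not code from the patch):

#include <assert.h>
#include <stdint.h>

/* UABD: element-wise absolute difference (unsigned).  */
static uint32_t
uabd_elem (uint32_t b, uint32_t c)
{
  return b > c ? b - c : c - b;
}

/* UABA: accumulate the absolute difference into the first operand.  */
static uint32_t
uaba_elem (uint32_t a, uint32_t b, uint32_t c)
{
  return a + uabd_elem (b, c);
}

int
main (void)
{
  /* With a zero accumulator, ABA degenerates to ABD; this is the
     identity that justifies the fold in the diff below.  */
  assert (uaba_elem (0, 7, 11) == uabd_elem (7, 11));
  return 0;
}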
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 6a20a613f83..107b299d068 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -80,6 +80,24 @@ unspec_sqrdcmlah (int rot)
 
 class svaba_impl : public function_base
 {
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    /* Fold to svabd if op1 is all zeros.  */
+    tree op1 = gimple_call_arg (f.call, 0);
+    if (!integer_zerop (op1))
+      return NULL;
+    function_instance instance ("svabd", functions::svabd,
+				shapes::binary_opt_n, f.mode_suffix_id,
+				f.type_suffix_ids, GROUP_none, PRED_x);
+    gcall *call = f.redirect_call (instance);
+    /* Add a ptrue as predicate, because unlike svaba, svabd is
+       predicated.  */
+    gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ()));
+    return call;
+  }
+
 public:
   rtx
   expand (function_expander &e) const override
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
index 73c00282526..655ad630241 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
@@ -108,3 +108,26 @@ TEST_UNIFORM_Z (aba_11_s32_tied2, svint32_t,
 TEST_UNIFORM_Z (aba_11_s32_untied, svint32_t,
 		z0 = svaba_n_s32 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_s32_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	sabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s32_zeroop1n, svint32_t,
+		z0 = svaba_n_s32 (svdup_s32 (0), z1, 11),
+		z0 = svaba (svdup_s32 (0), z1, 11))
+
+
+/*
+** aba_11_s32_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	sabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s32_zeroop1, svint32_t,
+		z0 = svaba_s32 (svdup_s32 (0), z1, svdup_s32 (11)),
+		z0 = svaba (svdup_s32 (0), z1, svdup_s32 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
index 0c169dbf613..8b1eb7d2f4e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
@@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_s64_tied2, svint64_t,
 TEST_UNIFORM_Z (aba_11_s64_untied, svint64_t,
 		z0 = svaba_n_s64 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_s64_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	sabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s64_zeroop1n, svint64_t,
+		z0 = svaba_n_s64 (svdup_s64 (0), z1, 11),
+		z0 = svaba (svdup_s64 (0), z1, 11))
+
+/*
+** aba_11_s64_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	sabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s64_zeroop1, svint64_t,
+		z0 = svaba_s64 (svdup_s64 (0), z1, svdup_s64 (11)),
+		z0 = svaba (svdup_s64 (0), z1, svdup_s64 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c
index 2ba8f419567..fc2fed28e02 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c
@@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_u32_tied2, svuint32_t,
 TEST_UNIFORM_Z (aba_11_u32_untied, svuint32_t,
 		z0 = svaba_n_u32 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_u32_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	uabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u32_zeroop1n, svuint32_t,
+		z0 = svaba_n_u32 (svdup_u32 (0), z1, 11),
+		z0 = svaba (svdup_u32 (0), z1, 11))
+
+/*
+** aba_11_u32_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	uabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u32_zeroop1, svuint32_t,
+		z0 = svaba_u32 (svdup_u32 (0), z1, svdup_u32 (11)),
+		z0 = svaba (svdup_u32 (0), z1, svdup_u32 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c
index 8c6bef02004..f6ed2167d6a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c
@@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_u64_tied2, svuint64_t,
 TEST_UNIFORM_Z (aba_11_u64_untied, svuint64_t,
 		z0 = svaba_n_u64 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_u64_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	uabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u64_zeroop1n, svuint64_t,
+		z0 = svaba_n_u64 (svdup_u64 (0), z1, 11),
+		z0 = svaba (svdup_u64 (0), z1, 11))
+
+/*
+** aba_11_u64_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	uabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u64_zeroop1, svuint64_t,
+		z0 = svaba_u64 (svdup_u64 (0), z1, svdup_u64 (11)),
+		z0 = svaba (svdup_u64 (0), z1, svdup_u64 (11)))
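At the source level, the pattern the fold targets looks like the sketch below (the function is hypothetical and not part of the patch; it assumes an SVE2-enabled compiler, e.g. -march=armv8.2-a+sve2):

#include <arm_sve.h>

/* With an all-zeros accumulator, svaba reduces to the absolute
   difference, so after this patch GCC can fold the call to svabd and
   emit UABD (predicated by the PTRUE that the fold inserts) instead
   of UABA.  */
svuint32_t
abs_diff_u32 (svuint32_t b, svuint32_t c)
{
  return svaba_u32 (svdup_u32 (0), b, c);
}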
Similar to
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665780.html,
this patch implements folding of svaba to svabd if op1 is all zeros,
resulting in the use of UABD/SABD instructions instead of UABA/SABA.
Tests were added to check the produced assembly for use of UABD/SABD,
also for the _n case.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-sve2.cc
	(svaba_impl::fold): Fold svaba to svabd if op1 is all zeros.

gcc/testsuite/
	* gcc.target/aarch64/sve2/acle/asm/aba_s32.c: New tests.
	* gcc.target/aarch64/sve2/acle/asm/aba_s64.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/aba_u32.c: Likewise.
	* gcc.target/aarch64/sve2/acle/asm/aba_u64.c: Likewise.
---
 .../aarch64/aarch64-sve-builtins-sve2.cc | 18 +++++++++++++++
 .../aarch64/sve2/acle/asm/aba_s32.c      | 23 +++++++++++++++++++
 .../aarch64/sve2/acle/asm/aba_s64.c      | 22 ++++++++++++++++++
 .../aarch64/sve2/acle/asm/aba_u32.c      | 22 ++++++++++++++++++
 .../aarch64/sve2/acle/asm/aba_u64.c      | 22 ++++++++++++++++++
 5 files changed, 107 insertions(+)
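For the _n u32 test case, the before/after contrast is roughly as follows. The "after" lines are taken from the new aba_11_u32_zeroop1n test expectation (with p0 standing in for whichever predicate register the compiler picks); the "before" lines are only a plausible pre-patch sketch, not captured compiler output:

before (illustrative: UABA with an explicitly zeroed accumulator):
	mov	z0.s, #0
	mov	z2.s, #11
	uaba	z0.s, z1.s, z2.s

after (per the new test expectation in aba_u32.c):
	ptrue	p0.b, all
	mov	z0.s, #11
	uabd	z0.s, p0/m, z0.s, z1.s
	ret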