Message ID | 20210505173854.GA17884@arm.com |
---|---|
State | New |
Headers | show |
Series | [1/4] middle-end Vect: Add support for dot-product where the sign for the multiplicant changes. | expand |
Tamar Christina <tamar.christina@arm.com> writes: > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de17126432e2a63a32b 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>" > DONE; > }) > > +;; Auto-vectorizer pattern for usdot > +(define_expand "usdot_prod<vsi2qi>" > + [(set (match_operand:VS 0 "register_operand") > + (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand") > + (match_operand:<VSI2QI> 2 "register_operand")] > + UNSPEC_USDOT) > + (match_operand:VS 3 "register_operand")))] > + "TARGET_I8MM" > +{ > + emit_insn ( > + gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1], > + operands[2])); > + emit_move_insn (operands[0], operands[3]); > + DONE; > +}) We can't modify operands[3] here; it's an input rather than an output. It looks like this would work with just the {…} removed though. The pattern will match aarch64_usdot<vsi2qi> on its own accord. Even better would be to rename __builtin_aarch64_usdot… to __builtin_usdot_prod…, change its arguments so that they line up with the optabs, and change arm_neon.h to match. > diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c > new file mode 100644 > index 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f09496dd038410d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c > @@ -0,0 +1,38 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ > + > +#define N 480 > +#define SIGNEDNESS_1 unsigned > +#define SIGNEDNESS_2 signed > +#define SIGNEDNESS_3 signed > +#define SIGNEDNESS_4 unsigned > + > +SIGNEDNESS_1 int __attribute__ ((noipa)) > +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, > + SIGNEDNESS_4 char *restrict b) > +{ > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > + { > + int av = a[i]; > + int bv = b[i]; > + SIGNEDNESS_2 short mult = av * bv; > + res += mult; > + } > + return res; > +} > + > +SIGNEDNESS_1 int __attribute__ ((noipa)) > +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, > + SIGNEDNESS_4 char *restrict a) > +{ > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > + { > + int av = a[i]; > + int bv = b[i]; > + SIGNEDNESS_2 short mult = av * bv; > + res += mult; > + } > + return res; > +} > + > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c > new file mode 100644 > index 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec3505657bf05320e5fdbb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c > @@ -0,0 +1,38 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */ > + > +#define N 480 > +#define SIGNEDNESS_1 unsigned > +#define SIGNEDNESS_2 signed > +#define SIGNEDNESS_3 signed > +#define SIGNEDNESS_4 unsigned > + > +SIGNEDNESS_1 int __attribute__ ((noipa)) > +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, > + SIGNEDNESS_4 char *restrict b) > +{ > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > + { > + int av = a[i]; > + int bv = b[i]; > + SIGNEDNESS_2 short mult = av * bv; > + res += mult; > + } > + return res; > +} > + > +SIGNEDNESS_1 int __attribute__ ((noipa)) > +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, > + SIGNEDNESS_4 char *restrict a) > +{ > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > + { > + int av = a[i]; > + int bv = b[i]; > + SIGNEDNESS_2 short mult = av * bv; > + res += mult; > + } > + return res; > +} > + > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ Guess this is personal preference, but I don't think the SIGNEDNESS_* macros add anything when used like this. I remember doing something similar in the past when including .c files from other .c files(!) in order to avoid cut-&-paste, but there doesn't seem much benefit for standalone files like these. Thanks, Richard
Hi Richard, > -----Original Message----- > From: Richard Sandiford <richard.sandiford@arm.com> > Sent: Monday, May 10, 2021 5:49 PM > To: Tamar Christina <Tamar.Christina@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Subject: Re: [PATCH 2/4]AArch64: Add support for sign differing dot-product > usdot for NEON and SVE. > > Tamar Christina <tamar.christina@arm.com> writes: > > diff --git a/gcc/config/aarch64/aarch64-simd.md > > b/gcc/config/aarch64/aarch64-simd.md > > index > > > 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de1 > 7126 > > 432e2a63a32b 100644 > > --- a/gcc/config/aarch64/aarch64-simd.md > > +++ b/gcc/config/aarch64/aarch64-simd.md > > @@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>" > > DONE; > > }) > > > > +;; Auto-vectorizer pattern for usdot > > +(define_expand "usdot_prod<vsi2qi>" > > + [(set (match_operand:VS 0 "register_operand") > > + (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 > "register_operand") > > + (match_operand:<VSI2QI> 2 "register_operand")] > > + UNSPEC_USDOT) > > + (match_operand:VS 3 "register_operand")))] > > + "TARGET_I8MM" > > +{ > > + emit_insn ( > > + gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1], > > + operands[2])); > > + emit_move_insn (operands[0], operands[3]); > > + DONE; > > +}) > > We can't modify operands[3] here; it's an input rather than an output. Sorry, I should have noticed this.. I had blindly copied the existing pattern for dot-product and that looks like it's wrong. I'll send a different patch to fix that one. > > It looks like this would work with just the {…} removed though. > The pattern will match aarch64_usdot<vsi2qi> on its own accord. > > Even better would be to rename __builtin_aarch64_usdot… to > __builtin_usdot_prod…, change its arguments so that they line up with the > optabs, and change arm_neon.h to match. > > > diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c > > b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c > > new file mode 100644 > > index > > > 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f > 09 > > 496dd038410d > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c > > @@ -0,0 +1,38 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ > > + > > +#define N 480 > > +#define SIGNEDNESS_1 unsigned > > +#define SIGNEDNESS_2 signed > > +#define SIGNEDNESS_3 signed > > +#define SIGNEDNESS_4 unsigned > > + > > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > +SIGNEDNESS_3 char *restrict a, > > + SIGNEDNESS_4 char *restrict b) > > +{ > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > + { > > + int av = a[i]; > > + int bv = b[i]; > > + SIGNEDNESS_2 short mult = av * bv; > > + res += mult; > > + } > > + return res; > > +} > > + > > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res, > > +SIGNEDNESS_3 char *restrict b, > > + SIGNEDNESS_4 char *restrict a) > > +{ > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > + { > > + int av = a[i]; > > + int bv = b[i]; > > + SIGNEDNESS_2 short mult = av * bv; > > + res += mult; > > + } > > + return res; > > +} > > + > > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c > > b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c > > new file mode 100644 > > index > > > 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec35056 > 57b > > f05320e5fdbb > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c > > @@ -0,0 +1,38 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */ > > + > > +#define N 480 > > +#define SIGNEDNESS_1 unsigned > > +#define SIGNEDNESS_2 signed > > +#define SIGNEDNESS_3 signed > > +#define SIGNEDNESS_4 unsigned > > + > > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > +SIGNEDNESS_3 char *restrict a, > > + SIGNEDNESS_4 char *restrict b) > > +{ > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > + { > > + int av = a[i]; > > + int bv = b[i]; > > + SIGNEDNESS_2 short mult = av * bv; > > + res += mult; > > + } > > + return res; > > +} > > + > > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res, > > +SIGNEDNESS_3 char *restrict b, > > + SIGNEDNESS_4 char *restrict a) > > +{ > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > + { > > + int av = a[i]; > > + int bv = b[i]; > > + SIGNEDNESS_2 short mult = av * bv; > > + res += mult; > > + } > > + return res; > > +} > > + > > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ > > Guess this is personal preference, but I don't think the SIGNEDNESS_* > macros add anything when used like this. I remember doing something > similar in the past when including .c files from other .c files(!) in order to > avoid cut-&-paste, but there doesn't seem much benefit for standalone files > like these. If it's the same to you, I do prefer this version, since it's identical to the mid-end tests, It does allow when familiar with the tests to just quickly see what it's testing. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_usdot<vsi2qi>): Rename to... (usdot_prod<vsi2qi>): ... This. * config/aarch64/aarch64-simd-builtins.def (usdot): Rename to... (usdot_prod): ...This. * config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Likewise. * config/aarch64/aarch64-sve.md (@aarch64_<sur>dot_prod<vsi2qi>): Rename to... (@<sur>dot_prod<vsi2qi>): ...This. * config/aarch64/aarch64-sve-builtins-base.cc (svusdot_impl::expand): Use it. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vusdot-autovec.c: New test. * gcc.target/aarch64/sve/vusdot-autovec.c: New test. > > Thanks, > Richard
Tamar Christina <Tamar.Christina@arm.com> writes: >> -----Original Message----- >> From: Richard Sandiford <richard.sandiford@arm.com> >> Sent: Monday, May 10, 2021 5:49 PM >> To: Tamar Christina <Tamar.Christina@arm.com> >> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw >> <Richard.Earnshaw@arm.com>; Marcus Shawcroft >> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> >> Subject: Re: [PATCH 2/4]AArch64: Add support for sign differing dot-product >> usdot for NEON and SVE. >> >> Tamar Christina <tamar.christina@arm.com> writes: >> > diff --git a/gcc/config/aarch64/aarch64-simd.md >> > b/gcc/config/aarch64/aarch64-simd.md >> > index >> > >> 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de1 >> 7126 >> > 432e2a63a32b 100644 >> > --- a/gcc/config/aarch64/aarch64-simd.md >> > +++ b/gcc/config/aarch64/aarch64-simd.md >> > @@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>" >> > DONE; >> > }) >> > >> > +;; Auto-vectorizer pattern for usdot >> > +(define_expand "usdot_prod<vsi2qi>" >> > + [(set (match_operand:VS 0 "register_operand") >> > + (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 >> "register_operand") >> > + (match_operand:<VSI2QI> 2 "register_operand")] >> > + UNSPEC_USDOT) >> > + (match_operand:VS 3 "register_operand")))] >> > + "TARGET_I8MM" >> > +{ >> > + emit_insn ( >> > + gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1], >> > + operands[2])); >> > + emit_move_insn (operands[0], operands[3]); >> > + DONE; >> > +}) >> >> We can't modify operands[3] here; it's an input rather than an output. > > Sorry, I should have noticed this.. I had blindly copied the existing pattern for dot-product and that looks like it's wrong. > I'll send a different patch to fix that one. > >> >> It looks like this would work with just the {…} removed though. >> The pattern will match aarch64_usdot<vsi2qi> on its own accord. >> >> Even better would be to rename __builtin_aarch64_usdot… to >> __builtin_usdot_prod…, change its arguments so that they line up with the >> optabs, and change arm_neon.h to match. >> >> > diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c >> > b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c >> > new file mode 100644 >> > index >> > >> 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f >> 09 >> > 496dd038410d >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c >> > @@ -0,0 +1,38 @@ >> > +/* { dg-do compile } */ >> > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ >> > + >> > +#define N 480 >> > +#define SIGNEDNESS_1 unsigned >> > +#define SIGNEDNESS_2 signed >> > +#define SIGNEDNESS_3 signed >> > +#define SIGNEDNESS_4 unsigned >> > + >> > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, >> > +SIGNEDNESS_3 char *restrict a, >> > + SIGNEDNESS_4 char *restrict b) >> > +{ >> > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) >> > + { >> > + int av = a[i]; >> > + int bv = b[i]; >> > + SIGNEDNESS_2 short mult = av * bv; >> > + res += mult; >> > + } >> > + return res; >> > +} >> > + >> > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res, >> > +SIGNEDNESS_3 char *restrict b, >> > + SIGNEDNESS_4 char *restrict a) >> > +{ >> > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) >> > + { >> > + int av = a[i]; >> > + int bv = b[i]; >> > + SIGNEDNESS_2 short mult = av * bv; >> > + res += mult; >> > + } >> > + return res; >> > +} >> > + >> > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ >> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c >> > b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c >> > new file mode 100644 >> > index >> > >> 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec35056 >> 57b >> > f05320e5fdbb >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c >> > @@ -0,0 +1,38 @@ >> > +/* { dg-do compile } */ >> > +/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */ >> > + >> > +#define N 480 >> > +#define SIGNEDNESS_1 unsigned >> > +#define SIGNEDNESS_2 signed >> > +#define SIGNEDNESS_3 signed >> > +#define SIGNEDNESS_4 unsigned >> > + >> > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, >> > +SIGNEDNESS_3 char *restrict a, >> > + SIGNEDNESS_4 char *restrict b) >> > +{ >> > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) >> > + { >> > + int av = a[i]; >> > + int bv = b[i]; >> > + SIGNEDNESS_2 short mult = av * bv; >> > + res += mult; >> > + } >> > + return res; >> > +} >> > + >> > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res, >> > +SIGNEDNESS_3 char *restrict b, >> > + SIGNEDNESS_4 char *restrict a) >> > +{ >> > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) >> > + { >> > + int av = a[i]; >> > + int bv = b[i]; >> > + SIGNEDNESS_2 short mult = av * bv; >> > + res += mult; >> > + } >> > + return res; >> > +} >> > + >> > +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ >> >> Guess this is personal preference, but I don't think the SIGNEDNESS_* >> macros add anything when used like this. I remember doing something >> similar in the past when including .c files from other .c files(!) in order to >> avoid cut-&-paste, but there doesn't seem much benefit for standalone files >> like these. > > If it's the same to you, I do prefer this version, since it's identical to the mid-end tests, > It does allow when familiar with the tests to just quickly see what it's testing. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * config/aarch64/aarch64-simd.md (aarch64_usdot<vsi2qi>): Rename to... > (usdot_prod<vsi2qi>): ... This. > * config/aarch64/aarch64-simd-builtins.def (usdot): Rename to... > (usdot_prod): ...This. > * config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Likewise. > * config/aarch64/aarch64-sve.md (@aarch64_<sur>dot_prod<vsi2qi>): > Rename to... > (@<sur>dot_prod<vsi2qi>): ...This. > * config/aarch64/aarch64-sve-builtins-base.cc > (svusdot_impl::expand): Use it. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/simd/vusdot-autovec.c: New test. > * gcc.target/aarch64/sve/vusdot-autovec.c: New test. OK, thanks. Richard
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 4edee99051c4e2112b546becca47da32aae21df2..c9fb8e702732dd311fb10de17126432e2a63a32b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -648,6 +648,22 @@ (define_expand "<sur>dot_prod<vsi2qi>" DONE; }) +;; Auto-vectorizer pattern for usdot +(define_expand "usdot_prod<vsi2qi>" + [(set (match_operand:VS 0 "register_operand") + (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand") + (match_operand:<VSI2QI> 2 "register_operand")] + UNSPEC_USDOT) + (match_operand:VS 3 "register_operand")))] + "TARGET_I8MM" +{ + emit_insn ( + gen_aarch64_usdot<vsi2qi> (operands[3], operands[3], operands[1], + operands[2])); + emit_move_insn (operands[0], operands[3]); + DONE; +}) + ;; These instructions map to the __builtins for the Dot Product ;; indexed operations. (define_insn "aarch64_<sur>dot_lane<vsi2qi>" diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index dfdf0e2fd186389cbddcff51ef52f8778d7fdb24..50adcd5404e97e610485140fdbfe4c8ebbf2f602 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -2366,7 +2366,7 @@ public: Hence we do the same rotation on arguments as svdot_impl does. */ e.rotate_inputs_left (0, 3); machine_mode mode = e.vector_mode (0); - insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); + insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode); return e.use_exact_insn (icode); } diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 7db2938bb84e04d066a7b07574e5cf344a3a8fb6..1278f6f12fadf8eec693cd47fd545ff3277f08f1 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -6870,7 +6870,7 @@ (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>" [(set_attr "movprfx" "*,yes")] ) -(define_insn "@aarch64_<sur>dot_prod<vsi2qi>" +(define_insn "@<sur>dot_prod<vsi2qi>" [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") (plus:VNx4SI_ONLY (unspec:VNx4SI_ONLY diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c new file mode 100644 index 0000000000000000000000000000000000000000..b99a945903c043c7410becaf6f09496dd038410d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vusdot-autovec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ + +#define N 480 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +SIGNEDNESS_1 int __attribute__ ((noipa)) +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, + SIGNEDNESS_4 char *restrict a) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c new file mode 100644 index 0000000000000000000000000000000000000000..094dd51cea62e0ba05ec3505657bf05320e5fdbb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vusdot-autovec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+i8mm+sve" } */ + +#define N 480 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +SIGNEDNESS_1 int __attribute__ ((noipa)) +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, + SIGNEDNESS_4 char *restrict a) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +/* { dg-final { scan-assembler-times {\tusdot\t} 2 } } */