Message ID | 2C4AC631-3F0B-4131-95C3-E6AA81AAB71C@nvidia.com |
---|---|
State | New |
Series | SVE intrinsics: Add constant folding for svindex. |
I resubmitted a corrected version of this patch in https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665648.html
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 1c17149e1f0..f6b1657ecbb 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1304,6 +1304,18 @@ public:
 
 class svindex_impl : public function_base
 {
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    tree vec_type = TREE_TYPE (f.lhs);
+    tree base = gimple_call_arg (f.call, 0);
+    tree step = gimple_call_arg (f.call, 1);
+
+    return gimple_build_assign (f.lhs,
+				build_vec_series (vec_type, base, step));
+  }
+
 public:
   rtx
   expand (function_expander &e) const override
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c
new file mode 100644
index 00000000000..f5e6c0f7a85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <arm_sve.h>
+#include <stdint.h>
+
+#define INDEX_CONST(TYPE, TY)			\
+  sv##TYPE f_##TY##_index_const ()		\
+  {						\
+    return svindex_##TY (10, 3);		\
+  }
+
+#define MULT_INDEX(TYPE, TY)			\
+  sv##TYPE f_##TY##_mult_index ()		\
+  {						\
+    return svmul_x (svptrue_b8 (),		\
+		    svindex_##TY (10, 3),	\
+		    5);				\
+  }
+
+#define ALL_TESTS(TYPE, TY)			\
+  INDEX_CONST (TYPE, TY)			\
+  MULT_INDEX (TYPE, TY)
+
+ALL_TESTS (uint8_t, u8)
+ALL_TESTS (uint16_t, u16)
+ALL_TESTS (uint32_t, u32)
+ALL_TESTS (uint64_t, u64)
+ALL_TESTS (int8_t, s8)
+ALL_TESTS (int16_t, s16)
+ALL_TESTS (int32_t, s32)
+ALL_TESTS (int64_t, s64)
+
+/* { dg-final { scan-tree-dump "return \\{ 10, 13, 16, ... \\}" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump "return \\{ 50, 65, 80, ... \\}" 8 "optimized" } } */
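For readers skimming the page, here is a minimal standalone sketch of the two shapes the scan-tree-dump directives above look for. This is illustrative only and not part of the patch; the function names are made up, and the expected dump lines assume an SVE-enabled aarch64 compiler invoked with -O2 -march=armv8.2-a+sve -fdump-tree-optimized.

```c
/* Illustrative only, not part of the patch: standalone versions of the
   patterns the test above scans for.  */
#include <arm_sve.h>

svuint64_t
index_const (void)
{
  /* With the fold, the optimized dump is expected to contain:
       return { 10, 13, 16, ... };  */
  return svindex_u64 (10, 3);
}

svuint64_t
mult_index (void)
{
  /* The folded series feeds the constant multiplication, so the
     optimized dump is expected to contain:
       return { 50, 65, 80, ... };  */
  return svmul_x (svptrue_b64 (), svindex_u64 (10, 3), 5);
}
```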
This patch folds svindex with constant arguments into a vector series.
We implemented this in svindex_impl::fold using the function build_vec_series.
For example,

svuint64_t f1 ()
{
  return svindex_u64 (10, 3);
}

compiled with -O2 -march=armv8.2-a+sve, is folded to {10, 13, 16, ...}
in the gimple pass 'lower'.
This optimization benefits cases where svindex is used in combination with
other gimple-level optimizations.
For example,

svuint64_t f2 ()
{
  return svmul_x (svptrue_b64 (), svindex_u64 (10, 3), 5);
}

was previously compiled to

f2:
	index	z0.d, #10, #3
	mul	z0.d, z0.d, #5
	ret

Now, it is compiled to

f2:
	mov	x0, 50
	index	z0.d, x0, #15
	ret

For non-constant arguments, build_vec_series produces a VEC_SERIES_EXPR,
which is translated back at RTL level to an index instruction without codegen
changes.

We added test cases checking
- the application of the transform at the gimple level for constant arguments,
- the interaction with another gimple-level optimization.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc
	(svindex_impl::fold): Add constant folding.

gcc/testsuite/
	* gcc.target/aarch64/sve/index_const_fold.c: New test.
---
 .../aarch64/aarch64-sve-builtins-base.cc      | 12 +++++++
 .../gcc.target/aarch64/sve/index_const_fold.c | 35 +++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/index_const_fold.c
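To illustrate the non-constant case described above, here is a minimal sketch; the function name and the expected assembly are illustrative assumptions rather than material taken from the patch. The fold still applies, but the resulting VEC_SERIES_EXPR is expected to expand back to a single index instruction, so code generation should be unchanged.

```c
/* Illustrative sketch (not part of the patch): non-constant arguments.
   Compiled with -O2 -march=armv8.2-a+sve, this is expected to remain a
   single INDEX instruction, e.g. roughly:
       index   z0.d, x0, x1
       ret  */
#include <arm_sve.h>
#include <stdint.h>

svuint64_t
index_var (uint64_t base, uint64_t step)
{
  return svindex_u64 (base, step);
}
```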