Message ID | a8a4dc84-c867-441a-93d6-8a3c932f0fa6@gmail.com |
---|---|
State | New |
Headers | show |
Series | vect: Allow same precision for bit-precision conversions. | expand |
> Am 18.10.2023 um 16:19 schrieb Robin Dapp <rdapp.gcc@gmail.com>: > > Hi, > > even though there was no full conclusion yet I took the liberty of > just posting this as a patch in case of further discussion. > > In PR/111794 we miss a vectorization because on riscv type precision and > mode precision differ for mask types. We can still vectorize when > allowing assignments with the same precision for dest and source which > is what this patch does. > > Bootstrapped and regtested on x86, aarch64 and power10. No new failures > on riscv. It looks safe, thus OK. Richard. > Regards > Robin > > gcc/ChangeLog: > > PR/111794 > > * tree-vect-stmts.cc (vectorizable_assignment): Add > same-precision exception for dest and source. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/slp-mask-1.c: New test. > * gcc.target/riscv/rvv/autovec/slp-mask-run-1.c: New test. > --- > .../gcc.target/riscv/rvv/autovec/slp-mask-1.c | 18 +++++++++++ > .../riscv/rvv/autovec/slp-mask-run-1.c | 31 +++++++++++++++++++ > gcc/tree-vect-stmts.cc | 12 ++++--- > 3 files changed, 56 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c > new file mode 100644 > index 00000000000..ee1baa58d63 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-slp-details" } */ > + > +void > +__attribute__ ((noipa)) > +f (int *restrict x, short *restrict y, int *restrict res) > +{ > + res[0] = x[0] == 1 & y[0] == 2; > + res[1] = x[1] == 1 & y[1] == 2; > + res[2] = x[2] == 1 & y[2] == 2; > + res[3] = x[3] == 1 & y[3] == 2; > + res[4] = x[4] == 1 & y[4] == 2; > + res[5] = x[5] == 1 & y[5] == 2; > + res[6] = x[6] == 1 & y[6] == 2; > + res[7] = x[7] == 1 & y[7] == 2; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp2" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c > new file mode 100644 > index 00000000000..b3469c41c87 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c > @@ -0,0 +1,31 @@ > +/* { dg-do run { target { riscv_v } } } */ > +/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */ > + > +#include <malloc.h> > +#include <stdio.h> > + > +#include "slp-mask-1.c" > + > +#define SZ 8 > + > +__attribute__ ((optimize ("1"))) > +int main () > +{ > + int *a = malloc (SZ * sizeof (*a)); > + short *b = malloc (SZ * sizeof (*b)); > + int *res = malloc (SZ * sizeof (*res)); > + int *ref = malloc (SZ * sizeof (*ref)); > + > + for (int i = 0; i < SZ; i++) > + { > + a[i] = i & 1; > + b[i] = 2; > + ref[i] = a[i] == 1 & b[i] == 2; > + } > + > + f (a, b, res); > + > + for (int i = 0; i < SZ; i++) > + if (res[i] != ref[i]) > + __builtin_abort (); > +} > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index cd7c1090d88..e612da6c492 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -6084,14 +6084,16 @@ vectorizable_assignment (vec_info *vinfo, > /* But a conversion that does not change the bit-pattern is ok. */ > && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) > && INTEGRAL_TYPE_P (TREE_TYPE (op)) > - && (TYPE_PRECISION (TREE_TYPE (scalar_dest)) > + && (((TYPE_PRECISION (TREE_TYPE (scalar_dest)) > > TYPE_PRECISION (TREE_TYPE (op))) > - && TYPE_UNSIGNED (TREE_TYPE (op)))) > + && TYPE_UNSIGNED (TREE_TYPE (op))) > + || (TYPE_PRECISION (TREE_TYPE (scalar_dest)) > + == TYPE_PRECISION (TREE_TYPE (op)))))) > { > if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "type conversion to/from bit-precision " > - "unsupported.\n"); > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "type conversion to/from bit-precision " > + "unsupported.\n"); > return false; > } > > -- > 2.41.0 >
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c new file mode 100644 index 00000000000..ee1baa58d63 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-slp-details" } */ + +void +__attribute__ ((noipa)) +f (int *restrict x, short *restrict y, int *restrict res) +{ + res[0] = x[0] == 1 & y[0] == 2; + res[1] = x[1] == 1 & y[1] == 2; + res[2] = x[2] == 1 & y[2] == 2; + res[3] = x[3] == 1 & y[3] == 2; + res[4] = x[4] == 1 & y[4] == 2; + res[5] = x[5] == 1 & y[5] == 2; + res[6] = x[6] == 1 & y[6] == 2; + res[7] = x[7] == 1 & y[7] == 2; +} + +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp2" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c new file mode 100644 index 00000000000..b3469c41c87 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c @@ -0,0 +1,31 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */ + +#include <malloc.h> +#include <stdio.h> + +#include "slp-mask-1.c" + +#define SZ 8 + +__attribute__ ((optimize ("1"))) +int main () +{ + int *a = malloc (SZ * sizeof (*a)); + short *b = malloc (SZ * sizeof (*b)); + int *res = malloc (SZ * sizeof (*res)); + int *ref = malloc (SZ * sizeof (*ref)); + + for (int i = 0; i < SZ; i++) + { + a[i] = i & 1; + b[i] = 2; + ref[i] = a[i] == 1 & b[i] == 2; + } + + f (a, b, res); + + for (int i = 0; i < SZ; i++) + if (res[i] != ref[i]) + __builtin_abort (); +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index cd7c1090d88..e612da6c492 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -6084,14 +6084,16 @@ vectorizable_assignment (vec_info *vinfo, /* But a conversion that does not change the bit-pattern is ok. */ && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op)) - && (TYPE_PRECISION (TREE_TYPE (scalar_dest)) + && (((TYPE_PRECISION (TREE_TYPE (scalar_dest)) > TYPE_PRECISION (TREE_TYPE (op))) - && TYPE_UNSIGNED (TREE_TYPE (op)))) + && TYPE_UNSIGNED (TREE_TYPE (op))) + || (TYPE_PRECISION (TREE_TYPE (scalar_dest)) + == TYPE_PRECISION (TREE_TYPE (op)))))) { if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "type conversion to/from bit-precision " - "unsupported.\n"); + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "type conversion to/from bit-precision " + "unsupported.\n"); return false; }