Message ID | 8bcdf719-97ac-4a7c-bcd9-f04d28c345a0@gmail.com |
---|---|
State | New |
Headers | show |
Series | match: Fix vcond into conditional op folding [PR113607]. | expand |
On Wed, Jan 31, 2024 at 12:50 PM Robin Dapp <rdapp.gcc@gmail.com> wrote: > > Hi, > > in PR113607 we see an invalid fold of > > _429 = .COND_SHL (mask_patt_205.47_276, vect_cst__262, vect_cst__262, { 0, ... }); > vect_prephitmp_129.51_282 = _429; > vect_iftmp.55_287 = VEC_COND_EXPR <mask_patt_209.54_286, vect_prephitmp_129.51_282, vect_cst__262>; > > to > > Applying pattern match.pd:9607, gimple-match-10.cc:3817 > gimple_simplified to vect_iftmp.55_287 = .COND_SHL (mask_patt_205.47_276, vect_cst__262, vect_cst__262, { 0, ... }); > > where we essentially use COND_SHL's else instead of VEC_COND_EXPR's. > > This patch adjusts the corresponding match.pd pattern and makes it only > match when the else values are the same. > > That, however, causes the exact test case to fail which this pattern > was introduced for. XFAIL it for now. > > Bootstrapped and regtested on x86. Regtested on riscv. aarch64 > is still running. OK. Thanks, Richard. > Regards > Robin > > > gcc/ChangeLog: > > PR middle-end/113607 > > * match.pd: Make sure else values match when folding a > vec_cond into a conditional operation. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/sve/pre_cond_share_1.c: XFAIL. > * gcc.target/riscv/rvv/autovec/pr113607-run.c: New test. > * gcc.target/riscv/rvv/autovec/pr113607.c: New test. > --- > gcc/match.pd | 8 +-- > .../gcc.target/aarch64/sve/pre_cond_share_1.c | 2 +- > .../riscv/rvv/autovec/pr113607-run.c | 4 ++ > .../gcc.target/riscv/rvv/autovec/pr113607.c | 49 +++++++++++++++++++ > 4 files changed, 58 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index e42ecaf9ec7..7c391a8fe20 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -9592,18 +9592,18 @@ and, > > /* Detect simplification for vector condition folding where > > - c = mask1 ? (masked_op mask2 a b) : b > + c = mask1 ? (masked_op mask2 a b els) : els > > into > > - c = masked_op (mask1 & mask2) a b > + c = masked_op (mask1 & mask2) a b els > > where the operation can be partially applied to one operand. */ > > (for cond_op (COND_BINARY) > (simplify > (vec_cond @0 > - (cond_op:s @1 @2 @3 @4) @3) > + (cond_op:s @1 @2 @3 @4) @4) > (cond_op (bit_and @1 @0) @2 @3 @4))) > > /* And same for ternary expressions. */ > @@ -9611,7 +9611,7 @@ and, > (for cond_op (COND_TERNARY) > (simplify > (vec_cond @0 > - (cond_op:s @1 @2 @3 @4 @5) @4) > + (cond_op:s @1 @2 @3 @4 @5) @5) > (cond_op (bit_and @1 @0) @2 @3 @4 @5))) > > /* For pointers @0 and @2 and nonnegative constant offset @1, look for > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > index b51d0f298ea..e4f754d739c 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c > @@ -129,4 +129,4 @@ fasten_main(size_t group, size_t ntypes, size_t nposes, size_t natlig, size_t na > } > > /* { dg-final { scan-tree-dump-times {\.COND_MUL} 1 "optimized" } } */ > -/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" { xfail *-*-* } } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c > new file mode 100644 > index 00000000000..06074767ce5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c > @@ -0,0 +1,4 @@ > +/* { dg-do run { target { riscv_v && rv64 } } } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fdump-tree-optimized" } */ > + > +#include "pr113607.c" > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c > new file mode 100644 > index 00000000000..70a93665497 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c > @@ -0,0 +1,49 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fdump-tree-optimized" } */ > + > +struct { > + signed b; > +} c, d = {6}; > + > +short e, f; > +int g[1000]; > +signed char h; > +int i, j; > +long k, l; > + > +long m(long n, long o) { > + if (n < 1 && o == 0) > + return 0; > + return n; > +} > + > +static int p() { > + long q = 0; > + int a = 0; > + for (; e < 2; e += 1) > + g[e * 7 + 1] = -1; > + for (; h < 1; h += 1) { > + k = g[8] || f; > + l = m(g[f * 7 + 1], k); > + a = l; > + j = a < 0 || g[f * 7 + 1] < 0 || g[f * 7 + 1] >= 32 ? a : a << g[f * 7 + 1]; > + if (j) > + ++q; > + } > + if (q) > + c = d; > + return i; > +} > + > +int main() { > + p(); > + if (c.b != 6) > + __builtin_abort (); > +} > + > +/* We must not fold VEC_COND_EXPR into COND_SHL. > + Therefore, make sure that we still have 2/4 VCOND_MASKs with real else > + value. */ > + > +/* { dg-final { scan-tree-dump-times { = \.VCOND_MASK.\([a-z0-9\._]+, [a-z0-9\._\{\}, ]+, [0-9\.\{\},]+\);} 0 "optimized" } } */ > +/* { dg-final { scan-tree-dump-times { = \.VCOND_MASK.\([a-z0-9\._]+, [a-z0-9\._\{\}, ]+, [a-z0-9\._]+\);} 4 "optimized" } } */ > -- > 2.43.0
diff --git a/gcc/match.pd b/gcc/match.pd index e42ecaf9ec7..7c391a8fe20 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -9592,18 +9592,18 @@ and, /* Detect simplification for vector condition folding where - c = mask1 ? (masked_op mask2 a b) : b + c = mask1 ? (masked_op mask2 a b els) : els into - c = masked_op (mask1 & mask2) a b + c = masked_op (mask1 & mask2) a b els where the operation can be partially applied to one operand. */ (for cond_op (COND_BINARY) (simplify (vec_cond @0 - (cond_op:s @1 @2 @3 @4) @3) + (cond_op:s @1 @2 @3 @4) @4) (cond_op (bit_and @1 @0) @2 @3 @4))) /* And same for ternary expressions. */ @@ -9611,7 +9611,7 @@ and, (for cond_op (COND_TERNARY) (simplify (vec_cond @0 - (cond_op:s @1 @2 @3 @4 @5) @4) + (cond_op:s @1 @2 @3 @4 @5) @5) (cond_op (bit_and @1 @0) @2 @3 @4 @5))) /* For pointers @0 and @2 and nonnegative constant offset @1, look for diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c index b51d0f298ea..e4f754d739c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c @@ -129,4 +129,4 @@ fasten_main(size_t group, size_t ntypes, size_t nposes, size_t natlig, size_t na } /* { dg-final { scan-tree-dump-times {\.COND_MUL} 1 "optimized" } } */ -/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c new file mode 100644 index 00000000000..06074767ce5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607-run.c @@ -0,0 +1,4 @@ +/* { dg-do run { target { riscv_v && rv64 } } } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fdump-tree-optimized" } */ + +#include "pr113607.c" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c new file mode 100644 index 00000000000..70a93665497 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113607.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fdump-tree-optimized" } */ + +struct { + signed b; +} c, d = {6}; + +short e, f; +int g[1000]; +signed char h; +int i, j; +long k, l; + +long m(long n, long o) { + if (n < 1 && o == 0) + return 0; + return n; +} + +static int p() { + long q = 0; + int a = 0; + for (; e < 2; e += 1) + g[e * 7 + 1] = -1; + for (; h < 1; h += 1) { + k = g[8] || f; + l = m(g[f * 7 + 1], k); + a = l; + j = a < 0 || g[f * 7 + 1] < 0 || g[f * 7 + 1] >= 32 ? a : a << g[f * 7 + 1]; + if (j) + ++q; + } + if (q) + c = d; + return i; +} + +int main() { + p(); + if (c.b != 6) + __builtin_abort (); +} + +/* We must not fold VEC_COND_EXPR into COND_SHL. + Therefore, make sure that we still have 2/4 VCOND_MASKs with real else + value. */ + +/* { dg-final { scan-tree-dump-times { = \.VCOND_MASK.\([a-z0-9\._]+, [a-z0-9\._\{\}, ]+, [0-9\.\{\},]+\);} 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times { = \.VCOND_MASK.\([a-z0-9\._]+, [a-z0-9\._\{\}, ]+, [a-z0-9\._]+\);} 4 "optimized" } } */