Message ID | CAFULd4b9AguC84O+rs0hW8hYsSywo1UJOyhFoBVK+C_KJGdDDw@mail.gmail.com |
---|---|
State | New |
Headers | show |
Series | [RFC] i386: Enable auto-vectorization for 32bit modes (+ testcases) | expand |
On Fri, May 21, 2021 at 5:00 PM Uros Bizjak via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Here it is, the patch that enables auto-vectorization for 32bit modes. > > Sent as RFC, because the patch fails some vectorizer scans, as it > obviously enables more vectorization to happen: > > Running target unix > FAIL: gcc.dg/vect/pr71264.c -flto -ffat-lto-objects scan-tree-dump > vect "vectorized 1 loops in function" > FAIL: gcc.dg/vect/pr71264.c scan-tree-dump vect "vectorized 1 loops in function" > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorized 1 loops" 1 > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorizing stmts using SLP" 1 > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorized 1 loops" 1 > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorizing > stmts using SLP" 1 > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorized 3 loops" 1 > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorizing stmts using SLP" 3 > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorized 3 loops" 1 > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorizing stmts > using SLP" 3 > > > Running target unix/-m32 > FAIL: gcc.dg/vect/no-vfa-vect-101.c scan-tree-dump-times vect "can't > determine dependence" 1 > FAIL: gcc.dg/vect/no-vfa-vect-102.c scan-tree-dump-times vect > "possible dependence between data-refs" 1 > FAIL: gcc.dg/vect/no-vfa-vect-102a.c scan-tree-dump-times vect > "possible dependence between data-refs" 1 > FAIL: gcc.dg/vect/no-vfa-vect-37.c scan-tree-dump-times vect "can't > determine dependence" 2 > FAIL: gcc.dg/vect/pr71264.c -flto -ffat-lto-objects scan-tree-dump > vect "vectorized 1 loops in function" > FAIL: gcc.dg/vect/pr71264.c scan-tree-dump vect "vectorized 1 loops in function" > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorized 1 
loops" 1 > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorizing stmts using SLP" 1 > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorized 1 loops" 1 > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorizing > stmts using SLP" 1 > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorized 3 loops" 1 > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > scan-tree-dump-times vect "vectorizing stmts using SLP" 3 > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorized 3 loops" 1 > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorizing stmts > using SLP" 3 > FAIL: gcc.dg/vect/vect-104.c -flto -ffat-lto-objects > scan-tree-dump-times vect "possible dependence between data-refs" 1 > FAIL: gcc.dg/vect/vect-104.c scan-tree-dump-times vect "possible > dependence between data-refs" 1 Yeah, it's a bit iffy to adjust expectations. If there's a way to disable vectorization for 32bit modes on x86 that might be a way to "fix" them, otherwise we're lacking a way to query for available vector modes/sizes in the dejagnu vect targets. There's available_vector_sizes but its implementation is hardly complete nor is size the only important thing (FP vs. INT). At least one could add a vect32 predicate similar to the existing vect64 one. Richard. > Please also note that V4QI and V2HI modes do not use MMX registers, so > auto-vectorization can also be enabled on 32bit x86 targets. > > Uros.
On Tue, May 25, 2021 at 4:29 PM Richard Biener <richard.guenther@gmail.com> wrote: > > On Fri, May 21, 2021 at 5:00 PM Uros Bizjak via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Here it is, the patch that enables auto-vectorization for 32bit modes. > > > > Sent as RFC, because the patch fails some vectorizer scans, as it > > obviously enables more vectorization to happen: > > > > Running target unix > > FAIL: gcc.dg/vect/pr71264.c -flto -ffat-lto-objects scan-tree-dump > > vect "vectorized 1 loops in function" > > FAIL: gcc.dg/vect/pr71264.c scan-tree-dump vect "vectorized 1 loops in function" > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorized 1 loops" 1 > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorizing stmts using SLP" 1 > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorized 1 loops" 1 > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorizing > > stmts using SLP" 1 > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorized 3 loops" 1 > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorizing stmts using SLP" 3 > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorized 3 loops" 1 > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorizing stmts > > using SLP" 3 > > > > > > Running target unix/-m32 > > FAIL: gcc.dg/vect/no-vfa-vect-101.c scan-tree-dump-times vect "can't > > determine dependence" 1 > > FAIL: gcc.dg/vect/no-vfa-vect-102.c scan-tree-dump-times vect > > "possible dependence between data-refs" 1 > > FAIL: gcc.dg/vect/no-vfa-vect-102a.c scan-tree-dump-times vect > > "possible dependence between data-refs" 1 > > FAIL: gcc.dg/vect/no-vfa-vect-37.c scan-tree-dump-times vect "can't > > determine dependence" 2 > > FAIL: gcc.dg/vect/pr71264.c -flto -ffat-lto-objects scan-tree-dump > > vect "vectorized 1 loops in function" > > FAIL: 
gcc.dg/vect/pr71264.c scan-tree-dump vect "vectorized 1 loops in function" > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorized 1 loops" 1 > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorizing stmts using SLP" 1 > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorized 1 loops" 1 > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorizing > > stmts using SLP" 1 > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorized 3 loops" 1 > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "vectorizing stmts using SLP" 3 > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorized 3 loops" 1 > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorizing stmts > > using SLP" 3 > > FAIL: gcc.dg/vect/vect-104.c -flto -ffat-lto-objects > > scan-tree-dump-times vect "possible dependence between data-refs" 1 > > FAIL: gcc.dg/vect/vect-104.c scan-tree-dump-times vect "possible > > dependence between data-refs" 1 > > Yeah, it's a bit iffy to adjust expectations. If there's a way to > disable vectorization > for 32bit modes on x86 that might be a way to "fix" them, otherwise we're > lacking a way to query for available vector modes/sizes in the dejagnu vect > targets. There's available_vector_sizes but it's implementation is hardly > complete nor is size the only important thing (FP vs. INT). At least > one could add a vect32 predicate similar to the existing vect64 one. I went the way you proposed above. By adding 32bit vector size to available_vector_sizes only two testcases fails. The attached patch fixes all vect scan failures (the remaining failure in vect_epilogues.c is just the case of missing uavg<mode>3_ceil pattern for V4QI epilogue vectorization - I plan to add the insn in the follow-up patch). The patch also xfails pr71264.c, the case of missing re-vectorization of 32bit vectors. 
WDYT? Uros. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 28e6113a609..04649b42122 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22190,12 +22190,15 @@ ix86_autovectorize_vector_modes (vector_modes *modes, bool all) modes->safe_push (V16QImode); modes->safe_push (V32QImode); } - else if (TARGET_MMX_WITH_SSE) + else if (TARGET_SSE2) modes->safe_push (V16QImode); if (TARGET_MMX_WITH_SSE) modes->safe_push (V8QImode); + if (TARGET_SSE2) + modes->safe_push (V4QImode); + return 0; } diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index cf3098749c0..16c6a3b8e99 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1740,6 +1740,12 @@ circumstances. @item vect_variable_length Target has variable-length vectors. +@item vect64 +Target supports vectors of 64 bits. + +@item vect32 +Target supports vectors of 32 bits. + @item vect_widen_sum_hi_to_si Target supports a vector widening summation of @code{short} operands into @code{int} results, or can promote (unpack) from @code{short} diff --git a/gcc/testsuite/gcc.dg/vect/pr71264.c b/gcc/testsuite/gcc.dg/vect/pr71264.c index dc849bf2797..1381e0ed132 100644 --- a/gcc/testsuite/gcc.dg/vect/pr71264.c +++ b/gcc/testsuite/gcc.dg/vect/pr71264.c @@ -19,5 +19,4 @@ void test(uint8_t *ptr, uint8_t *mask) } } -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { xfail s390*-*-* sparc*-*-* } } } */ - +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { xfail { { s390*-*-* sparc*-*-* } || vect32 } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-28.c b/gcc/testsuite/gcc.dg/vect/slp-28.c index 7778bad4465..0bb5f0eb0e4 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-28.c +++ b/gcc/testsuite/gcc.dg/vect/slp-28.c @@ -88,6 +88,7 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ +/* { dg-final { 
scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect32 } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target vect32 } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect32 } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-3.c b/gcc/testsuite/gcc.dg/vect/slp-3.c index 46ab584419a..80ded1840ad 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-3.c +++ b/gcc/testsuite/gcc.dg/vect/slp-3.c @@ -141,8 +141,8 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { ! vect_partial_vectors } } } } */ -/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target vect_partial_vectors } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target { ! vect_partial_vectors } } } }*/ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { ! { vect_partial_vectors || vect32 } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_partial_vectors || vect32 } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target { ! 
{ vect_partial_vectors || vect32 } } } } }*/ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { vect_partial_vectors || vect32 } } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 849f1bbeda5..7f78c5593ac 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7626,6 +7626,7 @@ proc available_vector_sizes { } { if { ![is-effective-target ia32] } { lappend result 64 } + lappend result 32 } elseif { [istarget sparc*-*-*] } { lappend result 64 } elseif { [istarget amdgcn*-*-*] } { @@ -7655,6 +7656,12 @@ proc check_effective_target_vect64 { } { return [expr { [lsearch -exact [available_vector_sizes] 64] >= 0 }] } +# Return 1 if the target supports vectors of 32 bits. + +proc check_effective_target_vect32 { } { + return [expr { [lsearch -exact [available_vector_sizes] 32] >= 0 }] +} + # Return 1 if the target supports vector copysignf calls. proc check_effective_target_vect_call_copysignf { } {
On Wed, May 26, 2021 at 10:44 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Tue, May 25, 2021 at 4:29 PM Richard Biener > <richard.guenther@gmail.com> wrote: > > > > On Fri, May 21, 2021 at 5:00 PM Uros Bizjak via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > Here it is, the patch that enables auto-vectorization for 32bit modes. > > > > > > Sent as RFC, because the patch fails some vectorizer scans, as it > > > obviously enables more vectorization to happen: > > > > > > Running target unix > > > FAIL: gcc.dg/vect/pr71264.c -flto -ffat-lto-objects scan-tree-dump > > > vect "vectorized 1 loops in function" > > > FAIL: gcc.dg/vect/pr71264.c scan-tree-dump vect "vectorized 1 loops in function" > > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorized 1 loops" 1 > > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorizing stmts using SLP" 1 > > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorized 1 loops" 1 > > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorizing > > > stmts using SLP" 1 > > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorized 3 loops" 1 > > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorizing stmts using SLP" 3 > > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorized 3 loops" 1 > > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorizing stmts > > > using SLP" 3 > > > > > > > > > Running target unix/-m32 > > > FAIL: gcc.dg/vect/no-vfa-vect-101.c scan-tree-dump-times vect "can't > > > determine dependence" 1 > > > FAIL: gcc.dg/vect/no-vfa-vect-102.c scan-tree-dump-times vect > > > "possible dependence between data-refs" 1 > > > FAIL: gcc.dg/vect/no-vfa-vect-102a.c scan-tree-dump-times vect > > > "possible dependence between data-refs" 1 > > > FAIL: gcc.dg/vect/no-vfa-vect-37.c scan-tree-dump-times vect "can't > > > 
determine dependence" 2 > > > FAIL: gcc.dg/vect/pr71264.c -flto -ffat-lto-objects scan-tree-dump > > > vect "vectorized 1 loops in function" > > > FAIL: gcc.dg/vect/pr71264.c scan-tree-dump vect "vectorized 1 loops in function" > > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorized 1 loops" 1 > > > FAIL: gcc.dg/vect/slp-28.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorizing stmts using SLP" 1 > > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorized 1 loops" 1 > > > FAIL: gcc.dg/vect/slp-28.c scan-tree-dump-times vect "vectorizing > > > stmts using SLP" 1 > > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorized 3 loops" 1 > > > FAIL: gcc.dg/vect/slp-3.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "vectorizing stmts using SLP" 3 > > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorized 3 loops" 1 > > > FAIL: gcc.dg/vect/slp-3.c scan-tree-dump-times vect "vectorizing stmts > > > using SLP" 3 > > > FAIL: gcc.dg/vect/vect-104.c -flto -ffat-lto-objects > > > scan-tree-dump-times vect "possible dependence between data-refs" 1 > > > FAIL: gcc.dg/vect/vect-104.c scan-tree-dump-times vect "possible > > > dependence between data-refs" 1 > > > > Yeah, it's a bit iffy to adjust expectations. If there's a way to > > disable vectorization > > for 32bit modes on x86 that might be a way to "fix" them, otherwise we're > > lacking a way to query for available vector modes/sizes in the dejagnu vect > > targets. There's available_vector_sizes but it's implementation is hardly > > complete nor is size the only important thing (FP vs. INT). At least > > one could add a vect32 predicate similar to the existing vect64 one. > > I went the way you proposed above. By adding 32bit vector size to > available_vector_sizes only two testcases fails. 
The attached patch > fixes all vect scan failures (the remaining failure in > vect_epilogues.c is just the case of missing uavg<mode>3_ceil pattern > for V4QI epilogue vectorization - I plan to add the insn in the > follow-up patch). > > The patch also xfails pr71264.c, the case of missing re-vectorization > of 32bit vectors. > > WDYT? LGTM. Richard. > Uros.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index f3b451835da..f43f3ba060e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22187,12 +22187,15 @@ ix86_autovectorize_vector_modes (vector_modes *modes, bool all) modes->safe_push (V16QImode); modes->safe_push (V32QImode); } - else if (TARGET_MMX_WITH_SSE) + else if (TARGET_SSE2) modes->safe_push (V16QImode); if (TARGET_MMX_WITH_SSE) modes->safe_push (V8QImode); + if (TARGET_SSE2) + modes->safe_push (V4QImode); + return 0; } diff --git a/gcc/testsuite/gcc.target/i386/pr100637-3b.c b/gcc/testsuite/gcc.target/i386/pr100637-3b.c new file mode 100644 index 00000000000..16df70059a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100637-3b.c @@ -0,0 +1,56 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse4" } */ + +char r[4], a[4], b[4]; +unsigned char ur[4], ua[4], ub[4]; + +void maxs (void) +{ + int i; + + for (i = 0; i < 4; i++) + r[i] = a[i] > b[i] ? a[i] : b[i]; +} + +/* { dg-final { scan-assembler "pmaxsb" } } */ + +void maxu (void) +{ + int i; + + for (i = 0; i < 4; i++) + ur[i] = ua[i] > ub[i] ? ua[i] : ub[i]; +} + +/* { dg-final { scan-assembler "pmaxub" } } */ + +void mins (void) +{ + int i; + + for (i = 0; i < 4; i++) + r[i] = a[i] < b[i] ? a[i] : b[i]; +} + +/* { dg-final { scan-assembler "pminsb" } } */ + +void minu (void) +{ + int i; + + for (i = 0; i < 4; i++) + ur[i] = ua[i] < ub[i] ? ua[i] : ub[i]; +} + +/* { dg-final { scan-assembler "pminub" } } */ + +void _abs (void) +{ + int i; + + for (i = 0; i < 4; i++) + r[i] = a[i] < 0 ? 
-a[i] : a[i]; +} + +/* { dg-final { scan-assembler "pabsb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100637-3w.c b/gcc/testsuite/gcc.target/i386/pr100637-3w.c new file mode 100644 index 00000000000..7f1882e7a56 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100637-3w.c @@ -0,0 +1,86 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse4" } */ + +short r[2], a[2], b[2]; +unsigned short ur[2], ua[2], ub[2]; + +void mulh (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = ((int) a[i] * b[i]) >> 16; +} + +/* { dg-final { scan-assembler "pmulhw" { xfail *-*-* } } } */ + +void mulhu (void) +{ + int i; + + for (i = 0; i < 2; i++) + ur[i] = ((unsigned int) ua[i] * ub[i]) >> 16; +} + +/* { dg-final { scan-assembler "pmulhuw" { xfail *-*-* } } } */ + +void mulhrs (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = ((((int) a[i] * b[i]) >> 14) + 1) >> 1; +} + +/* { dg-final { scan-assembler "pmulhrsw" } } */ + +void maxs (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = a[i] > b[i] ? a[i] : b[i]; +} + +/* { dg-final { scan-assembler "pmaxsw" } } */ + +void maxu (void) +{ + int i; + + for (i = 0; i < 2; i++) + ur[i] = ua[i] > ub[i] ? ua[i] : ub[i]; +} + +/* { dg-final { scan-assembler "pmaxuw" } } */ + +void mins (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = a[i] < b[i] ? a[i] : b[i]; +} + +/* { dg-final { scan-assembler "pminsw" } } */ + +void minu (void) +{ + int i; + + for (i = 0; i < 2; i++) + ur[i] = ua[i] < ub[i] ? ua[i] : ub[i]; +} + +/* { dg-final { scan-assembler "pminuw" } } */ + +void _abs (void) +{ + int i; + + for (i = 0; i < 2; i++) + r[i] = a[i] < 0 ? 
-a[i] : a[i]; +} + +/* { dg-final { scan-assembler "pabsw" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100637-4b.c b/gcc/testsuite/gcc.target/i386/pr100637-4b.c new file mode 100644 index 00000000000..198e3dd3352 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100637-4b.c @@ -0,0 +1,19 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +typedef char T; + +#define M 4 + +extern T a[M], b[M], s1[M], s2[M], r[M]; + +void foo (void) +{ + int j; + + for (j = 0; j < M; j++) + r[j] = (a[j] < b[j]) ? s1[j] : s2[j]; +} + +/* { dg-final { scan-assembler "pcmpgtb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100637-4w.c b/gcc/testsuite/gcc.target/i386/pr100637-4w.c new file mode 100644 index 00000000000..0f5dacce906 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100637-4w.c @@ -0,0 +1,19 @@ +/* PR target/100637 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +typedef short T; + +#define M 2 + +extern T a[M], b[M], s1[M], s2[M], r[M]; + +void foo (void) +{ + int j; + + for (j = 0; j < M; j++) + r[j] = (a[j] < b[j]) ? s1[j] : s2[j]; +} + +/* { dg-final { scan-assembler "pcmpgtw" { xfail *-*-* } } } */