Message ID | 20220123122816.345498-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2] x86: Also check mode of memory broadcast in bcst_mem_operand | expand |
On Sun, Jan 23, 2022 at 8:28 PM H.J. Lu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Return false for invalid mode on memory broadcast in bcst_mem_operand: > > (vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ]))) > Yes, thanks. > gcc/ > > PR target/104188 > * config/i386/predicates.md (bcst_mem_operand): Also check mode > of memory broadcast. > > gcc/testsuite/ > > PR target/104188 > * gcc.target/i386/pr104188.c: New test. > --- > gcc/config/i386/predicates.md | 2 + > gcc/testsuite/gcc.target/i386/pr104188.c | 70 ++++++++++++++++++++++++ > 2 files changed, 72 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/pr104188.c > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index eae6ab58e23..a8cc17a054d 100644 > --- a/gcc/config/i386/predicates.md > +++ b/gcc/config/i386/predicates.md > @@ -1157,6 +1157,8 @@ (define_predicate "bcst_mem_operand" > (ior (match_test "TARGET_AVX512VL") > (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64"))) > (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))") > + (match_test "GET_MODE (XEXP (op, 0)) > + == GET_MODE_INNER (GET_MODE (op))") > (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))"))) > > ; Return true when OP is bcst_mem_operand or vector_memory_operand. > diff --git a/gcc/testsuite/gcc.target/i386/pr104188.c b/gcc/testsuite/gcc.target/i386/pr104188.c > new file mode 100644 > index 00000000000..c6f615b9625 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr104188.c > @@ -0,0 +1,70 @@ > +/* { dg-do run { target avx512f } } */ > +/* { dg-options "-O2 -mfpmath=sse" } */ > + > +#include <x86intrin.h> > + > +union U { > + float m[4][4]; > + __m128 r[4]; > + __m512 s; > +}; > + > +__attribute__((noipa, target("avx512f"))) > +void > +foo (union U *x, union U *a, union U *b) > +{ > + __m512 c = _mm512_loadu_ps (&a->s); > + __m512 d = _mm512_broadcast_f32x4 (b->r[0]); > + __m512 e = _mm512_broadcast_f32x4 (b->r[1]); > + __m512 f = _mm512_broadcast_f32x4 (b->r[2]); > + __m512 g = _mm512_broadcast_f32x4 (b->r[3]); > + __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d); > + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h); > + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h); > + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h); > + _mm512_storeu_ps (&x->s, h); > +} > + > +__attribute__((noipa, target("avx512f"))) > +void > +do_test (void) > +{ > + union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f }, > + { 5.0f, 6.0f, 7.0f, 8.0f }, > + { 9.0f, 10.0f, 11.0f, 12.0f }, > + { 13.0f, 14.0f, 15.0f, 16.0f } } }; > + union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f }, > + { 21.0f, 22.0f, 23.0f, 24.0f }, > + { 25.0f, 26.0f, 27.0f, 28.0f }, > + { 29.0f, 30.0f, 31.0f, 32.0f } } }; > + union U c; > + foo (&c, &a, &b); > + if (c.m[0][0] != 250.0f > + || c.m[0][1] != 260.0f > + || c.m[0][2] != 270.0f > + || c.m[0][3] != 280.0f) > + __builtin_abort (); > + if (c.m[1][0] != 618.0f > + || c.m[1][1] != 644.0f > + || c.m[1][2] != 670.0f > + || c.m[1][3] != 696.0f) > + __builtin_abort (); > + if (c.m[2][0] != 986.0f > + || c.m[2][1] != 1028.0f > + || c.m[2][2] != 1070.0f > + || c.m[2][3] != 1112.0f) > + __builtin_abort (); > + if (c.m[3][0] != 1354.0f > + || c.m[3][1] != 1412.0f > + || c.m[3][2] != 1470.0f > + || c.m[3][3] != 1528.0f) > + __builtin_abort (); > +} > + > +int > +main () > +{ > + if (__builtin_cpu_supports ("avx512f")) > + do_test (); > + return 0; > +} > -- > 2.34.1 >
On Sun, Jan 23, 2022 at 4:35 PM Hongtao Liu <crazylht@gmail.com> wrote: > > On Sun, Jan 23, 2022 at 8:28 PM H.J. Lu via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Return false for invalid mode on memory broadcast in bcst_mem_operand: > > > > (vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ]))) > > > Yes, thanks. I will also backport it to GCC 11 branch. Thanks. > > gcc/ > > > > PR target/104188 > > * config/i386/predicates.md (bcst_mem_operand): Also check mode > > of memory broadcast. > > > > gcc/testsuite/ > > > > PR target/104188 > > * gcc.target/i386/pr104188.c: New test. > > --- > > gcc/config/i386/predicates.md | 2 + > > gcc/testsuite/gcc.target/i386/pr104188.c | 70 ++++++++++++++++++++++++ > > 2 files changed, 72 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr104188.c > > > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > > index eae6ab58e23..a8cc17a054d 100644 > > --- a/gcc/config/i386/predicates.md > > +++ b/gcc/config/i386/predicates.md > > @@ -1157,6 +1157,8 @@ (define_predicate "bcst_mem_operand" > > (ior (match_test "TARGET_AVX512VL") > > (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64"))) > > (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))") > > + (match_test "GET_MODE (XEXP (op, 0)) > > + == GET_MODE_INNER (GET_MODE (op))") > > (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))"))) > > > > ; Return true when OP is bcst_mem_operand or vector_memory_operand. > > diff --git a/gcc/testsuite/gcc.target/i386/pr104188.c b/gcc/testsuite/gcc.target/i386/pr104188.c > > new file mode 100644 > > index 00000000000..c6f615b9625 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr104188.c > > @@ -0,0 +1,70 @@ > > +/* { dg-do run { target avx512f } } */ > > +/* { dg-options "-O2 -mfpmath=sse" } */ > > + > > +#include <x86intrin.h> > > + > > +union U { > > + float m[4][4]; > > + __m128 r[4]; > > + __m512 s; > > +}; > > + > > +__attribute__((noipa, target("avx512f"))) > > +void > > +foo (union U *x, union U *a, union U *b) > > +{ > > + __m512 c = _mm512_loadu_ps (&a->s); > > + __m512 d = _mm512_broadcast_f32x4 (b->r[0]); > > + __m512 e = _mm512_broadcast_f32x4 (b->r[1]); > > + __m512 f = _mm512_broadcast_f32x4 (b->r[2]); > > + __m512 g = _mm512_broadcast_f32x4 (b->r[3]); > > + __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d); > > + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h); > > + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h); > > + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h); > > + _mm512_storeu_ps (&x->s, h); > > +} > > + > > +__attribute__((noipa, target("avx512f"))) > > +void > > +do_test (void) > > +{ > > + union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f }, > > + { 5.0f, 6.0f, 7.0f, 8.0f }, > > + { 9.0f, 10.0f, 11.0f, 12.0f }, > > + { 13.0f, 14.0f, 15.0f, 16.0f } } }; > > + union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f }, > > + { 21.0f, 22.0f, 23.0f, 24.0f }, > > + { 25.0f, 26.0f, 27.0f, 28.0f }, > > + { 29.0f, 30.0f, 31.0f, 32.0f } } }; > > + union U c; > > + foo (&c, &a, &b); > > + if (c.m[0][0] != 250.0f > > + || c.m[0][1] != 260.0f > > + || c.m[0][2] != 270.0f > > + || c.m[0][3] != 280.0f) > > + __builtin_abort (); > > + if (c.m[1][0] != 618.0f > > + || c.m[1][1] != 644.0f > > + || c.m[1][2] != 670.0f > > + || c.m[1][3] != 696.0f) > > + __builtin_abort (); > > + if (c.m[2][0] != 986.0f > > + || c.m[2][1] != 1028.0f > > + || c.m[2][2] != 1070.0f > > + || c.m[2][3] != 1112.0f) > > + __builtin_abort (); > > + if (c.m[3][0] != 1354.0f > > + || c.m[3][1] != 1412.0f > > + || c.m[3][2] != 1470.0f > > + || c.m[3][3] != 1528.0f) > > + __builtin_abort (); > > +} > > + > > +int > > +main () > > +{ > > + if (__builtin_cpu_supports ("avx512f")) > > + do_test (); > > + return 0; > > +} > > -- > > 2.34.1 > > > > > -- > BR, > Hongtao
On Sun, Jan 23, 2022 at 04:39:34PM -0800, H.J. Lu via Gcc-patches wrote: > On Sun, Jan 23, 2022 at 4:35 PM Hongtao Liu <crazylht@gmail.com> wrote: > > > > On Sun, Jan 23, 2022 at 8:28 PM H.J. Lu via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > Return false for invalid mode on memory broadcast in bcst_mem_operand: > > > > > > (vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ]))) > > > > > Yes, thanks. > > I will also backport it to GCC 11 branch. On i686-linux this new testcase FAILs with: cc1: warning: SSE instruction set disabled, using 387 arithmetics FAIL: gcc.target/i386/pr104188.c (test for excess errors) Excess errors: cc1: warning: SSE instruction set disabled, using 387 arithmetics This is because it uses -mfpmath=sse, but -msse2 isn't on. Fixed by adding -msse2 to dg-options and requiring sse2_runtime effective target. Tested on x86_64-linux and i686-linux, committed as obvious to trunk/11: 2022-01-26 Jakub Jelinek <jakub@redhat.com> PR target/104188 * gcc.target/i386/pr104188.c: Add dg-require-effective-target sse2_runtime. Add -msse2 to dg-options. --- gcc/testsuite/gcc.target/i386/pr104188.c.jj 2022-01-24 10:18:21.174512441 +0100 +++ gcc/testsuite/gcc.target/i386/pr104188.c 2022-01-26 11:54:58.025950692 +0100 @@ -1,5 +1,6 @@ /* { dg-do run { target avx512f } } */ -/* { dg-options "-O2 -mfpmath=sse" } */ +/* { dg-require-effective-target sse2_runtime } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse" } */ #include <x86intrin.h> Jakub
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index eae6ab58e23..a8cc17a054d 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1157,6 +1157,8 @@ (define_predicate "bcst_mem_operand" (ior (match_test "TARGET_AVX512VL") (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64"))) (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))") + (match_test "GET_MODE (XEXP (op, 0)) + == GET_MODE_INNER (GET_MODE (op))") (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))"))) ; Return true when OP is bcst_mem_operand or vector_memory_operand. diff --git a/gcc/testsuite/gcc.target/i386/pr104188.c b/gcc/testsuite/gcc.target/i386/pr104188.c new file mode 100644 index 00000000000..c6f615b9625 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr104188.c @@ -0,0 +1,70 @@ +/* { dg-do run { target avx512f } } */ +/* { dg-options "-O2 -mfpmath=sse" } */ + +#include <x86intrin.h> + +union U { + float m[4][4]; + __m128 r[4]; + __m512 s; +}; + +__attribute__((noipa, target("avx512f"))) +void +foo (union U *x, union U *a, union U *b) +{ + __m512 c = _mm512_loadu_ps (&a->s); + __m512 d = _mm512_broadcast_f32x4 (b->r[0]); + __m512 e = _mm512_broadcast_f32x4 (b->r[1]); + __m512 f = _mm512_broadcast_f32x4 (b->r[2]); + __m512 g = _mm512_broadcast_f32x4 (b->r[3]); + __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d); + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h); + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h); + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h); + _mm512_storeu_ps (&x->s, h); +} + +__attribute__((noipa, target("avx512f"))) +void +do_test (void) +{ + union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f }, + { 5.0f, 6.0f, 7.0f, 8.0f }, + { 9.0f, 10.0f, 11.0f, 12.0f }, + { 13.0f, 14.0f, 15.0f, 16.0f } } }; + union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f }, + { 21.0f, 22.0f, 23.0f, 24.0f }, + { 25.0f, 26.0f, 27.0f, 28.0f }, + { 29.0f, 30.0f, 31.0f, 32.0f } } }; + union U c; + foo (&c, &a, &b); + if (c.m[0][0] != 250.0f + || c.m[0][1] != 260.0f + || c.m[0][2] != 270.0f + || c.m[0][3] != 280.0f) + __builtin_abort (); + if (c.m[1][0] != 618.0f + || c.m[1][1] != 644.0f + || c.m[1][2] != 670.0f + || c.m[1][3] != 696.0f) + __builtin_abort (); + if (c.m[2][0] != 986.0f + || c.m[2][1] != 1028.0f + || c.m[2][2] != 1070.0f + || c.m[2][3] != 1112.0f) + __builtin_abort (); + if (c.m[3][0] != 1354.0f + || c.m[3][1] != 1412.0f + || c.m[3][2] != 1470.0f + || c.m[3][3] != 1528.0f) + __builtin_abort (); +} + +int +main () +{ + if (__builtin_cpu_supports ("avx512f")) + do_test (); + return 0; +}