Message ID | 20240829073320.2188675-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | [x86] Check avx upper register for parallel. | expand |
On Thu, Aug 29, 2024 at 9:33 AM liuhongt <hongtao.liu@intel.com> wrote: > > For function arguments/return, when it's BLK mode, it's put in a > parallel with an expr_list, and the expr_list contains the real mode > and registers. > The current ix86_check_avx_upper_register only checks for SSE_REG_P, and > fails to handle that. The patch extends the handling to each subrtx. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? > > gcc/ChangeLog: > > PR target/116512 > * config/i386/i386.cc (ix86_avx_u128_mode_entry): Iterate > each subrtx for potential rtx parallel to check avx upper > register. > (ix86_avx_u128_mode_exit): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr116512.c: New test. > --- > gcc/config/i386/i386.cc | 28 ++++++++++++++++++++---- > gcc/testsuite/gcc.target/i386/pr116512.c | 26 ++++++++++++++++++++++ > 2 files changed, 50 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr116512.c > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 224a78cc832..94d1a14056e 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -15148,8 +15148,18 @@ ix86_avx_u128_mode_entry (void) > { > rtx incoming = DECL_INCOMING_RTL (arg); > > - if (incoming && ix86_check_avx_upper_register (incoming)) > - return AVX_U128_DIRTY; > + if (incoming) > + { > + /* construct_container may return a parallel with expr_list > + which contains the real reg and mode */ > + subrtx_var_iterator::array_type array; > + FOR_EACH_SUBRTX_VAR (iter, array, incoming, ALL) > + { > + rtx x = *iter; > + if (ix86_check_avx_upper_register (x)) > + return AVX_U128_DIRTY; > + } > + } > } Can the above loop be a part of ix86_check_avx_upper_register, so that this function would scan the full RTX for an AVX upper register? 
Uros, > return AVX_U128_CLEAN; > @@ -15184,8 +15194,18 @@ ix86_avx_u128_mode_exit (void) > > /* Exit mode is set to AVX_U128_DIRTY if there are 256bit > or 512 bit modes used in the function return register. */ > - if (reg && ix86_check_avx_upper_register (reg)) > - return AVX_U128_DIRTY; > + if (reg) > + { > + /* construct_container may return a parallel with expr_list > + which contains the real reg and mode */ > + subrtx_var_iterator::array_type array; > + FOR_EACH_SUBRTX_VAR (iter, array, reg, ALL) > + { > + rtx x = *iter; > + if (ix86_check_avx_upper_register (x)) > + return AVX_U128_DIRTY; > + } > + } > > /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit > modes used in function arguments, otherwise return AVX_U128_CLEAN. > diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c > new file mode 100644 > index 00000000000..c2bc6c91b64 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr116512.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=x86-64-v4 -O2" } */ > +/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ > + > +#include <immintrin.h> > + > +struct B { > + union { > + __m512 f; > + __m512i s; > + }; > +}; > + > +struct B foo(int n) { > + struct B res; > + res.s = _mm512_set1_epi32(n); > + > + return res; > +} > + > +__m512i bar(int n) { > + struct B res; > + res.s = _mm512_set1_epi32(n); > + > + return res.s; > +} > -- > 2.31.1 >
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 224a78cc832..94d1a14056e 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -15148,8 +15148,18 @@ ix86_avx_u128_mode_entry (void) { rtx incoming = DECL_INCOMING_RTL (arg); - if (incoming && ix86_check_avx_upper_register (incoming)) - return AVX_U128_DIRTY; + if (incoming) + { + /* construct_container may return a parallel with expr_list + which contains the real reg and mode */ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, incoming, ALL) + { + rtx x = *iter; + if (ix86_check_avx_upper_register (x)) + return AVX_U128_DIRTY; + } + } } return AVX_U128_CLEAN; @@ -15184,8 +15194,18 @@ ix86_avx_u128_mode_exit (void) /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512 bit modes used in the function return register. */ - if (reg && ix86_check_avx_upper_register (reg)) - return AVX_U128_DIRTY; + if (reg) + { + /* construct_container may return a parallel with expr_list + which contains the real reg and mode */ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, reg, ALL) + { + rtx x = *iter; + if (ix86_check_avx_upper_register (x)) + return AVX_U128_DIRTY; + } + } /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit modes used in function arguments, otherwise return AVX_U128_CLEAN. diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c new file mode 100644 index 00000000000..c2bc6c91b64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116512.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vzeroupper" { target { ! 
ia32 } } } } */ + +#include <immintrin.h> + +struct B { + union { + __m512 f; + __m512i s; + }; +}; + +struct B foo(int n) { + struct B res; + res.s = _mm512_set1_epi32(n); + + return res; +} + +__m512i bar(int n) { + struct B res; + res.s = _mm512_set1_epi32(n); + + return res.s; +}