diff mbox series

[x86] Check avx upper register for parallel.

Message ID 20240829073320.2188675-1-hongtao.liu@intel.com
State New
Headers show
Series [x86] Check avx upper register for parallel. | expand

Commit Message

liuhongt Aug. 29, 2024, 7:33 a.m. UTC
For function arguments/return, when it's BLK mode, it's put in a
parallel with an expr_list, and the expr_list contains the real mode
and registers.
Current ix86_check_avx_upper_register only checked for SSE_REG_P, and
failed to handle that. The patch extend the handle to each subrtx.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR target/116512
	* config/i386/i386.cc (ix86_avx_u128_mode_entry): Iterate
	each subrtx for potential rtx parallel to check avx upper
	register.
	(ix86_avx_u128_mode_exit): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr116512.c: New test.
---
 gcc/config/i386/i386.cc                  | 28 ++++++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr116512.c | 26 ++++++++++++++++++++++
 2 files changed, 50 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr116512.c

Comments

Uros Bizjak Aug. 29, 2024, 8:58 a.m. UTC | #1
On Thu, Aug 29, 2024 at 9:33 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> For function arguments/return, when it's BLK mode, it's put in a
> parallel with an expr_list, and the expr_list contains the real mode
> and registers.
> Current ix86_check_avx_upper_register only checked for SSE_REG_P, and
> failed to handle that. The patch extend the handle to each subrtx.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/116512
>         * config/i386/i386.cc (ix86_avx_u128_mode_entry): Iterate
>         each subrtx for potential rtx parallel to check avx upper
>         register.
>         (ix86_avx_u128_mode_exit): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr116512.c: New test.
> ---
>  gcc/config/i386/i386.cc                  | 28 ++++++++++++++++++++----
>  gcc/testsuite/gcc.target/i386/pr116512.c | 26 ++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr116512.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 224a78cc832..94d1a14056e 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -15148,8 +15148,18 @@ ix86_avx_u128_mode_entry (void)
>      {
>        rtx incoming = DECL_INCOMING_RTL (arg);
>
> -      if (incoming && ix86_check_avx_upper_register (incoming))
> -       return AVX_U128_DIRTY;
> +      if (incoming)
> +       {
> +         /* construct_container may return a parallel with expr_list
> +            which contains the real reg and mode  */
> +         subrtx_var_iterator::array_type array;
> +         FOR_EACH_SUBRTX_VAR (iter, array, incoming, ALL)
> +           {
> +             rtx x = *iter;
> +             if (ix86_check_avx_upper_register (x))
> +               return AVX_U128_DIRTY;
> +           }
> +       }
>      }

Can the above loop be a part of ix86_check_avx_upper_register, so this
function would scan the full RTX for avx upper register?

Uros,

>    return AVX_U128_CLEAN;
> @@ -15184,8 +15194,18 @@ ix86_avx_u128_mode_exit (void)
>
>    /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
>       or 512 bit modes used in the function return register. */
> -  if (reg && ix86_check_avx_upper_register (reg))
> -    return AVX_U128_DIRTY;
> +  if (reg)
> +    {
> +      /* construct_container may return a parallel with expr_list
> +        which contains the real reg and mode  */
> +      subrtx_var_iterator::array_type array;
> +      FOR_EACH_SUBRTX_VAR (iter, array, reg, ALL)
> +       {
> +         rtx x = *iter;
> +         if (ix86_check_avx_upper_register (x))
> +           return AVX_U128_DIRTY;
> +       }
> +    }
>
>    /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
>       modes used in function arguments, otherwise return AVX_U128_CLEAN.
> diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c
> new file mode 100644
> index 00000000000..c2bc6c91b64
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr116512.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64-v4 -O2" } */
> +/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
> +
> +#include <immintrin.h>
> +
> +struct B {
> +  union {
> +    __m512 f;
> +    __m512i s;
> +  };
> +};
> +
> +struct B foo(int n) {
> +  struct B res;
> +  res.s = _mm512_set1_epi32(n);
> +
> +  return res;
> +}
> +
> +__m512i bar(int n) {
> +  struct B res;
> +  res.s = _mm512_set1_epi32(n);
> +
> +  return res.s;
> +}
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 224a78cc832..94d1a14056e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -15148,8 +15148,18 @@  ix86_avx_u128_mode_entry (void)
     {
       rtx incoming = DECL_INCOMING_RTL (arg);
 
-      if (incoming && ix86_check_avx_upper_register (incoming))
-	return AVX_U128_DIRTY;
+      if (incoming)
+	{
+	  /* construct_container may return a parallel with expr_list
+	     which contains the real reg and mode  */
+	  subrtx_var_iterator::array_type array;
+	  FOR_EACH_SUBRTX_VAR (iter, array, incoming, ALL)
+	    {
+	      rtx x = *iter;
+	      if (ix86_check_avx_upper_register (x))
+		return AVX_U128_DIRTY;
+	    }
+	}
     }
 
   return AVX_U128_CLEAN;
@@ -15184,8 +15194,18 @@  ix86_avx_u128_mode_exit (void)
 
   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
      or 512 bit modes used in the function return register. */
-  if (reg && ix86_check_avx_upper_register (reg))
-    return AVX_U128_DIRTY;
+  if (reg)
+    {
+      /* construct_container may return a parallel with expr_list
+	 which contains the real reg and mode  */
+      subrtx_var_iterator::array_type array;
+      FOR_EACH_SUBRTX_VAR (iter, array, reg, ALL)
+	{
+	  rtx x = *iter;
+	  if (ix86_check_avx_upper_register (x))
+	    return AVX_U128_DIRTY;
+	}
+    }
 
   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
      modes used in function arguments, otherwise return AVX_U128_CLEAN.
diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c
new file mode 100644
index 00000000000..c2bc6c91b64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116512.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+
+struct B {
+  union {
+    __m512 f;
+    __m512i s;
+  };
+};
+
+struct B foo(int n) {
+  struct B res;
+  res.s = _mm512_set1_epi32(n);
+
+  return res;
+}
+
+__m512i bar(int n) {
+  struct B res;
+  res.s = _mm512_set1_epi32(n);
+
+  return res.s;
+}