diff mbox series

middle-end: Fix GSI for gcond root [PR117140]

Message ID patch-18878-tamar@arm.com
State New
Headers show
Series middle-end: Fix GSI for gcond root [PR117140] | expand

Commit Message

Tamar Christina Oct. 18, 2024, 8:38 a.m. UTC
Hi All,

When finding the gsi to use for code of the root statements we should use the
one of the original statement rather than the gcond which may be inside a
pattern.

Without this the emitted instructions may be discarded later.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/117410
	* tree-vect-slp.cc (vectorize_slp_instance_root_stmt): Use gsi from
	original statement.

gcc/testsuite/ChangeLog:

	PR tree-optimization/117410
	* gcc.dg/vect/vect-early-break_129-pr117410.c: New test.

---




--

Comments

Sam James Oct. 18, 2024, 8:49 a.m. UTC | #1
Tamar Christina <tamar.christina@arm.com> writes:

> Hi All,
>
> When finding the gsi to use for code of the root statements we should use the
> one of the original statement rather than the gcond which may be inside a
> pattern.
>
> Without this the emitted instructions may be discarded later.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/117410

Typo in the number here & in tests (missing a 1)

> 	* tree-vect-slp.cc (vectorize_slp_instance_root_stmt): Use gsi from
> 	original statement.
>
> gcc/testsuite/ChangeLog:
>
> 	PR tree-optimization/117410
> 	* gcc.dg/vect/vect-early-break_129-pr117410.c: New test.
>
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..eec7f8db40c7101c0b9aa6a7b87909bf76cf89fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
> @@ -0,0 +1,94 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +typedef signed char int8_t;
> +typedef short int int16_t;
> +typedef int int32_t;
> +typedef long long int int64_t;
> +typedef unsigned char uint8_t;
> +typedef short unsigned int uint16_t;
> +typedef unsigned int uint32_t;
> +typedef long long unsigned int uint64_t;
> +
> +void __attribute__ ((noinline, noclone))
> +test_1_TYPE1_uint32_t (uint16_t *__restrict f, uint32_t *__restrict d,
> +                       uint16_t x, uint16_t x2, uint32_t y, int n)
> +{
> +    for (int i = 0; i < n; ++i)
> +        {
> +            f[i * 2 + 0] = x;
> +            f[i * 2 + 1] = x2;
> +            d[i] = y;
> +        }
> +}
> +
> +void __attribute__ ((noinline, noclone))
> +test_1_TYPE1_int64_t (int32_t *__restrict f, int64_t *__restrict d, int32_t x,
> +                      int32_t x2, int64_t y, int n)
> +{
> +    for (int i = 0; i < n; ++i)
> +        {
> +            f[i * 2 + 0] = x;
> +            f[i * 2 + 1] = x2;
> +            d[i] = y;
> +        }
> +}
> +
> +int
> +main (void)
> +{
> +        // This part is necessary for ice to appear though running it by itself does not trigger an ICE
> +        int n_3_TYPE1_uint32_t = 32;
> +        uint16_t x_3_uint16_t = 233;
> +        uint16_t x2_3_uint16_t = 78;
> +        uint32_t y_3_uint32_t = 1234;
> +        uint16_t f_3_uint16_t[33 * 2 + 1] = { 0} ;
> +        uint32_t d_3_uint32_t[33] = { 0} ;
> +        test_1_TYPE1_uint32_t (f_3_uint16_t, d_3_uint32_t, x_3_uint16_t, x2_3_uint16_t, y_3_uint32_t, n_3_TYPE1_uint32_t);
> +        for (int i = 0;
> +                        i < n_3_TYPE1_uint32_t;
> +                        ++i) {
> +                if (f_3_uint16_t[i * 2 + 0] != x_3_uint16_t) __builtin_abort ();
> +                if (f_3_uint16_t[i * 2 + 1] != x2_3_uint16_t) __builtin_abort ();
> +                if (d_3_uint32_t[i] != y_3_uint32_t) __builtin_abort ();
> +        }
> +        for (int i = n_3_TYPE1_uint32_t;
> +                        i < n_3_TYPE1_uint32_t + 1;
> +                        ++i) {
> +                if (f_3_uint16_t[i * 2 + 0] != 0) __builtin_abort ();
> +                if (f_3_uint16_t[i * 2 + 1] != 0) __builtin_abort ();
> +                if (d_3_uint32_t[i] != 0) __builtin_abort ();
> +        }
> +    // If ran without the above section, a different ice appears. see below
> +    int n_3_TYPE1_int64_t = 32;
> +    int32_t x_3_int32_t = 233;
> +    int32_t x2_3_int32_t = 78;
> +    int64_t y_3_int64_t = 1234;
> +    int32_t f_3_int32_t[33 * 2 + 1] = { 0 };
> +    int64_t d_3_int64_t[33] = { 0 };
> +    test_1_TYPE1_int64_t (f_3_int32_t, d_3_int64_t, x_3_int32_t, x2_3_int32_t,
> +                          y_3_int64_t, n_3_TYPE1_int64_t);
> +    for (int i = 0; i < n_3_TYPE1_int64_t; ++i)
> +        {
> +            if (f_3_int32_t[i * 2 + 0] != x_3_int32_t)
> +                __builtin_abort ();
> +            if (f_3_int32_t[i * 2 + 1] != x2_3_int32_t)
> +                __builtin_abort ();
> +            if (d_3_int64_t[i] != y_3_int64_t)
> +                __builtin_abort ();
> +        }
> +
> +    for (int i = n_3_TYPE1_int64_t; i < n_3_TYPE1_int64_t + 1; ++i)
> +        {
> +            if (f_3_int32_t[i * 2 + 0] != 0)
> +                __builtin_abort ();
> +            if (f_3_int32_t[i * 2 + 1] != 0)
> +                __builtin_abort ();
> +            if (d_3_int64_t[i] != 0)
> +                __builtin_abort ();
> +        }
> +
> +    return 0;
> +}
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index d35c2ea02dcef1c295bf97adc6717a7294d3f61e..9276662fa0f1beaa6c90835ba0e34726bf55ae19 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -11167,7 +11167,7 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i
>  	 can't support lane > 1 at this time.  */
>        gcc_assert (instance->root_stmts.length () == 1);
>        auto root_stmt_info = instance->root_stmts[0];
> -      auto last_stmt = STMT_VINFO_STMT (root_stmt_info);
> +      auto last_stmt = STMT_VINFO_STMT (vect_orig_stmt (root_stmt_info));
>        gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
>        gimple *vec_stmt = NULL;
>        gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());
Richard Biener Oct. 18, 2024, 9:29 a.m. UTC | #2
On Fri, 18 Oct 2024, Tamar Christina wrote:

> Hi All,
> 
> When finding the gsi to use for code of the root statements we should use the
> one of the original statement rather than the gcond which may be inside a
> pattern.
> 
> Without this the emitted instructions may be discarded later.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/117410
> 	* tree-vect-slp.cc (vectorize_slp_instance_root_stmt): Use gsi from
> 	original statement.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/117410
> 	* gcc.dg/vect/vect-early-break_129-pr117410.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..eec7f8db40c7101c0b9aa6a7b87909bf76cf89fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
> @@ -0,0 +1,94 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +typedef signed char int8_t;
> +typedef short int int16_t;
> +typedef int int32_t;
> +typedef long long int int64_t;
> +typedef unsigned char uint8_t;
> +typedef short unsigned int uint16_t;
> +typedef unsigned int uint32_t;
> +typedef long long unsigned int uint64_t;
> +
> +void __attribute__ ((noinline, noclone))
> +test_1_TYPE1_uint32_t (uint16_t *__restrict f, uint32_t *__restrict d,
> +                       uint16_t x, uint16_t x2, uint32_t y, int n)
> +{
> +    for (int i = 0; i < n; ++i)
> +        {
> +            f[i * 2 + 0] = x;
> +            f[i * 2 + 1] = x2;
> +            d[i] = y;
> +        }
> +}
> +
> +void __attribute__ ((noinline, noclone))
> +test_1_TYPE1_int64_t (int32_t *__restrict f, int64_t *__restrict d, int32_t x,
> +                      int32_t x2, int64_t y, int n)
> +{
> +    for (int i = 0; i < n; ++i)
> +        {
> +            f[i * 2 + 0] = x;
> +            f[i * 2 + 1] = x2;
> +            d[i] = y;
> +        }
> +}
> +
> +int
> +main (void)
> +{
> +        // This part is necessary for ice to appear though running it by itself does not trigger an ICE
> +        int n_3_TYPE1_uint32_t = 32;
> +        uint16_t x_3_uint16_t = 233;
> +        uint16_t x2_3_uint16_t = 78;
> +        uint32_t y_3_uint32_t = 1234;
> +        uint16_t f_3_uint16_t[33 * 2 + 1] = { 0} ;
> +        uint32_t d_3_uint32_t[33] = { 0} ;
> +        test_1_TYPE1_uint32_t (f_3_uint16_t, d_3_uint32_t, x_3_uint16_t, x2_3_uint16_t, y_3_uint32_t, n_3_TYPE1_uint32_t);
> +        for (int i = 0;
> +                        i < n_3_TYPE1_uint32_t;
> +                        ++i) {
> +                if (f_3_uint16_t[i * 2 + 0] != x_3_uint16_t) __builtin_abort ();
> +                if (f_3_uint16_t[i * 2 + 1] != x2_3_uint16_t) __builtin_abort ();
> +                if (d_3_uint32_t[i] != y_3_uint32_t) __builtin_abort ();
> +        }
> +        for (int i = n_3_TYPE1_uint32_t;
> +                        i < n_3_TYPE1_uint32_t + 1;
> +                        ++i) {
> +                if (f_3_uint16_t[i * 2 + 0] != 0) __builtin_abort ();
> +                if (f_3_uint16_t[i * 2 + 1] != 0) __builtin_abort ();
> +                if (d_3_uint32_t[i] != 0) __builtin_abort ();
> +        }
> +    // If ran without the above section, a different ice appears. see below
> +    int n_3_TYPE1_int64_t = 32;
> +    int32_t x_3_int32_t = 233;
> +    int32_t x2_3_int32_t = 78;
> +    int64_t y_3_int64_t = 1234;
> +    int32_t f_3_int32_t[33 * 2 + 1] = { 0 };
> +    int64_t d_3_int64_t[33] = { 0 };
> +    test_1_TYPE1_int64_t (f_3_int32_t, d_3_int64_t, x_3_int32_t, x2_3_int32_t,
> +                          y_3_int64_t, n_3_TYPE1_int64_t);
> +    for (int i = 0; i < n_3_TYPE1_int64_t; ++i)
> +        {
> +            if (f_3_int32_t[i * 2 + 0] != x_3_int32_t)
> +                __builtin_abort ();
> +            if (f_3_int32_t[i * 2 + 1] != x2_3_int32_t)
> +                __builtin_abort ();
> +            if (d_3_int64_t[i] != y_3_int64_t)
> +                __builtin_abort ();
> +        }
> +
> +    for (int i = n_3_TYPE1_int64_t; i < n_3_TYPE1_int64_t + 1; ++i)
> +        {
> +            if (f_3_int32_t[i * 2 + 0] != 0)
> +                __builtin_abort ();
> +            if (f_3_int32_t[i * 2 + 1] != 0)
> +                __builtin_abort ();
> +            if (d_3_int64_t[i] != 0)
> +                __builtin_abort ();
> +        }
> +
> +    return 0;
> +}
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index d35c2ea02dcef1c295bf97adc6717a7294d3f61e..9276662fa0f1beaa6c90835ba0e34726bf55ae19 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -11167,7 +11167,7 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i
>  	 can't support lane > 1 at this time.  */
>        gcc_assert (instance->root_stmts.length () == 1);
>        auto root_stmt_info = instance->root_stmts[0];
> -      auto last_stmt = STMT_VINFO_STMT (root_stmt_info);
> +      auto last_stmt = STMT_VINFO_STMT (vect_orig_stmt (root_stmt_info));
>        gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
>        gimple *vec_stmt = NULL;
>        gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());
> 
> 
> 
> 
>
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
new file mode 100644
index 0000000000000000000000000000000000000000..eec7f8db40c7101c0b9aa6a7b87909bf76cf89fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117410.c
@@ -0,0 +1,94 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+typedef signed char int8_t;
+typedef short int int16_t;
+typedef int int32_t;
+typedef long long int int64_t;
+typedef unsigned char uint8_t;
+typedef short unsigned int uint16_t;
+typedef unsigned int uint32_t;
+typedef long long unsigned int uint64_t;
+
+void __attribute__ ((noinline, noclone))
+test_1_TYPE1_uint32_t (uint16_t *__restrict f, uint32_t *__restrict d,
+                       uint16_t x, uint16_t x2, uint32_t y, int n)
+{
+    for (int i = 0; i < n; ++i)
+        {
+            f[i * 2 + 0] = x;
+            f[i * 2 + 1] = x2;
+            d[i] = y;
+        }
+}
+
+void __attribute__ ((noinline, noclone))
+test_1_TYPE1_int64_t (int32_t *__restrict f, int64_t *__restrict d, int32_t x,
+                      int32_t x2, int64_t y, int n)
+{
+    for (int i = 0; i < n; ++i)
+        {
+            f[i * 2 + 0] = x;
+            f[i * 2 + 1] = x2;
+            d[i] = y;
+        }
+}
+
+int
+main (void)
+{
+        // This part is necessary for ice to appear though running it by itself does not trigger an ICE
+        int n_3_TYPE1_uint32_t = 32;
+        uint16_t x_3_uint16_t = 233;
+        uint16_t x2_3_uint16_t = 78;
+        uint32_t y_3_uint32_t = 1234;
+        uint16_t f_3_uint16_t[33 * 2 + 1] = { 0} ;
+        uint32_t d_3_uint32_t[33] = { 0} ;
+        test_1_TYPE1_uint32_t (f_3_uint16_t, d_3_uint32_t, x_3_uint16_t, x2_3_uint16_t, y_3_uint32_t, n_3_TYPE1_uint32_t);
+        for (int i = 0;
+                        i < n_3_TYPE1_uint32_t;
+                        ++i) {
+                if (f_3_uint16_t[i * 2 + 0] != x_3_uint16_t) __builtin_abort ();
+                if (f_3_uint16_t[i * 2 + 1] != x2_3_uint16_t) __builtin_abort ();
+                if (d_3_uint32_t[i] != y_3_uint32_t) __builtin_abort ();
+        }
+        for (int i = n_3_TYPE1_uint32_t;
+                        i < n_3_TYPE1_uint32_t + 1;
+                        ++i) {
+                if (f_3_uint16_t[i * 2 + 0] != 0) __builtin_abort ();
+                if (f_3_uint16_t[i * 2 + 1] != 0) __builtin_abort ();
+                if (d_3_uint32_t[i] != 0) __builtin_abort ();
+        }
+    // If ran without the above section, a different ice appears. see below
+    int n_3_TYPE1_int64_t = 32;
+    int32_t x_3_int32_t = 233;
+    int32_t x2_3_int32_t = 78;
+    int64_t y_3_int64_t = 1234;
+    int32_t f_3_int32_t[33 * 2 + 1] = { 0 };
+    int64_t d_3_int64_t[33] = { 0 };
+    test_1_TYPE1_int64_t (f_3_int32_t, d_3_int64_t, x_3_int32_t, x2_3_int32_t,
+                          y_3_int64_t, n_3_TYPE1_int64_t);
+    for (int i = 0; i < n_3_TYPE1_int64_t; ++i)
+        {
+            if (f_3_int32_t[i * 2 + 0] != x_3_int32_t)
+                __builtin_abort ();
+            if (f_3_int32_t[i * 2 + 1] != x2_3_int32_t)
+                __builtin_abort ();
+            if (d_3_int64_t[i] != y_3_int64_t)
+                __builtin_abort ();
+        }
+
+    for (int i = n_3_TYPE1_int64_t; i < n_3_TYPE1_int64_t + 1; ++i)
+        {
+            if (f_3_int32_t[i * 2 + 0] != 0)
+                __builtin_abort ();
+            if (f_3_int32_t[i * 2 + 1] != 0)
+                __builtin_abort ();
+            if (d_3_int64_t[i] != 0)
+                __builtin_abort ();
+        }
+
+    return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d35c2ea02dcef1c295bf97adc6717a7294d3f61e..9276662fa0f1beaa6c90835ba0e34726bf55ae19 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -11167,7 +11167,7 @@  vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance i
 	 can't support lane > 1 at this time.  */
       gcc_assert (instance->root_stmts.length () == 1);
       auto root_stmt_info = instance->root_stmts[0];
-      auto last_stmt = STMT_VINFO_STMT (root_stmt_info);
+      auto last_stmt = STMT_VINFO_STMT (vect_orig_stmt (root_stmt_info));
       gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
       gimple *vec_stmt = NULL;
       gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());