diff mbox series

[v2] tree-optimization/117484 - issue with SLP discovery of permuted .MASK_LOAD

Message ID 20241111125403.303903858C56@sourceware.org
State New
Headers show
Series [v2] tree-optimization/117484 - issue with SLP discovery of permuted .MASK_LOAD | expand

Commit Message

Richard Biener Nov. 11, 2024, 12:53 p.m. UTC
When we do SLP discovery of a .MASK_LOAD for a dataref group with gaps
the discovery for the mask will have gaps as well and this was
unexpected in a few places.  The following re-organizes things
slightly to accommodate this.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

	PR tree-optimization/117484
	* tree-vect-slp.cc (vect_build_slp_tree_2): Handle gaps in
	mask discovery.  Fix condition to release the load permutation.
	(vect_lower_load_permutations): Assert we get no load
	permutation for the unpermuted node.
	* tree-vect-slp-patterns.cc (linear_loads_p): Properly identify
	loads (without permutation).
	(compatible_complex_nodes_p): Likewise.

	* gcc.dg/vect/pr117484-1.c: New testcase.
	* gcc.dg/vect/pr117484-2.c: Likewise.
---
 gcc/testsuite/gcc.dg/vect/pr117484-1.c | 13 +++++++++++++
 gcc/testsuite/gcc.dg/vect/pr117484-2.c | 16 ++++++++++++++++
 gcc/tree-vect-slp-patterns.cc          | 14 ++++++++++----
 gcc/tree-vect-slp.cc                   | 22 +++++++++++++---------
 4 files changed, 52 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-2.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-1.c b/gcc/testsuite/gcc.dg/vect/pr117484-1.c
new file mode 100644
index 00000000000..453556c50f9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117484-1.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+
+extern int a;
+extern short b[];
+extern signed char c[], d[];
+int main()
+{
+  for (long j = 3; j < 1024; j += 3)
+    if (c[j] ? b[j] : 0) {
+      b[j] = d[j - 2];
+      a = d[j];
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-2.c b/gcc/testsuite/gcc.dg/vect/pr117484-2.c
new file mode 100644
index 00000000000..baffe7597ba
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117484-2.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+
+int a;
+extern int d[];
+extern int b[];
+extern _Bool c[];
+extern char h[];
+int main()
+{
+  for (int i = 0; i < 1024; i += 4)
+    if (h[i] || c[i])
+      {
+	a = d[i];
+	b[i] = d[i - 3];
+      }
+}
diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc
index 8adae8a6ec0..d62682be43c 100644
--- a/gcc/tree-vect-slp-patterns.cc
+++ b/gcc/tree-vect-slp-patterns.cc
@@ -221,9 +221,15 @@  linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
   perm_cache->put (root, retval);
 
   /* If it's a load node, then just read the load permute.  */
-  if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
+  if (SLP_TREE_DEF_TYPE (root) == vect_internal_def
+      && SLP_TREE_CODE (root) != VEC_PERM_EXPR
+      && STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))
+      && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))))
     {
-      retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
+      if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
+	retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
+      else
+	retval = PERM_EVENODD;
       perm_cache->put (root, retval);
       return retval;
     }
@@ -798,8 +804,8 @@  compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
 	return false;
     }
 
-  if (!SLP_TREE_LOAD_PERMUTATION (a).exists ()
-      || !SLP_TREE_LOAD_PERMUTATION (b).exists ())
+  if (!STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a))
+      || !STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b)))
     {
       for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
 	{
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ffe9e718575..8e4ad05e2a4 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2004,14 +2004,15 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 	    = STMT_VINFO_GROUPED_ACCESS (stmt_info)
 	      ? DR_GROUP_FIRST_ELEMENT (stmt_info) : stmt_info;
 	  bool any_permute = false;
-	  bool any_null = false;
 	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
 	    {
 	      int load_place;
 	      if (! load_info)
 		{
-		  load_place = j;
-		  any_null = true;
+		  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+		    load_place = j;
+		  else
+		    load_place = 0;
 		}
 	      else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 		load_place = vect_get_place_in_interleaving_chain
@@ -2022,11 +2023,6 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 	      any_permute |= load_place != j;
 	      load_permutation.quick_push (load_place);
 	    }
-	  if (any_null)
-	    {
-	      gcc_assert (!any_permute);
-	      load_permutation.release ();
-	    }
 
 	  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
 	    {
@@ -2081,6 +2077,8 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 		     followed by 'node' being the desired final permutation.  */
 		  if (unperm_load)
 		    {
+		      gcc_assert
+			(!SLP_TREE_LOAD_PERMUTATION (unperm_load).exists ());
 		      lane_permutation_t lperm;
 		      lperm.create (group_size);
 		      for (unsigned j = 0; j < load_permutation.length (); ++j)
@@ -2101,6 +2099,10 @@  vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
 	    }
 	  else
 	    {
+	      if (!any_permute
+		  && STMT_VINFO_GROUPED_ACCESS (stmt_info)
+		  && group_size == DR_GROUP_SIZE (first_stmt_info))
+		load_permutation.release ();
 	      SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
 	      return node;
 	    }
@@ -2675,7 +2677,8 @@  out:
 	      tree op0;
 	      tree uniform_val = op0 = oprnd_info->ops[0];
 	      for (j = 1; j < oprnd_info->ops.length (); ++j)
-		if (!operand_equal_p (uniform_val, oprnd_info->ops[j]))
+		if (oprnd_info->ops[j]
+		    && !operand_equal_p (uniform_val, oprnd_info->ops[j]))
 		  {
 		    uniform_val = NULL_TREE;
 		    break;
@@ -4510,6 +4513,7 @@  vect_lower_load_permutations (loop_vec_info loop_vinfo,
 					 group_lanes,
 					 &max_nunits, matches, &limit,
 					 &tree_size, bst_map);
+      gcc_assert (!SLP_TREE_LOAD_PERMUTATION (l0).exists ());
 
       if (ld_lanes_lanes != 0)
 	{