diff mbox series

Add single-lane SLP support to .GOMP_SIMD_LANE vectorization

Message ID 20240701075754.CAC6D139C2@imap1.dmz-prg2.suse.org
State New
Headers show
Series Add single-lane SLP support to .GOMP_SIMD_LANE vectorization | expand

Commit Message

Richard Biener July 1, 2024, 7:57 a.m. UTC
The following adds support for single-lane SLP .GOMP_SIMD_LANE
vectorization.

This doesn't handle much yet; in particular, the g++.dg/vect/simd-*.cc
tests with their 'inscan' uses remain unhandled.

	* tree-vect-slp.cc (no_arg_map): New.
	(vect_get_operand_map): Handle IFN_GOMP_SIMD_LANE.
	(vect_build_slp_tree_1): Likewise.
	* tree-vect-stmts.cc (vectorizable_call): Handle single-lane SLP
	for .GOMP_SIMD_LANE calls.
---
 gcc/tree-vect-slp.cc   | 11 +++++++++++
 gcc/tree-vect-stmts.cc | 27 +++++++++++++++++++--------
 2 files changed, 30 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 3138a815da7..f3743997e9c 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -505,6 +505,7 @@  static const int cond_expr_maps[3][5] = {
   { 4, -2, -1, 1, 2 },
   { 4, -1, -2, 2, 1 }
 };
+static const int no_arg_map[] = { 0 };
 static const int arg0_map[] = { 1, 0 };
 static const int arg1_map[] = { 1, 1 };
 static const int arg2_map[] = { 1, 2 };
@@ -585,6 +586,9 @@  vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
 	  case IFN_CTZ:
 	    return arg0_map;
 
+	  case IFN_GOMP_SIMD_LANE:
+	    return no_arg_map;
+
 	  default:
 	    break;
 	  }
@@ -1168,6 +1172,8 @@  vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 	      ldst_p = true;
 	      rhs_code = CFN_MASK_STORE;
 	    }
+	  else if (cfn == CFN_GOMP_SIMD_LANE)
+	    ;
 	  else if ((cfn != CFN_LAST
 		    && cfn != CFN_MASK_CALL
 		    && internal_fn_p (cfn)
@@ -1271,6 +1277,11 @@  vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 	      need_same_oprnds = true;
 	      first_op1 = gimple_call_arg (call_stmt, 1);
 	    }
+	  else if (rhs_code == CFN_GOMP_SIMD_LANE)
+	    {
+	      need_same_oprnds = true;
+	      first_op1 = gimple_call_arg (call_stmt, 1);
+	    }
 	}
       else
 	{
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 840ff8a3406..270c5a5dd34 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3341,7 +3341,7 @@  vectorizable_call (vec_info *vinfo,
   if (ifn == IFN_LAST && !fndecl)
     {
       if (cfn == CFN_GOMP_SIMD_LANE
-	  && !slp_node
+	  && (!slp_node || SLP_TREE_LANES (slp_node) == 1)
 	  && loop_vinfo
 	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
 	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
@@ -3487,18 +3487,15 @@  vectorizable_call (vec_info *vinfo,
 	  /* Build argument list for the vectorized call.  */
 	  if (slp_node)
 	    {
-	      vec<tree> vec_oprnds0;
-
+	      unsigned int vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
 	      vect_get_slp_defs (vinfo, slp_node, &vec_defs);
-	      vec_oprnds0 = vec_defs[0];
 
 	      /* Arguments are ready.  Create the new vector stmt.  */
-	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
+	      for (i = 0; i < vec_num; ++i)
 		{
 		  int varg = 0;
 		  if (masked_loop_p && reduc_idx >= 0)
 		    {
-		      unsigned int vec_num = vec_oprnds0.length ();
 		      /* Always true for SLP.  */
 		      gcc_assert (ncopies == 1);
 		      vargs[varg++] = vect_get_loop_mask (loop_vinfo,
@@ -3539,11 +3536,26 @@  vectorizable_call (vec_info *vinfo,
 		      vect_finish_stmt_generation (vinfo, stmt_info,
 						   new_stmt, gsi);
 		    }
+		  else if (cfn == CFN_GOMP_SIMD_LANE)
+		    {
+		      /* ???  For multi-lane SLP we'd need to build
+			 { 0, 0, .., 1, 1, ... }.  */
+		      tree cst = build_index_vector (vectype_out,
+						     i * nunits_out, 1);
+		      tree new_var
+			= vect_get_new_ssa_name (vectype_out, vect_simple_var,
+						 "cst_");
+		      gimple *init_stmt = gimple_build_assign (new_var, cst);
+		      vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
+		      new_temp = make_ssa_name (vec_dest);
+		      new_stmt = gimple_build_assign (new_temp, new_var);
+		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
+						   gsi);
+		    }
 		  else
 		    {
 		      if (len_opno >= 0 && len_loop_p)
 			{
-			  unsigned int vec_num = vec_oprnds0.length ();
 			  /* Always true for SLP.  */
 			  gcc_assert (ncopies == 1);
 			  tree len
@@ -3557,7 +3569,6 @@  vectorizable_call (vec_info *vinfo,
 			}
 		      else if (mask_opno >= 0 && masked_loop_p)
 			{
-			  unsigned int vec_num = vec_oprnds0.length ();
 			  /* Always true for SLP.  */
 			  gcc_assert (ncopies == 1);
 			  tree mask = vect_get_loop_mask (loop_vinfo,