diff mbox series

[v2,3/8] tree-ifcvt: Enforce zero else value after maskload.

Message ID 20241018142220.173482-4-rdapp@ventanamicro.com
State New
Headers show
Series Add maskload else operand. | expand

Commit Message

Robin Dapp Oct. 18, 2024, 2:22 p.m. UTC
When predicating a load we implicitly assume that the else value is
zero.  This matters in case the loaded value is padded (like e.g.
a Bool) and we must ensure that the padding bytes are zero on targets
that don't implicitly zero inactive elements.

In order to formalize this this patch queries the target for
its supported else operand and uses that for the maskload call.
Subsequently, if the else operand is nonzero, a cond_expr enforcing
a zero else value is emitted.

gcc/ChangeLog:

	* tree-if-conv.cc (predicate_load_or_store): Enforce zero else
	value for padded types.
	(predicate_statements): Use sequence instead of statement.
---
 gcc/tree-if-conv.cc | 112 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 90c754a4814..9623426e1e1 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -2531,12 +2531,15 @@  mask_exists (int size, const vec<int> &vec)
 
 /* Helper function for predicate_statements.  STMT is a memory read or
    write and it needs to be predicated by MASK.  Return a statement
-   that does so.  */
+   that does so.  SSA_NAMES is the set of SSA names defined earlier in
+   STMT's block. */
 
-static gimple *
-predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
+static gimple_seq
+predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask,
+			 hash_set<tree_ssa_name_hash> *ssa_names)
 {
-  gcall *new_stmt;
+  gimple_seq stmts = NULL;
+  gcall *call_stmt;
 
   tree lhs = gimple_assign_lhs (stmt);
   tree rhs = gimple_assign_rhs1 (stmt);
@@ -2552,21 +2555,88 @@  predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
 		   ref);
   if (TREE_CODE (lhs) == SSA_NAME)
     {
-      new_stmt
-	= gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
-				      ptr, mask);
-      gimple_call_set_lhs (new_stmt, lhs);
-      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+      /* Get the preferred vector mode and its corresponding mask for the
+	 masked load.  We need this to query the target's supported else
+	 operands.  */
+      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+      scalar_mode smode = as_a <scalar_mode> (mode);
+
+      machine_mode vmode = targetm.vectorize.preferred_simd_mode (smode);
+      machine_mode mask_mode
+	= targetm.vectorize.get_mask_mode (vmode).require ();
+
+      auto_vec<int> elsvals;
+      internal_fn ifn;
+      bool have_masked_load
+	= target_supports_mask_load_store_p (vmode, mask_mode, true, &ifn,
+					     &elsvals);
+
+      /* We might need to explicitly zero inactive elements if there are
+	 padding bits in the type that might leak otherwise.
+	 Refer to PR115336.  */
+      bool need_zero
+	= TYPE_PRECISION (TREE_TYPE (lhs)) < GET_MODE_PRECISION (smode);
+
+      int elsval;
+      bool implicit_zero = false;
+      if (have_masked_load)
+	{
+	  gcc_assert (elsvals.length ());
+
+	  /* But not if the target already provide implicit zeroing of inactive
+	     elements.  */
+	  implicit_zero = elsvals.contains (MASK_LOAD_ELSE_ZERO);
+
+	  /* For now, just use the first else value if zero is unsupported.  */
+	  elsval = implicit_zero ? MASK_LOAD_ELSE_ZERO : *elsvals.begin ();
+	}
+      else
+	{
+	  /* We cannot vectorize this either way so just use a zero even
+	     if it is unsupported.  */
+	  elsval = MASK_LOAD_ELSE_ZERO;
+	}
+
+      tree els = vect_get_mask_load_else (elsval, TREE_TYPE (lhs));
+
+      call_stmt
+	= gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
+				      ptr, mask, els);
+
+      /* Build the load call and, if the else value is nonzero,
+	 a COND_EXPR that enforces it.  */
+      tree loadlhs;
+      if (!need_zero || implicit_zero)
+	gimple_call_set_lhs (call_stmt, gimple_get_lhs (stmt));
+      else
+	{
+	  loadlhs = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "_ifc_");
+	  ssa_names->add (loadlhs);
+	  gimple_call_set_lhs (call_stmt, loadlhs);
+	}
+      gimple_set_vuse (call_stmt, gimple_vuse (stmt));
+      gimple_seq_add_stmt (&stmts, call_stmt);
+
+      if (need_zero && !implicit_zero)
+	{
+	  tree cond_rhs
+	    = fold_build_cond_expr (TREE_TYPE (loadlhs), mask, loadlhs,
+				    build_zero_cst (TREE_TYPE (loadlhs)));
+	  gassign *cond_stmt
+	    = gimple_build_assign (gimple_get_lhs (stmt), cond_rhs);
+	  gimple_seq_add_stmt (&stmts, cond_stmt);
+	}
     }
   else
     {
-      new_stmt
+      call_stmt
 	= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
 				      mask, rhs);
-      gimple_move_vops (new_stmt, stmt);
+      gimple_move_vops (call_stmt, stmt);
+      gimple_seq_add_stmt (&stmts, call_stmt);
     }
-  gimple_call_set_nothrow (new_stmt, true);
-  return new_stmt;
+  gimple_call_set_nothrow (call_stmt, true);
+  return stmts;
 }
 
 /* STMT uses OP_LHS.  Check whether it is equivalent to:
@@ -2836,7 +2906,6 @@  predicate_statements (loop_p loop)
 	    {
 	      tree lhs = gimple_assign_lhs (stmt);
 	      tree mask;
-	      gimple *new_stmt;
 	      gimple_seq stmts = NULL;
 	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
 	      /* We checked before setting GF_PLF_2 that an equivalent
@@ -2870,11 +2939,18 @@  predicate_statements (loop_p loop)
 		  vect_masks.safe_push (mask);
 		}
 	      if (gimple_assign_single_p (stmt))
-		new_stmt = predicate_load_or_store (&gsi, stmt, mask);
-	      else
-		new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
+		{
+		  gimple_seq call_seq
+		    = predicate_load_or_store (&gsi, stmt, mask, &ssa_names);
 
-	      gsi_replace (&gsi, new_stmt, true);
+		  gsi_replace_with_seq (&gsi, call_seq, true);
+		}
+	      else
+		{
+		  gimple *new_stmt;
+		  new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
+		  gsi_replace (&gsi, new_stmt, true);
+		}
 	    }
 	  else if (((lhs = gimple_assign_lhs (stmt)), true)
 		   && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))