diff mbox series

[3/4] openmp: Add IFN_GOMP_MAX_VF

Message ID 20241106152722.2821586-4-ams@baylibre.com
State New
Headers show
Series [1/4] openmp: Tune omp_max_vf for offload targets | expand

Commit Message

Andrew Stubbs Nov. 6, 2024, 3:27 p.m. UTC
Delay omp_max_vf call until after the host and device compilers have diverged
so that the max_vf value can be tuned exactly right on both variants.

This change means that the ompdevlow pass must be enabled for functions that
use OpenMP directives with both "simd" and "schedule" enabled.

gcc/ChangeLog:

	* internal-fn.cc (expand_GOMP_MAX_VF): New function.
	* internal-fn.def (GOMP_MAX_VF): New internal function.
	* omp-expand.cc (omp_adjust_chunk_size): Emit IFN_GOMP_MAX_VF when
	called in offload context, otherwise assume host context.
	* omp-offload.cc (execute_omp_device_lower): Expand IFN_GOMP_MAX_VF.
---
 gcc/internal-fn.cc  |  8 ++++++++
 gcc/internal-fn.def |  1 +
 gcc/omp-expand.cc   | 30 ++++++++++++++++++++++--------
 gcc/omp-offload.cc  |  3 +++
 4 files changed, 34 insertions(+), 8 deletions(-)

Comments

Jakub Jelinek Nov. 6, 2024, 3:37 p.m. UTC | #1
On Wed, Nov 06, 2024 at 03:27:21PM +0000, Andrew Stubbs wrote:
> Delay omp_max_vf call until after the host and device compilers have diverged
> so that the max_vf value can be tuned exactly right on both variants.
> 
> This change means that the ompdevlow pass must be enabled for functions that
> use OpenMP directives with both "simd" and "schedule" enabled.
> 
> gcc/ChangeLog:
> 
> 	* internal-fn.cc (expand_GOMP_MAX_VF): New function.
> 	* internal-fn.def (GOMP_MAX_VF): New internal function.
> 	* omp-expand.cc (omp_adjust_chunk_size): Emit IFN_GOMP_MAX_VF when
> 	called in offload context, otherwise assume host context.
> 	* omp-offload.cc (execute_omp_device_lower): Expand IFN_GOMP_MAX_VF.

> +  tree vf_minus_one = fold_build2 (MINUS_EXPR, type, vf,
> +				   build_int_cst (type, 1));
> +  tree negative_vf = fold_build1 (NEGATE_EXPR, type, vf);

The negation could invoke UB if vf is LONG_MIN or similar (the minimum value
of a signed type has no representable negation), but I think, at least for
now, we can expect omp_max_vf to return reasonably small values whose
negation is well defined even in signed types.

> +  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, vf_minus_one);
> +  return fold_build2 (BIT_AND_EXPR, type, chunk_size, negative_vf);

So ok.

	Jakub
diff mbox series

Patch

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 1b3fe7be047..0ee5f5bc7c5 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -510,6 +510,14 @@  expand_GOMP_SIMT_VF (internal_fn, gcall *)
 
 /* This should get expanded in omp_device_lower pass.  */
 
+static void
+expand_GOMP_MAX_VF (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
+/* This should get expanded in omp_device_lower pass.  */
+
 static void
 expand_GOMP_TARGET_REV (internal_fn, gcall *)
 {
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 2d455938271..c3d0efc0f2c 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -465,6 +465,7 @@  DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_MAX_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_ORDERED_PRED, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_VOTE_ANY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
index b0f9d375b6c..80fb1843445 100644
--- a/gcc/omp-expand.cc
+++ b/gcc/omp-expand.cc
@@ -229,15 +229,29 @@  omp_adjust_chunk_size (tree chunk_size, bool simd_schedule, bool offload)
   if (!simd_schedule || integer_zerop (chunk_size))
     return chunk_size;
 
-  poly_uint64 vf = omp_max_vf (offload);
-  if (known_eq (vf, 1U))
-    return chunk_size;
-
+  tree vf;
   tree type = TREE_TYPE (chunk_size);
-  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
-			    build_int_cst (type, vf - 1));
-  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
-		      build_int_cst (type, -vf));
+
+  if (offload)
+    {
+      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
+      vf = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_MAX_VF,
+					 unsigned_type_node, 0);
+      vf = fold_convert (type, vf);
+    }
+  else
+    {
+      poly_uint64 vf_num = omp_max_vf (false);
+      if (known_eq (vf_num, 1U))
+	return chunk_size;
+      vf = build_int_cst (type, vf_num);
+    }
+
+  tree vf_minus_one = fold_build2 (MINUS_EXPR, type, vf,
+				   build_int_cst (type, 1));
+  tree negative_vf = fold_build1 (NEGATE_EXPR, type, vf);
+  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, vf_minus_one);
+  return fold_build2 (BIT_AND_EXPR, type, chunk_size, negative_vf);
 }
 
 /* Collect additional arguments needed to emit a combined
diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc
index 25ce8133fe5..372b019f9d6 100644
--- a/gcc/omp-offload.cc
+++ b/gcc/omp-offload.cc
@@ -2754,6 +2754,9 @@  execute_omp_device_lower ()
 	  case IFN_GOMP_SIMT_VF:
 	    rhs = build_int_cst (type, vf);
 	    break;
+	  case IFN_GOMP_MAX_VF:
+	    rhs = build_int_cst (type, omp_max_vf (false));
+	    break;
 	  case IFN_GOMP_SIMT_ORDERED_PRED:
 	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
 	    if (rhs || !lhs)