diff mbox

[hsa] Pass kernel launch attributes through a device-specific argument

Message ID 20151113185136.GW2460@virgil.suse.cz
State New
Headers show

Commit Message

Martin Jambor Nov. 13, 2015, 6:51 p.m. UTC
Hi,

this HSA patch is analogous to the for-trunk RFC I sent a while ago;
it implements passing HSA-specific grid sizes through a
device-specific argument.  Committed to the branch.

Thanks,

Martin


2015-11-13  Martin Jambor  <mjambor@suse.cz>

include/
	* gomp-constants.h (GOMP_TARGET_ARG_FIRST_DEVICE_SPECIFIC): New
	constant.
	(GOMP_TARGET_ARG_NUM_TEAMS): Likewise.
	(GOMP_TARGET_ARG_THREAD_LIMIT): Likewise.
	(GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES): Likewise.

gcc/
	* builtin-types.def
	(BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT_PTR): Turned
	into BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR.
	* omp-builtins.def (BUILT_IN_GOMP_TARGET): Updated type.
	* omp-low.c (get_target_arguments): New function.
	(expand_omp_target): Call it, do not calculate num_teams and
	thread_limit.

gcc/fortran/
	* types.def
	(BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT_PTR): Turned
	into BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR.

libgomp/
	* libgomp.h (gomp_device_descr): Update type of run_func.
	* libgomp_g.h (GOMP_target_ext): Update type.
	* oacc-host.c (host_run): Likewise.
	* target.c (GOMP_target_ext): Change type, pass arguments to plugins.
	* plugin/plugin-hsa.c (parse_launch_attributes): Parse arguments.
	(GOMP_OFFLOAD_run): Update type.

liboffloadmic/plugin/
	* libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_run): Update type.
---
 gcc/builtin-types.def                            |  6 +-
 gcc/fortran/types.def                            |  4 +-
 gcc/omp-builtins.def                             |  2 +-
 gcc/omp-low.c                                    | 85 ++++++++++++++++--------
 include/gomp-constants.h                         | 10 +++
 libgomp/libgomp.h                                |  2 +-
 libgomp/libgomp_g.h                              |  3 +-
 libgomp/oacc-host.c                              |  2 +-
 libgomp/plugin/plugin-hsa.c                      | 31 +++++++--
 libgomp/target.c                                 | 13 ++--
 liboffloadmic/plugin/libgomp-plugin-intelmic.cpp |  3 +-
 11 files changed, 109 insertions(+), 52 deletions(-)
diff mbox

Patch

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index ef854c4..251c980 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -557,9 +557,9 @@  DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
 		     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
 		     BT_BOOL, BT_UINT, BT_PTR, BT_INT)
 
-DEF_FUNCTION_TYPE_11 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT_PTR,
-		      BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-		      BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT, BT_PTR)
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
+		     BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
+		     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
 		      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index 14e6970..d5f44ab 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -222,9 +222,9 @@  DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
 		     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
 		     BT_BOOL, BT_UINT, BT_PTR, BT_INT)
 
-DEF_FUNCTION_TYPE_11 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT_PTR,
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
 		      BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-		      BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT, BT_PTR)
+		      BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
 		      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index c75da11..20c06b7 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -343,7 +343,7 @@  DEF_GOMP_BUILTIN (BUILT_IN_GOMP_OFFLOAD_REGISTER, "GOMP_offload_register",
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_OFFLOAD_UNREGISTER, "GOMP_offload_unregister",
 		  BT_FN_VOID_PTR_INT_PTR, ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET, "GOMP_target_ext",
-		  BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT_PTR,
+		  BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
 		  ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_DATA, "GOMP_target_data_ext",
 		  BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index c7c9c3b..d10636b 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12598,6 +12598,62 @@  get_kernel_launch_attributes (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
   return build_fold_addr_expr (lattrs);
 }
 
+/* Build the NULL-terminated argument array handed to GOMP_target_ext.  */
+
+static tree
+get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
+{
+  auto_vec <tree, 4> args;
+  tree clauses = gimple_omp_target_clauses (tgt_stmt);
+  tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
+  if (c)
+    {
+      t = fold_convert (ptr_type_node, OMP_CLAUSE_NUM_TEAMS_EXPR (c));
+      t = force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
+    }
+  else
+    t = fold_convert (ptr_type_node, integer_minus_one_node);
+  args.quick_push (t); /* First element: NUM_TEAMS, -1 without a clause.  */
+  c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
+  if (c)
+    {
+      t = fold_convert (ptr_type_node, OMP_CLAUSE_THREAD_LIMIT_EXPR (c));
+      t = force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
+    }
+  else
+    t = fold_convert (ptr_type_node, integer_minus_one_node);
+  args.quick_push (t); /* Second element: THREAD_LIMIT, -1 without a clause.  */
+
+  /* Add the HSA kernel attributes as an (identifier, value) pair, if any.  */
+  if (gimple_omp_target_dimensions (tgt_stmt))
+    {
+      args.quick_push (build_int_cst (ptr_type_node,
+				     GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES));
+      args.quick_push (get_kernel_launch_attributes (gsi, tgt_stmt));
+    }
+
+  /* Produce more, perhaps device-specific, arguments here.  */
+
+  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
+							  args.length () + 1),
+				  ".omp_target_args");
+  for (unsigned i = 0; i < args.length (); i++)
+    {
+      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+			 build_int_cst (integer_type_node, i),
+			 NULL_TREE, NULL_TREE);
+      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
+			 GSI_SAME_STMT);
+    }
+  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+		     build_int_cst (integer_type_node, args.length ()),
+		     NULL_TREE, NULL_TREE); /* The extra, last slot.  */
+  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
+		     GSI_SAME_STMT); /* Store the terminating NULL.  */
+  TREE_ADDRESSABLE (argarray) = 1;
+  return build_fold_addr_expr (argarray);
+}
+
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
 static void
@@ -13004,34 +13060,7 @@  expand_omp_target (struct omp_region *region)
 	depend = build_int_cst (ptr_type_node, 0);
       args.quick_push (depend);
       if (start_ix == BUILT_IN_GOMP_TARGET)
-	{
-	  c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
-	  if (c)
-	    {
-	      t = fold_convert (integer_type_node,
-				OMP_CLAUSE_NUM_TEAMS_EXPR (c));
-	      t = force_gimple_operand_gsi (&gsi, t, true, NULL,
-					    true, GSI_SAME_STMT);
-	    }
-	  else
-	    t = integer_minus_one_node;
-	  args.quick_push (t);
-	  c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
-	  if (c)
-	    {
-	      t = fold_convert (integer_type_node,
-				OMP_CLAUSE_THREAD_LIMIT_EXPR (c));
-	      t = force_gimple_operand_gsi (&gsi, t, true, NULL,
-					    true, GSI_SAME_STMT);
-	    }
-	  else
-	    t = integer_minus_one_node;
-	  args.quick_push (t);
-	  if (gimple_omp_target_dimensions (entry_stmt))
-	    args.quick_push (get_kernel_launch_attributes (&gsi, entry_stmt));
-	  else
-	    args.quick_push (build_zero_cst (ptr_type_node));
-	}
+	args.quick_push (get_target_arguments (&gsi, entry_stmt));
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index ad587d1..344f59e 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -224,6 +224,16 @@  enum gomp_map_kind
 #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
 #define GOMP_LAUNCH_OP_MAX 0xffff
 
+/* Index of the first device-specific element among target arguments.  */
+#define GOMP_TARGET_ARG_FIRST_DEVICE_SPECIFIC	2
+/* Target argument index of NUM_TEAMS.  */
+#define GOMP_TARGET_ARG_NUM_TEAMS		0
+/* Target argument index of THREAD_LIMIT.  */
+#define GOMP_TARGET_ARG_THREAD_LIMIT		1
+
+/* Identifiers of device-specific target arguments.  */
+#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES	1
+
 /* HSA specific data structures.  */
 
 /* Structure describing the run-time and grid properties of an HSA kernel
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 68a72aa..78953db 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -876,7 +876,7 @@  struct gomp_device_descr
   void *(*dev2host_func) (int, void *, const void *, size_t);
   void *(*host2dev_func) (int, void *, const void *, size_t);
   void *(*dev2dev_func) (int, void *, const void *, size_t);
-  void (*run_func) (int, void *, void *, const void *);
+  void (*run_func) (int, void *, void *, void **);
   bool (*can_run_func) (void *);
 
   /* Splay tree containing information about mapped memory regions.  */
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index cc34c6f..9c90d59 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -278,8 +278,7 @@  extern void GOMP_single_copy_end (void *);
 extern void GOMP_target (int, void (*) (void *), const void *,
 			 size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *,
-			     unsigned short *, unsigned int, void **,
-			     int, int, const void *);
+			     unsigned short *, unsigned int, void **, void **);
 extern void GOMP_target_data (int, const void *,
 			      size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_data_ext (int, size_t, void **, size_t *,
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 6f938ae..a769211 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -124,7 +124,7 @@  host_host2dev (int n __attribute__ ((unused)),
 
 static void
 host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
-	  const void* kern_launch __attribute__ ((unused)))
+	  void **args __attribute__((unused)))
 {
   void (*fn)(void *) = (void (*)(void *)) fn_ptr;
 
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index adc0444..0993a8f 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -1052,11 +1052,28 @@  init_kernel (struct kernel_info *kernel)
    values, then store INPUT or DEF into *RESULT.  */
 
 static bool
-parse_launch_attributes (const void *input,
+parse_launch_attributes (void **input,
 			 struct GOMP_kernel_launch_attributes *def,
-			 const struct GOMP_kernel_launch_attributes **result)
+			 struct GOMP_kernel_launch_attributes **result)
 {
   if (!input)
+    GOMP_PLUGIN_fatal ("No target arguments provided");
+
+  bool attrs_found = false;
+  input += GOMP_TARGET_ARG_FIRST_DEVICE_SPECIFIC;
+  while (*input)
+    {
+      uintptr_t id = (uintptr_t) *input;
+      input++;
+      if (id == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES)
+	{
+	  attrs_found = true;
+	  break;
+	}
+      input++;
+    }
+
+  if (!attrs_found)
     {
       def->ndim = 1;
       def->gdims[0] = 1;
@@ -1070,8 +1087,8 @@  parse_launch_attributes (const void *input,
       return true;
     }
 
-  const struct GOMP_kernel_launch_attributes *kla;
-  kla = (const struct GOMP_kernel_launch_attributes *) input;
+  struct GOMP_kernel_launch_attributes *kla;
+  kla = (struct GOMP_kernel_launch_attributes *) *input;
   *result = kla;
   if (kla->ndim != 1)
     GOMP_PLUGIN_fatal ("HSA does not yet support number of dimensions "
@@ -1115,13 +1132,13 @@  failure:
    identified by FN_PTR which must point to a kernel_info structure.  */
 
 void
-GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, const void* kern_launch)
+GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void** args)
 {
   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
   struct agent_info *agent = kernel->agent;
   struct GOMP_kernel_launch_attributes def;
-  const struct GOMP_kernel_launch_attributes *kla;
-  if (!parse_launch_attributes (kern_launch, &def, &kla))
+  struct GOMP_kernel_launch_attributes *kla;
+  if (!parse_launch_attributes (args, &def, &kla))
     {
       HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
       return;
diff --git a/libgomp/target.c b/libgomp/target.c
index 05f7652..10fc9c4 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1401,6 +1401,11 @@  GOMP_target (int device, void (*fn) (void *), const void *unused,
    and several arguments have been added:
    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
    DEPEND is array of dependencies, see GOMP_task for details.
+   ARGS is a pointer to an array consisting of NUM_TEAMS, THREAD_LIMIT and a
+   variable number of device-specific arguments, each of which takes two
+   elements: the first identifies the argument and the second is its value.
+   The last element of the array is a single NULL.
+
    NUM_TEAMS is positive if GOMP_teams will be called in the body with
    that value, or 1 if teams construct is not present, or 0, if
    teams construct does not have num_teams clause and so the choice is
@@ -1414,14 +1419,10 @@  GOMP_target (int device, void (*fn) (void *), const void *unused,
 void
 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
-		 unsigned int flags, void **depend, int num_teams,
-		 int thread_limit, const void *kernel_launch)
+		 unsigned int flags, void **depend, void **args)
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
-  (void) num_teams;
-  (void) thread_limit;
-
   /* If there are depend clauses, but nowait is not present,
      block the parent task until the dependencies are resolved
      and then just continue with the rest of the function as if it
@@ -1462,7 +1463,7 @@  GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
     }
   devicep->run_func (devicep->target_id, fn_addr,
 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
-		     kernel_launch);
+		     args);
   gomp_free_thread (thr);
   *thr = old_thr;
   if (tgt_vars)
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index 26ac6fe..5314333 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -501,7 +501,8 @@  GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
 }
 
 extern "C" void
-GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
+GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars,
+		  void **args __attribute__((unused)))
 {
   TRACE ("(tgt_fn = %p, tgt_vars = %p)", tgt_fn, tgt_vars);