diff mbox

[hsa] Pass kernel range to libgomp

Message ID 20140925234156.GA20259@virgil.suse
State New
Headers show

Commit Message

Martin Jambor Sept. 25, 2014, 11:41 p.m. UTC
Hi,

this patch enhances the interface between gcc and the current HSA
libgomp entry point so that it communicates the HSA grid and group sizes.

Bootstrapping of course only showed that there were no warnings, but
I've done it anyway; my small collection of OMP tests did not regress.
Committed to the hsa branch.

Thanks,

Martin


2014-09-26  Martin Jambor  <mjambor@suse.cz>

gcc/
	* hsa-gen.c (hsa_dim_array_type, hsa_range_dimnum_decl)
	(hsa_range_grid_decl, hsa_range_group_decl)
	(hsa_launch_range_type): New variables.
	(init_hsa_functions): Also build the type of range structure.
	(insert_store_range_dim): New function.
	(wrap_hsa): Rename decl to desc.  Also build and pass range
	information.
	* README.hsa: Marked as obsolete.

libgomp/
	* hsaokra.c (__hsa_launch_kernel): Expect a pointer to
	__hsa_launch_range as the second parameter; pass the range to okra if
	non-NULL.
---
 gcc/README.hsa    |  13 ++++++
 gcc/hsa-gen.c     | 121 +++++++++++++++++++++++++++++++++++++++++++-----------
 libgomp/hsaokra.c |  24 ++++++++---
 3 files changed, 130 insertions(+), 28 deletions(-)
diff mbox

Patch

diff --git a/gcc/README.hsa b/gcc/README.hsa
index d165cd9..baad817 100644
--- a/gcc/README.hsa
+++ b/gcc/README.hsa
@@ -1,3 +1,16 @@ 
+Please note that in this particular revision this README file is
+outdated; a new version reflecting all incompatible changes will be
+committed at the end of the batch.  I suggest you use this revision
+only if you really have a reason to do so; otherwise check out a more
+recent one.
+
+In particular, in this revision it is no longer possible to use the
+simulator directly; only the OKRA interface is supported (but you can
+still use the simulator through it).  The rest still works, provided
+your OKRA and LIBHSAIL tools support and work with HSAIL 0.95.
+
+===========================================================================
+
 For playing with the HSA branch there are two viable approaches:
 1) Use the HSA simulator directly (via a shared library)
 2) Use the OKRA wrapper, which comes in two flavors itself,
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 9bf899e..7a0e642 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -1971,6 +1971,11 @@  generate_hsa (void)
 
 static GTY(()) tree hsa_launch_fn;
 static GTY(()) tree hsa_kernel_desc_type;
+static GTY(()) tree hsa_dim_array_type;
+static GTY(()) tree hsa_range_dimnum_decl;
+static GTY(()) tree hsa_range_grid_decl;
+static GTY(()) tree hsa_range_group_decl;
+static GTY(()) tree hsa_launch_range_type;
 
 static void
 init_hsa_functions (void)
@@ -2010,12 +2015,42 @@  init_hsa_functions (void)
   finish_builtin_struct (hsa_kernel_desc_type, "__hsa_kernel_desc",
 			 fields, NULL_TREE);
 
-  /* __hsa_launch_kernel (__hsa_kernel_desc * kd, void* attr, uint64_t *args) */
+
+  tree dim_arr_index_type;
+  dim_arr_index_type = build_index_type (build_int_cst (integer_type_node, 2));
+  hsa_dim_array_type = build_array_type (uint32_type_node, dim_arr_index_type);
+
+  hsa_launch_range_type = make_node (RECORD_TYPE);
+  fields = NULL_TREE;
+  hsa_range_dimnum_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+				      get_identifier ("dimension"),
+				      uint32_type_node);
+  DECL_CHAIN (hsa_range_dimnum_decl) = NULL_TREE;
+
+  hsa_range_grid_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+				    get_identifier ("global_size"),
+				    hsa_dim_array_type);
+  DECL_CHAIN (hsa_range_grid_decl) = hsa_range_dimnum_decl;
+  hsa_range_group_decl = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+				     get_identifier ("group_size"),
+				     hsa_dim_array_type);
+  DECL_CHAIN (hsa_range_group_decl) = hsa_range_grid_decl;
+  tree reserved = build_decl (BUILTINS_LOCATION, FIELD_DECL,
+			      get_identifier ("reserved"), uint32_type_node);
+  DECL_CHAIN (reserved) = hsa_range_group_decl;
+
+  /* This is in fact okra_range_s, but let's call everything HSA, at least for
+     now.  */
+  finish_builtin_struct (hsa_launch_range_type, "__hsa_launch_range",
+			 reserved, NULL_TREE);
+
+  /* __hsa_launch_kernel (__hsa_kernel_desc * kd, __hsa_launch_range* range,
+     uint64_t *args) */
 
   launch_fn_type
     = build_function_type_list (void_type_node,
 				build_pointer_type (hsa_kernel_desc_type),
-				ptr_type_node,
+				build_pointer_type (hsa_launch_range_type),
 				build_pointer_type (uint64_type_node),
 				NULL_TREE);
 
@@ -2024,6 +2059,25 @@  init_hsa_functions (void)
 		     launch_fn_type);
 }
 
+/* Insert before the current statement in GSI a store of VALUE to INDEX of the
+   array field FLD_DECL (of type hsa_dim_array_type) of RANGE_VAR.  */
+
+static void
+insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
+			tree fld_decl, int index, int value)
+{
+  tree ref = build4 (ARRAY_REF, uint32_type_node,
+		     build3 (COMPONENT_REF, hsa_dim_array_type,
+			     range_var, fld_decl, NULL_TREE),
+		     build_int_cst (integer_type_node, index),
+		     NULL_TREE, NULL_TREE);
+  gsi_insert_before (gsi,
+		     gimple_build_assign (ref,
+					  build_int_cst (uint32_type_node,
+							 value)),
+		     GSI_SAME_STMT);
+}
+
 static unsigned int
 wrap_hsa (void)
 {
@@ -2047,39 +2101,60 @@  wrap_hsa (void)
 	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, str);
 
 	    int slen = IDENTIFIER_LENGTH (DECL_ASSEMBLER_NAME (fndecl));
-	    if (asprintf (&tmpname, "&%s", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fndecl))) < 0)
+	    if (asprintf (&tmpname, "&%s",
+			  IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fndecl))) < 0)
 	      gcc_unreachable ();
 	    sanitize_hsa_name (tmpname + 1);
 
 	    str = build_string_literal (slen + 2, tmpname);
 	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, str);
-	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, size_int (gimple_call_num_args (gsi_stmt (gsi))));
+	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+				    size_int (gimple_call_num_args
+					      (gsi_stmt (gsi))));
 	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, null_pointer_node);
 	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, null_pointer_node);
 
-	    tree initval = build_constructor (hsa_kernel_desc_type, v);
+	    tree desc_initval = build_constructor (hsa_kernel_desc_type, v);
 
 	    /* Create a new VAR_DECL of type descriptor.  */
 	    char tmp_name[32];
 	    static unsigned int var_id;
 	    ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kd", var_id++);
-	    tree decl = build_decl (gimple_location (gsi_stmt (gsi)),
-				    VAR_DECL, get_identifier (tmp_name),
-				    hsa_kernel_desc_type);
-	    TREE_STATIC (decl) = 1;
-	    TREE_PUBLIC (decl) = 0;
-	    DECL_ARTIFICIAL (decl) = 1;
-	    DECL_IGNORED_P (decl) = 1;
-	    DECL_EXTERNAL (decl) = 0;
-
-	    TREE_CONSTANT (initval) = 1;
-	    TREE_STATIC (initval) = 1;
-	    DECL_INITIAL (decl) = initval;
-	    varpool_node::finalize_decl (decl);
-
-	    decl = build_fold_addr_expr (decl);
-	    tree args = create_tmp_var (build_array_type_nelts (uint64_type_node,
-								gimple_call_num_args (gsi_stmt (gsi))),
+	    tree desc = build_decl (gimple_location (gsi_stmt (gsi)),
+					 VAR_DECL, get_identifier (tmp_name),
+					 hsa_kernel_desc_type);
+	    TREE_STATIC (desc) = 1;
+	    TREE_PUBLIC (desc) = 0;
+	    DECL_ARTIFICIAL (desc) = 1;
+	    DECL_IGNORED_P (desc) = 1;
+	    DECL_EXTERNAL (desc) = 0;
+
+	    TREE_CONSTANT (desc_initval) = 1;
+	    TREE_STATIC (desc_initval) = 1;
+	    DECL_INITIAL (desc) = desc_initval;
+	    varpool_node::finalize_decl (desc);
+	    desc = build_fold_addr_expr (desc);
+
+	    /* We fill in range dynamically because later on we'd like to
+	       decide about the values at run time.  */
+	    tree range = create_tmp_var (hsa_launch_range_type, "__hsa_range");
+	    tree dimref = build3 (COMPONENT_REF, uint32_type_node,
+				  range, hsa_range_dimnum_decl, NULL_TREE);
+	    tree u32one = build_int_cst (uint32_type_node, 1);
+	    gsi_insert_before (&gsi,
+			       gimple_build_assign (dimref, u32one),
+			       GSI_SAME_STMT);
+	    insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 0, 256);
+	    insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 1, 1);
+	    insert_store_range_dim (&gsi, range, hsa_range_grid_decl, 2, 1);
+	    insert_store_range_dim (&gsi, range, hsa_range_group_decl, 0, 16);
+	    insert_store_range_dim (&gsi, range, hsa_range_group_decl, 1, 1);
+	    insert_store_range_dim (&gsi, range, hsa_range_group_decl, 2, 1);
+	    range = build_fold_addr_expr (range);
+
+	    tree args = create_tmp_var
+	      (build_array_type_nelts (uint64_type_node,
+				       gimple_call_num_args (gsi_stmt (gsi))),
 					NULL);
 
 	    for (unsigned i = 0; i < gimple_call_num_args (gsi_stmt (gsi)); i++)
@@ -2103,7 +2178,7 @@  wrap_hsa (void)
 
 	    /* XXX doesn't handle calls with lhs, doesn't remove EH
 	       edges.  */
-	    launch = gimple_build_call (hsa_launch_fn, 3, decl, null_pointer_node, args);
+	    launch = gimple_build_call (hsa_launch_fn, 3, desc, range, args);
 	    gsi_insert_before (&gsi, launch, GSI_SAME_STMT);
 	    unlink_stmt_vdef (gsi_stmt (gsi));
 	    gsi_remove (&gsi, true);
diff --git a/libgomp/hsaokra.c b/libgomp/hsaokra.c
index 8d9b2e4..ada926c 100644
--- a/libgomp/hsaokra.c
+++ b/libgomp/hsaokra.c
@@ -100,11 +100,13 @@  typedef struct __hsa_kernel_desc_
   okra_context_t *context;
 } __hsa_kernel_desc;
 
-void * __hsa_launch_kernel (__hsa_kernel_desc *, __hsa_launch_attrs *attrs,
-			    __hsa_kernelarg *args);
+typedef okra_range_t __hsa_launch_range;
+
+void * __hsa_launch_kernel (__hsa_kernel_desc *, __hsa_launch_range *,
+			    __hsa_kernelarg *);
 
 void *
-__hsa_launch_kernel (__hsa_kernel_desc * _kd, __hsa_launch_attrs *attrs,
+__hsa_launch_kernel (__hsa_kernel_desc * _kd, __hsa_launch_range *range_p,
 		     __hsa_kernelarg *args)
 {
   okra_status_t status;
@@ -166,9 +168,21 @@  __hsa_launch_kernel (__hsa_kernel_desc * _kd, __hsa_launch_attrs *attrs,
   range.dimension = 1;
   range.global_size[0] = 256;
   range.group_size[0] = 16;
+  if (!range_p)
+    {
+      range.dimension = 1;
+      range.global_size[0] = 256;
+      range.group_size[0] = 16;
+      range_p = &range;
+    }
   if (debug > 0)
-    fprintf (stderr, "HSA: launching kernel %s\n", _kd->name);
-  status = _okra_execute_kernel (context, kernel, &range);
+    {
+      fprintf (stderr, "HSA: launching kernel %s\n", _kd->name);
+      fprintf (stderr, "dim: %u, s0: %u, g0: %u, r: %u\n", range_p->dimension,
+	       range_p->global_size[0], range_p->group_size[0],
+	       range_p->reserved);
+    }
+  status = _okra_execute_kernel (context, kernel, range_p);
   if (status != OKRA_SUCCESS)
     {
       fprintf (stderr, "Failed to launch kernel\n");