@@ -450,6 +450,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_ULONG_ULONG_ULONGPTR, BT_BOOL, BT_ULONG,
BT_ULONG, BT_PTR_ULONG)
DEF_FUNCTION_TYPE_3 (BT_FN_BOOL_ULONGLONG_ULONGLONG_ULONGLONGPTR, BT_BOOL,
BT_ULONGLONG, BT_ULONGLONG, BT_PTR_ULONGLONG)
+DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_INT_PTR, BT_VOID, BT_PTR, BT_INT, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR,
BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR)
@@ -223,6 +223,10 @@ unsigned int flag_sanitize_recover = SANITIZE_UNDEFINED | SANITIZE_NONDEFAULT |
Variable
bool dump_base_name_prefixed = false
+; Flag whether HSA generation has been explicitely disabled
+Variable
+bool flag_disable_hsa = false
+
###
Driver
@@ -131,6 +131,12 @@
#endif
+/* Define this to enable support for generating HSAIL. */
+#ifndef USED_FOR_TARGET
+#undef ENABLE_HSA
+#endif
+
+
/* Define if gcc should always pass --build-id to linker. */
#ifndef USED_FOR_TARGET
#undef ENABLE_LD_BUILDID
@@ -7445,6 +7445,13 @@ fi
for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
tgt=`echo $tgt | sed 's/=.*//'`
+
+ if echo "$tgt" | grep "^hsa" > /dev/null ; then
+ enable_hsa=1
+ else
+ enable_offloading=1
+ fi
+
if test x"$offload_targets" = x; then
offload_targets=$tgt
else
@@ -7456,12 +7463,18 @@ cat >>confdefs.h <<_ACEOF
#define OFFLOAD_TARGETS "$offload_targets"
_ACEOF
-if test x"$offload_targets" != x; then
+if test x"$enable_offloading" != x; then
$as_echo "#define ENABLE_OFFLOADING 1" >>confdefs.h
fi
+if test x"$enable_hsa" = x1 ; then
+
+$as_echo "#define ENABLE_HSA 1" >>confdefs.h
+
+fi
+
# Check whether --with-multilib-list was given.
if test "${with_multilib_list+set}" = set; then :
@@ -18162,7 +18175,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 18165 "configure"
+#line 18178 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -18268,7 +18281,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 18271 "configure"
+#line 18284 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -908,6 +908,13 @@ AC_SUBST(accel_dir_suffix)
for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
tgt=`echo $tgt | sed 's/=.*//'`
+
+ if echo "$tgt" | grep "^hsa" > /dev/null ; then
+ enable_hsa=1
+ else
+ enable_offloading=1
+ fi
+
if test x"$offload_targets" = x; then
offload_targets=$tgt
else
@@ -916,11 +923,16 @@ for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
done
AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
[Define to hold the list of target names suitable for offloading.])
-if test x"$offload_targets" != x; then
+if test x"$enable_offloading" != x; then
AC_DEFINE(ENABLE_OFFLOADING, 1,
[Define this to enable support for offloading.])
fi
+if test x"$enable_hsa" = x1 ; then
+ AC_DEFINE(ENABLE_HSA, 1,
+ [Define this to enable support for generating HSAIL.])
+fi
+
AC_ARG_WITH(multilib-list,
[AS_HELP_STRING([--with-multilib-list], [select multilibs (AArch64, SH and x86-64 only)])],
:,
@@ -145,6 +145,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
+DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_INT_PTR, BT_VOID, BT_PTR, BT_INT, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
@@ -1635,6 +1635,8 @@ hsa_brig_emit_function (void)
}
static GTY(()) tree hsa_ctor_statements;
+static GTY(()) tree hsa_dtor_statements;
+
/* Create a static initializator that will register out brig stufgf with
libgomp. */
@@ -1777,16 +1779,36 @@ hsa_output_kernel_mapping (tree brig_decl)
/* Generate an initializer with a call to the registration routine. */
- /* FIXME: gomp_offload_register has one more enum parameter omitted here. */
+ /* __hsa_register_image is an a testing-only registration routine that will
+ go away once the transition to gomp plugin is complete. However, at th
+ moment we support it too. */
tree reg_fn_type = build_function_type_list (void_type_node, ptr_type_node,
ptr_type_node, NULL_TREE);
tree reg_fn = build_fn_decl ("__hsa_register_image", reg_fn_type);
- append_to_statement_list
- (build_call_expr (reg_fn, 2,
+
+ append_to_statement_list
+ (build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER), 3,
build_fold_addr_expr (hsa_libgomp_host_table),
+ /* 7 stands for HSA */
+ build_int_cst (integer_type_node, 7),
build_fold_addr_expr (hsa_img_descriptor)),
&hsa_ctor_statements);
+ append_to_statement_list
+ (build_call_expr (reg_fn, 2,
+ build_fold_addr_expr (hsa_libgomp_host_table),
+ build_fold_addr_expr (hsa_img_descriptor)),
+ &hsa_ctor_statements);
+
cgraph_build_static_cdtor ('I', hsa_ctor_statements, DEFAULT_INIT_PRIORITY);
+
+ append_to_statement_list
+ (build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER),
+ 3, build_fold_addr_expr (hsa_libgomp_host_table),
+ /* 7 stands for HSA */
+ build_int_cst (integer_type_node, 7),
+ build_fold_addr_expr (hsa_img_descriptor)),
+ &hsa_dtor_statements);
+ cgraph_build_static_cdtor ('D', hsa_dtor_statements, DEFAULT_INIT_PRIORITY);
}
@@ -2306,26 +2306,24 @@ sanitize_hsa_name (char *p)
}
/* Genrate HSAIL reprezentation of the current function and write into a
- special section of the output file. */
+ special section of the output file. If KERNEL is set, the function will be
+ considered an HSA kernel callable from the host, otherwise it will be
+ compiled as an HSA function callable from other HSA code. */
static unsigned int
-generate_hsa (void)
+generate_hsa (bool kernel)
{
vec <hsa_op_reg_p> ssa_map = vNULL;
hsa_init_data_for_cfun ();
-
- bool kern_p = lookup_attribute ("hsa",
- DECL_ATTRIBUTES (current_function_decl))
- || lookup_attribute ("hsakernel", DECL_ATTRIBUTES (current_function_decl));
- hsa_cfun.kern_p = kern_p;
+ hsa_cfun.kern_p = kernel;
ssa_map.safe_grow_cleared (SSANAMES (cfun)->length ());
hsa_cfun.name
= xstrdup (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)));
sanitize_hsa_name (hsa_cfun.name);
- if (kern_p)
+ if (hsa_cfun.kern_p)
hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun.name);
gen_function_parameters (ssa_map);
@@ -2572,18 +2570,24 @@ public:
bool
pass_gen_hsail::gate (function *)
{
- return true;
+#ifdef ENABLE_HSA
+ return !flag_disable_hsa;
+#else
+ return false;
+#endif
}
unsigned int
pass_gen_hsail::execute (function *)
{
- if (lookup_attribute ("hsa", DECL_ATTRIBUTES (current_function_decl))
- || lookup_attribute ("hsafunc",
- DECL_ATTRIBUTES (current_function_decl))
+ if (cgraph_node::get_create (current_function_decl)->offloadable
+ || lookup_attribute ("hsa", DECL_ATTRIBUTES (current_function_decl))
|| lookup_attribute ("hsakernel",
DECL_ATTRIBUTES (current_function_decl)))
- return generate_hsa ();
+ return generate_hsa (true);
+ else if (lookup_attribute ("hsafunc",
+ DECL_ATTRIBUTES (current_function_decl)))
+ return generate_hsa (false);
else
return wrap_all_hsa_calls ();
}
@@ -727,6 +727,11 @@ compile_images_for_offload_targets (unsigned in_argc, char *in_argv[],
offload_names = XCNEWVEC (char *, num_targets + 1);
for (unsigned i = 0; i < num_targets; i++)
{
+ /* HSA does not use LTO-like streaming and a different compiler, skip
+ it. */
+ if (strncmp(names[i], "hsa", 3) == 0)
+ continue;
+
offload_names[i]
= compile_offload_image (names[i], compiler_path, in_argc, in_argv,
compiler_opts, compiler_opt_count,
@@ -256,6 +256,10 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_START, "GOMP_single_copy_start",
BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end",
BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_OFFLOAD_REGISTER, "GOMP_offload_register",
+ BT_FN_VOID_PTR_INT_PTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_OFFLOAD_UNREGISTER, "GOMP_offload_unregister",
+ BT_FN_VOID_PTR_INT_PTR, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET, "GOMP_target",
BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR,
ATTR_NOTHROW_LIST)
@@ -1831,8 +1831,35 @@ common_handle_option (struct gcc_options *opts,
break;
case OPT_foffload_:
- /* Deferred. */
- break;
+ {
+ const char *p = arg;
+ opts->x_flag_disable_hsa = true;
+ while (*p != 0)
+ {
+ const char *comma = strchr (p, ',');
+
+ if ((strncmp (p, "disable", 7) == 0)
+ && (p[7] == ',' || p[7] == '\0'))
+ {
+ opts->x_flag_disable_hsa = true;
+ break;
+ }
+
+ if ((strncmp (p, "hsa", 3) == 0)
+ && (p[3] == ',' || p[3] == '\0'))
+ {
+#ifdef ENABLE_HSA
+ opts->x_flag_disable_hsa = false;
+#else
+ sorry ("HSA has not been enabled during configuration");
+#endif
+ }
+ if (!comma)
+ break;
+ p = comma + 1;
+ }
+ break;
+ }
#ifndef ACCEL_COMPILER
case OPT_foffload_abi_:
@@ -89,7 +89,8 @@ DIST_COMMON = $(top_srcdir)/plugin/Makefrag.am ChangeLog \
$(srcdir)/omp_lib.f90.in $(srcdir)/libgomp_f.h.in \
$(srcdir)/libgomp.spec.in $(srcdir)/../depcomp
@PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
-@USE_FORTRAN_TRUE@am__append_2 = openacc.f90
+@PLUGIN_HSA_TRUE@am__append_2 = libgomp-plugin-hsa.la
+@USE_FORTRAN_TRUE@am__append_3 = openacc.f90
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -156,6 +157,17 @@ libgomp_plugin_host_nonshm_la_LINK = $(LIBTOOL) --tag=CC \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libgomp_plugin_host_nonshm_la_LDFLAGS) $(LDFLAGS) -o $@
am__DEPENDENCIES_1 =
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_DEPENDENCIES = libgomp.la \
+@PLUGIN_HSA_TRUE@ $(am__DEPENDENCIES_1)
+@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_OBJECTS = \
+@PLUGIN_HSA_TRUE@ libgomp_plugin_hsa_la-plugin-hsa.lo
+libgomp_plugin_hsa_la_OBJECTS = $(am_libgomp_plugin_hsa_la_OBJECTS)
+libgomp_plugin_hsa_la_LINK = $(LIBTOOL) --tag=CC \
+ $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libgomp_plugin_hsa_la_LDFLAGS) $(LDFLAGS) -o $@
+@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_rpath = -rpath \
+@PLUGIN_HSA_TRUE@ $(toolexeclibdir)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_DEPENDENCIES = libgomp.la \
@PLUGIN_NVPTX_TRUE@ $(am__DEPENDENCIES_1)
@PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_OBJECTS = \
@@ -197,6 +209,7 @@ FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libgomp_plugin_host_nonshm_la_SOURCES) \
+ $(libgomp_plugin_hsa_la_SOURCES) \
$(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES)
MULTISRCTOP =
MULTIBUILDTOP =
@@ -265,6 +278,8 @@ FC = @FC@
FCFLAGS = @FCFLAGS@
FGREP = @FGREP@
GREP = @GREP@
+HSA_RUNTIME_INCLUDE = @HSA_RUNTIME_INCLUDE@
+HSA_RUNTIME_LIB = @HSA_RUNTIME_LIB@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
@@ -309,6 +324,10 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
+PLUGIN_HSA = @PLUGIN_HSA@
+PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@
+PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@
+PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@
PLUGIN_NVPTX = @PLUGIN_NVPTX@
PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@
@@ -401,7 +420,7 @@ libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
AM_CPPFLAGS = $(addprefix -I, $(search_path))
AM_CFLAGS = $(XCFLAGS)
AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
-toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) \
+toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) $(am__append_2) \
libgomp-plugin-host_nonshm.la
nodist_toolexeclib_HEADERS = libgomp.spec
@@ -426,7 +445,7 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
- hsa.c $(am__append_2)
+ hsa.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -437,6 +456,16 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
@PLUGIN_NVPTX_TRUE@ $(lt_host_flags) $(PLUGIN_NVPTX_LDFLAGS)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
+
+# Heterogenous Systems Architecture plugin
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LDFLAGS = \
+@PLUGIN_HSA_TRUE@ $(libgomp_plugin_hsa_version_info) \
+@PLUGIN_HSA_TRUE@ $(lt_host_flags) $(PLUGIN_HSA_LDFLAGS)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
libgomp_plugin_host_nonshm_version_info = -version-info $(libtool_VERSION)
libgomp_plugin_host_nonshm_la_SOURCES = plugin/plugin-host.c
libgomp_plugin_host_nonshm_la_CPPFLAGS = $(AM_CPPFLAGS) -DHOST_NONSHM_PLUGIN
@@ -574,6 +603,8 @@ clean-toolexeclibLTLIBRARIES:
done
libgomp-plugin-host_nonshm.la: $(libgomp_plugin_host_nonshm_la_OBJECTS) $(libgomp_plugin_host_nonshm_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_host_nonshm_la_DEPENDENCIES)
$(libgomp_plugin_host_nonshm_la_LINK) -rpath $(toolexeclibdir) $(libgomp_plugin_host_nonshm_la_OBJECTS) $(libgomp_plugin_host_nonshm_la_LIBADD) $(LIBS)
+libgomp-plugin-hsa.la: $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_hsa_la_DEPENDENCIES)
+ $(libgomp_plugin_hsa_la_LINK) $(am_libgomp_plugin_hsa_la_rpath) $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_LIBADD) $(LIBS)
libgomp-plugin-nvptx.la: $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_nvptx_la_DEPENDENCIES)
$(libgomp_plugin_nvptx_la_LINK) $(am_libgomp_plugin_nvptx_la_rpath) $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_LIBADD) $(LIBS)
libgomp.la: $(libgomp_la_OBJECTS) $(libgomp_la_DEPENDENCIES) $(EXTRA_libgomp_la_DEPENDENCIES)
@@ -598,6 +629,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@
@@ -652,6 +684,13 @@ libgomp_plugin_host_nonshm_la-plugin-host.lo: plugin/plugin-host.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_host_nonshm_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_host_nonshm_la-plugin-host.lo `test -f 'plugin/plugin-host.c' || echo '$(srcdir)/'`plugin/plugin-host.c
+libgomp_plugin_hsa_la-plugin-hsa.lo: plugin/plugin-hsa.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_hsa_la-plugin-hsa.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='plugin/plugin-hsa.c' object='libgomp_plugin_hsa_la-plugin-hsa.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c
+
libgomp_plugin_nvptx_la-plugin-nvptx.lo: plugin/plugin-nvptx.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_nvptx_la-plugin-nvptx.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo
@@ -116,6 +116,9 @@
/* Define to the version of this package. */
#undef PACKAGE_VERSION
+/* Define to 1 if the HSA plugin is built, 0 if not. */
+#undef PLUGIN_HSA
+
/* Define to 1 if the NVIDIA plugin is built, 0 if not. */
#undef PLUGIN_NVPTX
@@ -627,10 +627,18 @@ LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE
LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE
OPT_LDFLAGS
SECTION_LDFLAGS
+PLUGIN_HSA_FALSE
+PLUGIN_HSA_TRUE
PLUGIN_NVPTX_FALSE
PLUGIN_NVPTX_TRUE
offload_additional_lib_paths
offload_additional_options
+PLUGIN_HSA_LIBS
+PLUGIN_HSA_LDFLAGS
+PLUGIN_HSA_CPPFLAGS
+PLUGIN_HSA
+HSA_RUNTIME_LIB
+HSA_RUNTIME_INCLUDE
PLUGIN_NVPTX_LIBS
PLUGIN_NVPTX_LDFLAGS
PLUGIN_NVPTX_CPPFLAGS
@@ -782,6 +790,9 @@ enable_maintainer_mode
with_cuda_driver
with_cuda_driver_include
with_cuda_driver_lib
+with_hsa_runtime
+with_hsa_runtime_include
+with_hsa_runtime_lib
enable_linux_futex
enable_tls
enable_symvers
@@ -1453,6 +1464,16 @@ Optional Packages:
--with-cuda-driver-lib=PATH
specify directory for the installed CUDA driver
library
+ --with-hsa-runtime=PATH specify prefix directory for installed HSA run-time
+ package. Equivalent to
+ --with-hsa-runtime-include=PATH/include plus
+ --with-hsa-runtime-lib=PATH/lib
+ --with-hsa-runtime-include=PATH
+ specify directory for installed HSA run-time include
+ files
+ --with-hsa-runtime-lib=PATH
+ specify directory for the installed HSA run-time
+ library
Some influential environment variables:
CC C compiler command
@@ -11121,7 +11142,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11124 "configure"
+#line 11145 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -11227,7 +11248,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11230 "configure"
+#line 11251 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -15223,6 +15244,61 @@ PLUGIN_NVPTX_LIBS=
+# Look for HSA run-time, its includes and libraries
+
+HSA_RUNTIME_INCLUDE=
+HSA_RUNTIME_LIB=
+
+
+HSA_RUNTIME_CPPFLAGS=
+HSA_RUNTIME_LDFLAGS=
+
+
+# Check whether --with-hsa-runtime was given.
+if test "${with_hsa_runtime+set}" = set; then :
+ withval=$with_hsa_runtime;
+fi
+
+
+# Check whether --with-hsa-runtime-include was given.
+if test "${with_hsa_runtime_include+set}" = set; then :
+ withval=$with_hsa_runtime_include;
+fi
+
+
+# Check whether --with-hsa-runtime-lib was given.
+if test "${with_hsa_runtime_lib+set}" = set; then :
+ withval=$with_hsa_runtime_lib;
+fi
+
+if test "x$with_hsa_runtime" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime/include
+ HSA_RUNTIME_LIB=$with_hsa_runtime/lib
+fi
+if test "x$with_hsa_runtime_include" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime_include
+fi
+if test "x$with_hsa_runtime_lib" != x; then
+ HSA_RUNTIME_LIB=$with_hsa_runtime_lib
+fi
+if test "x$HSA_RUNTIME_INCLUDE" != x; then
+ HSA_RUNTIME_CPPFLAGS=-I$HSA_RUNTIME_INCLUDE
+fi
+if test "x$HSA_RUNTIME_LIB" != x; then
+ HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
+fi
+
+PLUGIN_HSA=0
+PLUGIN_HSA_CPPFLAGS=
+PLUGIN_HSA_LDFLAGS=
+PLUGIN_HSA_LIBS=
+
+
+
+
+
+
+
# Get offload targets and path to install tree of offloading compiler.
offload_additional_options=
offload_additional_lib_paths=
@@ -15275,6 +15351,46 @@ rm -f core conftest.err conftest.$ac_objext \
;;
esac
;;
+ hsa*)
+ tgt_name=hsa
+ PLUGIN_HSA=$tgt
+ PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ PLUGIN_HSA_LDFLAGS=$HSA_RUNTIME_LDFLAGS
+ PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt"
+
+ PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
+ CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
+ PLUGIN_HSA_save_LDFLAGS=$LDFLAGS
+ LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS"
+ PLUGIN_HSA_save_LIBS=$LIBS
+ LIBS="$PLUGIN_HSA_LIBS $LIBS"
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include "hsa.h"
+int
+main ()
+{
+hsa_status_t status = hsa_init ()
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ PLUGIN_HSA=1
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
+ LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ LIBS=$PLUGIN_HSA_save_LIBS
+ case $PLUGIN_HSA in
+ hsa*)
+ HSA_NVPTX=0
+ as_fn_error "HSA run-time package required for HSA support" "$LINENO" 5
+ ;;
+ esac
+ ;;
*)
as_fn_error "unknown offload target specified" "$LINENO" 5
;;
@@ -15311,6 +15427,19 @@ cat >>confdefs.h <<_ACEOF
#define PLUGIN_NVPTX $PLUGIN_NVPTX
_ACEOF
+ if test $PLUGIN_HSA = 1; then
+ PLUGIN_HSA_TRUE=
+ PLUGIN_HSA_FALSE='#'
+else
+ PLUGIN_HSA_TRUE='#'
+ PLUGIN_HSA_FALSE=
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define PLUGIN_HSA $PLUGIN_HSA
+_ACEOF
+
# Check for functions needed.
@@ -16693,6 +16822,10 @@ if test -z "${PLUGIN_NVPTX_TRUE}" && test -z "${PLUGIN_NVPTX_FALSE}"; then
as_fn_error "conditional \"PLUGIN_NVPTX\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${PLUGIN_HSA_TRUE}" && test -z "${PLUGIN_HSA_FALSE}"; then
+ as_fn_error "conditional \"PLUGIN_HSA\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE}"; then
as_fn_error "conditional \"LIBGOMP_BUILD_VERSIONED_SHLIB\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -48,7 +48,8 @@ enum offload_target_type
OFFLOAD_TARGET_TYPE_HOST = 2,
OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3,
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
- OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
+ OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
+ OFFLOAD_TARGET_TYPE_HSA = 7
};
/* Auxiliary struct, used for transferring pairs of addresses from plugin
@@ -39,6 +39,19 @@ libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS)
libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
endif
+if PLUGIN_HSA
+# Heterogenous Systems Architecture plugin
+libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION)
+toolexeclib_LTLIBRARIES += libgomp-plugin-hsa.la
+libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c
+libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS)
+libgomp_plugin_hsa_la_LDFLAGS = $(libgomp_plugin_hsa_version_info) \
+ $(lt_host_flags)
+libgomp_plugin_hsa_la_LDFLAGS += $(PLUGIN_HSA_LDFLAGS)
+libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
+libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
+endif
+
libgomp_plugin_host_nonshm_version_info = -version-info $(libtool_VERSION)
toolexeclib_LTLIBRARIES += libgomp-plugin-host_nonshm.la
libgomp_plugin_host_nonshm_la_SOURCES = plugin/plugin-host.c
@@ -82,6 +82,54 @@ AC_SUBST(PLUGIN_NVPTX_CPPFLAGS)
AC_SUBST(PLUGIN_NVPTX_LDFLAGS)
AC_SUBST(PLUGIN_NVPTX_LIBS)
+# Look for HSA run-time, its includes and libraries
+
+HSA_RUNTIME_INCLUDE=
+HSA_RUNTIME_LIB=
+AC_SUBST(HSA_RUNTIME_INCLUDE)
+AC_SUBST(HSA_RUNTIME_LIB)
+HSA_RUNTIME_CPPFLAGS=
+HSA_RUNTIME_LDFLAGS=
+
+AC_ARG_WITH(hsa-runtime,
+ [AS_HELP_STRING([--with-hsa-runtime=PATH],
+ [specify prefix directory for installed HSA run-time package.
+ Equivalent to --with-hsa-runtime-include=PATH/include
+ plus --with-hsa-runtime-lib=PATH/lib])])
+AC_ARG_WITH(hsa-runtime-include,
+ [AS_HELP_STRING([--with-hsa-runtime-include=PATH],
+ [specify directory for installed HSA run-time include files])])
+AC_ARG_WITH(hsa-runtime-lib,
+ [AS_HELP_STRING([--with-hsa-runtime-lib=PATH],
+ [specify directory for the installed HSA run-time library])])
+if test "x$with_hsa_runtime" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime/include
+ HSA_RUNTIME_LIB=$with_hsa_runtime/lib
+fi
+if test "x$with_hsa_runtime_include" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime_include
+fi
+if test "x$with_hsa_runtime_lib" != x; then
+ HSA_RUNTIME_LIB=$with_hsa_runtime_lib
+fi
+if test "x$HSA_RUNTIME_INCLUDE" != x; then
+ HSA_RUNTIME_CPPFLAGS=-I$HSA_RUNTIME_INCLUDE
+fi
+if test "x$HSA_RUNTIME_LIB" != x; then
+ HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
+fi
+
+PLUGIN_HSA=0
+PLUGIN_HSA_CPPFLAGS=
+PLUGIN_HSA_LDFLAGS=
+PLUGIN_HSA_LIBS=
+AC_SUBST(PLUGIN_HSA)
+AC_SUBST(PLUGIN_HSA_CPPFLAGS)
+AC_SUBST(PLUGIN_HSA_LDFLAGS)
+AC_SUBST(PLUGIN_HSA_LIBS)
+
+
+
# Get offload targets and path to install tree of offloading compiler.
offload_additional_options=
offload_additional_lib_paths=
@@ -123,6 +171,35 @@ if test x"$enable_offload_targets" != x; then
;;
esac
;;
+ hsa*)
+ tgt_name=hsa
+ PLUGIN_HSA=$tgt
+ PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ PLUGIN_HSA_LDFLAGS=$HSA_RUNTIME_LDFLAGS
+ PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt"
+
+ PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
+ CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
+ PLUGIN_HSA_save_LDFLAGS=$LDFLAGS
+ LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS"
+ PLUGIN_HSA_save_LIBS=$LIBS
+ LIBS="$PLUGIN_HSA_LIBS $LIBS"
+
+ AC_LINK_IFELSE(
+ [AC_LANG_PROGRAM(
+ [#include "hsa.h"],
+ [hsa_status_t status = hsa_init ()])],
+ [PLUGIN_HSA=1])
+ CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
+ LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ LIBS=$PLUGIN_HSA_save_LIBS
+ case $PLUGIN_HSA in
+ hsa*)
+ HSA_NVPTX=0
+ AC_MSG_ERROR([HSA run-time package required for HSA support])
+ ;;
+ esac
+ ;;
*)
AC_MSG_ERROR([unknown offload target specified])
;;
@@ -146,3 +223,6 @@ AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1])
AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX],
[Define to 1 if the NVIDIA plugin is built, 0 if not.])
+AM_CONDITIONAL([PLUGIN_HSA], [test $PLUGIN_HSA = 1])
+AC_DEFINE_UNQUOTED([PLUGIN_HSA], [$PLUGIN_HSA],
+ [Define to 1 if the HSA plugin is built, 0 if not.])
new file mode 100644
@@ -0,0 +1,805 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include "libgomp-plugin.h"
+#include "hsa.h"
+#include "hsa_ext_finalize.h"
+
+/* Part of the libgomp plugin interface. Return the name of the accelerator,
+ which is "hsa". */
+
+const char *
+GOMP_OFFLOAD_get_name (void)
+{
+ return "hsa";
+}
+
+/* Part of the libgomp plugin interface. Return the specific capabilities the
+ HSA accelerator have. */
+
+unsigned int
+GOMP_OFFLOAD_get_caps (void)
+{
+ return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
+}
+
+/* Part of the libgomp plugin interface. Identify as HSA accelerator. */
+
+int
+GOMP_OFFLOAD_get_type (void)
+{
+ return OFFLOAD_TARGET_TYPE_HSA;
+}
+
+/* Flag to decide whether print to stderr information about what is going on.
+ Set in init_debug depending on environment variables. */
+
+static bool debug;
+
+/* Initialize debug according to the environment. */
+
+static void
+init_debug (void)
+{
+ if (getenv ("HSA_DEBUG"))
+ debug = true;
+ else
+ debug = false;
+}
+
+/* Report a fatal error STR together with the HSA error corresponding to STATUS
+ and terminate execution of the current process. */
+
+static void
+hsa_fatal (const char *str, hsa_status_t status)
+{
+ const char* hsa_error;
+ hsa_status_string (status, &hsa_error);
+ GOMP_PLUGIN_fatal ("HSA fatal error: %s (%s)", str, hsa_error);
+}
+
+/* Data passed by the static initializer of a compilation unit containing BRIG
+ to GOMP_offload_register. */
+
+struct brig_image_desc
+{
+ hsa_ext_module_t brig_module;
+ const char *names;
+};
+
+struct agent_info;
+
+/* Information required to identify, finalize and run any given kernel. */
+
+struct kernel_info
+{
+ /* Name of the kernel, required to locate it within the brig module. */
+ const char *name;
+ /* The specific agent the kernel has been or will be finalized for and run
+ on. */
+ struct agent_info *agent;
+ /* Mutex enforcing that at most once thread ever initializes a kernel for
+ use. A thread should have locked agent->modules_rwlock for reading before
+ acquiring it. */
+ pthread_mutex_t init_mutex;
+ /* Flag indicating whether the kernel has been initialized and all fields
+ below it contain valid data. */
+ bool initialized;
+ /* The object to be put into the dispatch queue. */
+ uint64_t object;
+ /* Required size of kernel arguments. */
+ uint32_t kernarg_segment_size;
+ /* Required size of group segment. */
+ uint32_t group_segment_size;
+ /* Required size of private segment. */
+ uint32_t private_segment_size;
+};
+
+/* Information about a particular brig module, its image and kernels. */
+
+struct module_info
+{
+ /* The next and previous module in the linked list of modules of an agent. */
+ struct module_info *next, *prev;
+ /* The description with which the program has registered the image. */
+ struct brig_image_desc *image_desc;
+
+ /* Number of kernels in this module. */
+ int kernel_count;
+ /* An array of kernel_info structures describing each kernel in this
+ module. */
+ struct kernel_info kernels[];
+};
+
+/* Description of an HSA GPU agent and the program associated with it. */
+
+struct agent_info
+{
+ /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
+ hsa_agent_t id;
+ /* Whether the agent has been initialized. The fields below are usable only
+ if it has been. */
+ bool initialized;
+ /* The HSA ISA of this agent. */
+ hsa_isa_t isa;
+ /* Command queue of the agent. */
+ hsa_queue_t* command_q;
+ /* The HSA memory region from which to allocate kernel arguments. */
+ hsa_region_t kernarg_region;
+
+ /* Read-write lock that protects kernels which are running or about to be run
+ from interference with loading and unloading of images. Needs to be
+ locked for reading while a kernel is being run, and for writing if the
+ list of modules is manipulated (and thus the HSA program invalidated). */
+ pthread_rwlock_t modules_rwlock;
+ /* The first module in a linked list of modules associated with this
+ kernel. */
+ struct module_info *first_module;
+
+ /* Mutex enforcing that only one thread will finalize the HSA program. A
+ thread should have locked agent->modules_rwlock for reading before
+ acquiring it. */
+ pthread_mutex_t prog_mutex;
+ /* Flag whether the HSA program that consists of all the modules has been
+ finalized. */
+ bool prog_finalized;
+ /* HSA executable - the finalized program that is used to locate kernels. */
+ hsa_executable_t executable;
+};
+
+/* Information about the whole HSA environment and all of its agents. */
+
+struct hsa_context_info
+{
+ /* Whether the structure has been initialized. */
+ bool initialized;
+ /* Number of usable GPU HSA agents in the system. */
+ int agent_count;
+ /* Array of agent_info structures describing the individual HSA agents. */
+ struct agent_info *agents;
+};
+
+/* Information about the whole HSA environment and all of its agents. */
+
+static struct hsa_context_info hsa_context;
+
+/* Return true if the agent is a GPU and acceptable of concurrent submissions
+ from different threads. */
+
+static bool
+suitable_hsa_agent_p (hsa_agent_t agent)
+{
+ hsa_device_type_t device_type;
+ hsa_status_t status = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE,
+ &device_type);
+ if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU)
+ return false;
+
+ uint32_t features = 0;
+ status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features);
+ if (status != HSA_STATUS_SUCCESS
+ || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
+ return false;
+ hsa_queue_type_t queue_type;
+ status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+ if (status != HSA_STATUS_SUCCESS
+ || (queue_type != HSA_QUEUE_TYPE_MULTI))
+ return false;
+
+ return true;
+}
+
+/* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
+ agent_count in hsa_context. */
+
+static hsa_status_t
+count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
+{
+ if (suitable_hsa_agent_p (agent))
+ hsa_context.agent_count++;
+ return HSA_STATUS_SUCCESS;
+}
+
+/* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
+ id to the describing structure in the hsa context. The index of the
+ structure is pointed to by DATA, increment it afterwards. */
+
+static hsa_status_t
+assign_agent_ids (hsa_agent_t agent, void *data)
+{
+ if (suitable_hsa_agent_p (agent))
+ {
+ int *agent_index = (int *) data;
+ hsa_context.agents[*agent_index].id = agent;
+ ++*agent_index;
+ }
+ return HSA_STATUS_SUCCESS;
+}
+
+/* Initialize hsa_context if it has not already been done. */
+
+static void
+init_hsa_context (void)
+{
+ hsa_status_t status;
+ int agent_index = 0;
+
+ if (hsa_context.initialized)
+ return;
+ init_debug ();
+ status = hsa_init ();
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Run-time could not be initialized", status);
+ if (debug)
+ fprintf (stderr, "HSA run-time initialized\n");
+ status = hsa_iterate_agents (count_gpu_agents, NULL);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("HSA GPU devices could not be enumerated", status);
+ if (debug)
+ fprintf (stderr, "There are %i HSA GPU devices.\n", hsa_context.agent_count);
+
+ hsa_context.agents
+ = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
+ * sizeof (struct agent_info));
+ status = hsa_iterate_agents (assign_agent_ids, &agent_index);
+ if (agent_index != hsa_context.agent_count)
+ GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
+ hsa_context.initialized = true;
+}
+
+/* Callback of dispatch queues to report errors. */
+
+static void
+queue_callback(hsa_status_t status, hsa_queue_t* queue __attribute__ ((unused)),
+ void* data __attribute__ ((unused)))
+{
+ hsa_fatal ("Asynchronous queue error", status);
+}
+
+/* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
+ used for kernarg allocations and if so write it to the memory pointed to by
+ DATA and break the query. */
+
+static hsa_status_t get_kernarg_memory_region (hsa_region_t region, void* data)
+{
+ hsa_status_t status;
+ hsa_region_segment_t segment;
+
+ status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment);
+ if (status != HSA_STATUS_SUCCESS)
+ return status;
+ if (segment != HSA_REGION_SEGMENT_GLOBAL)
+ return HSA_STATUS_SUCCESS;
+
+ uint32_t flags;
+ status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
+ if (status != HSA_STATUS_SUCCESS)
+ return status;
+ if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
+ {
+ hsa_region_t* ret = (hsa_region_t*) data;
+ *ret = region;
+ return HSA_STATUS_INFO_BREAK;
+ }
+ return HSA_STATUS_SUCCESS;
+}
+
+/* Part of the libgomp plugin interface. Return the number of HSA devices on
+ the system. */
+
+int
+GOMP_OFFLOAD_get_num_devices (void)
+{
+ init_hsa_context ();
+ return hsa_context.agent_count;
+}
+
+/* Part of the libgomp plugin interface. Initialize agent number N so that it
+ can be used for computation. */
+
+void
+GOMP_OFFLOAD_init_device (int n)
+{
+ init_hsa_context ();
+ if (n >= hsa_context.agent_count)
+ GOMP_PLUGIN_fatal ("Request to initialize non-existing HSA device %i", n);
+ struct agent_info *agent = &hsa_context.agents[n];
+
+ if (agent->initialized)
+ return;
+
+ if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
+ GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent rwlock");
+ if (pthread_mutex_init (&agent->prog_mutex, NULL))
+ GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent program mutex");
+
+ uint32_t queue_size;
+ hsa_status_t status;
+ status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+ &queue_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error requesting maximum queue size of the HSA agent", status);
+ status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error querying the ISA of the agent", status);
+ status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
+ queue_callback, NULL, UINT32_MAX, UINT32_MAX,
+ &agent->command_q);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error creating command queue", status);
+
+ agent->kernarg_region.handle = (uint64_t) -1;
+ status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
+ &agent->kernarg_region);
+ if (agent->kernarg_region.handle == (uint64_t) -1)
+ GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel "
+ "arguments");
+ if (debug)
+ fprintf (stderr, "HSA agent initialized, queue has id %llu\n",
+ (long long unsigned) agent->command_q->id);
+ agent->initialized = true;
+}
+
+/* Verify that hsa_context has already been initialized and return the
+ agent_info structure describing device number N. */
+
+static struct agent_info *
+get_agent_info (int n)
+{
+ if (!hsa_context.initialized)
+ GOMP_PLUGIN_fatal ("Attempt to use uninitialized HSA context.");
+ if (n >= hsa_context.agent_count)
+ GOMP_PLUGIN_fatal ("Request to operate on anon-existing HSA device %i", n);
+ if (!hsa_context.agents[n].initialized)
+ GOMP_PLUGIN_fatal ("Attempt to use an uninitialized HSA agent.");
+ return &hsa_context.agents[n];
+}
+
+/* Insert MODULE to the linked list of modules of AGENT. */
+
+static void
+add_module_to_agent (struct agent_info *agent, struct module_info *module)
+{
+ if (agent->first_module)
+ agent->first_module->prev = module;
+ module->next = agent->first_module;
+ module->prev = NULL;
+ agent->first_module = module;
+}
+
+/* Remove MODULE from the linked list of modules of AGENT. */
+
+static void
+remove_module_from_agent (struct agent_info *agent, struct module_info *module)
+{
+ if (agent->first_module == module)
+ agent->first_module = module->next;
+ if (module->prev)
+ module->prev->next = module->next;
+ if (module->next)
+ module->next->prev = module->prev;
+}
+
+/* Free the HSA program in agent and everything associated with it and set
+ agent->prog_finalized and the initialized flags of all kernels to false. */
+
+static void
+destroy_hsa_program (struct agent_info *agent)
+{
+ hsa_status_t status;
+
+ if (debug)
+ fprintf (stderr, "Destroying the current HSA program.\n");
+
+ status = hsa_executable_destroy (agent->executable);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not destroy HSA executable", status);
+
+ struct module_info *module;
+ for (module = agent->first_module; module; module = module->next)
+ {
+ int i;
+ for (i = 0; i < module->kernel_count; i++)
+ module->kernels[i].initialized = false;
+ }
+ agent->prog_finalized = false;
+}
+
+/* Part of the libgomp plugin interface. Load BRIG module described by struct
+ brig_image_desc in TARGET_DATA and return references to kernel descriptors
+ in TARGET_TABLE. */
+
+int
+GOMP_OFFLOAD_load_image (int ord, void *target_data,
+ struct addr_pair **target_table)
+{
+ struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
+ struct agent_info *agent;
+ struct addr_pair *pair;
+ struct module_info *module;
+ struct kernel_info *kernel;
+ int kernel_count = 0;
+ const char *p;
+
+ agent = get_agent_info (ord);
+ if (pthread_rwlock_wrlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
+ if (agent->prog_finalized)
+ destroy_hsa_program (agent);
+
+ p = image_desc->names;
+ while (*p)
+ {
+ kernel_count++;
+ do
+ p++;
+ while (*p);
+ p++;
+ }
+ if (kernel_count == 0)
+ GOMP_PLUGIN_fatal ("No kernels encountered in a brig module description");
+ if (debug)
+ fprintf (stderr, "Encountered %d kernels in an image\n", kernel_count);
+ pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
+ *target_table = pair;
+ module = (struct module_info *)
+ GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
+ + kernel_count * sizeof (struct kernel_info));
+ module->image_desc = image_desc;
+ module->kernel_count = kernel_count;
+
+ p = image_desc->names;
+ kernel = &module->kernels[0];
+ while (*p)
+ {
+ pair->start = (uintptr_t) kernel;
+ pair->end = (uintptr_t) (kernel + 1);
+ kernel->name = p;
+ kernel->agent = agent;
+ if (pthread_mutex_init (&kernel->init_mutex, NULL))
+ GOMP_PLUGIN_fatal ("Failed to initialize an HSA kernel mutex");
+ kernel++;
+ pair++;
+ do
+ p++;
+ while (*p);
+ p++;
+ }
+
+ add_module_to_agent (agent, module);
+ if (pthread_rwlock_unlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+ return kernel_count;
+}
+
+/* Create and finalize the program consisting of all loaded modules. */
+
+static void
+create_and_finalize_hsa_program (struct agent_info *agent)
+{
+ hsa_status_t status;
+ hsa_ext_program_t prog_handle;
+ int mi = 0;
+
+ if (pthread_mutex_lock (&agent->prog_mutex))
+ GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
+ if (agent->prog_finalized)
+ {
+ if (pthread_mutex_unlock (&agent->prog_mutex))
+ GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
+ return;
+ }
+
+ status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
+ HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+ NULL, &prog_handle);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not create an HSA program", status);
+ if (debug)
+ fprintf (stderr, "Created a finalizer program\n");
+
+ struct module_info *module = agent->first_module;
+ while (module)
+ {
+ status = hsa_ext_program_add_module(prog_handle,
+ module->image_desc->brig_module);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not add a module to the HSA program", status);
+ if (debug)
+ fprintf (stderr, "Added module %i to the HSA program\n", mi);
+ module = module->next;
+ mi++;
+ }
+ hsa_ext_control_directives_t control_directives;
+ memset (&control_directives, 0, sizeof (control_directives));
+ hsa_code_object_t code_object;
+ status = hsa_ext_program_finalize(prog_handle, agent->isa,
+ HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+ control_directives, "",
+ HSA_CODE_OBJECT_TYPE_PROGRAM,
+ &code_object);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Finalization of the HSA program failed", status);
+ if (debug)
+ fprintf (stderr, "Finalization done\n");
+ hsa_ext_program_destroy (prog_handle);
+
+ status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
+ "", &agent->executable);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not create HSA executable", status);
+
+ status = hsa_executable_load_code_object(agent->executable, agent->id,
+ code_object, "");
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not add a code object to the HSA executable", status);
+ status = hsa_executable_freeze(agent->executable, "");
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not freeze the HSA executable", status);
+
+ if (debug)
+ fprintf (stderr, "Froze HSA executable with the finalized code object\n");
+ agent->prog_finalized = true;
+ if (pthread_mutex_unlock (&agent->prog_mutex))
+ GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
+}
+
+/* Do all the work that is necessary before running KERNEL for the first time.
+ The function assumes the program has been created, finalized and frozen by
+ create_and_finalize_hsa_program. */
+
+static void
+init_kernel (struct kernel_info *kernel)
+{
+ if (pthread_mutex_lock (&kernel->init_mutex))
+ GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
+ if (kernel->initialized)
+ {
+ if (pthread_mutex_unlock (&kernel->init_mutex))
+ GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
+ "mutex");
+ return;
+ }
+
+ hsa_status_t status;
+ struct agent_info *agent = kernel->agent;
+ hsa_executable_symbol_t kernel_symbol;
+ status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name,
+ agent->id, 0, &kernel_symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not find symbol for kernel in the code object", status);
+ if (debug)
+ fprintf (stderr, "Located kernel %s\n", kernel->name);
+ status = hsa_executable_symbol_get_info
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a kernel object from its symbol", status);
+ status = hsa_executable_symbol_get_info
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+ &kernel->kernarg_segment_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not get info about kernel argument size", status);
+ status = hsa_executable_symbol_get_info
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
+ &kernel->group_segment_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not get info about kernel group segment size", status);
+ status = hsa_executable_symbol_get_info
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
+ &kernel->private_segment_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not get info about kernel private segment size", status);
+
+ if (debug)
+ {
+ fprintf (stderr, "Kernel structure for %s fully initialized\n",
+ kernel->name);
+ fprintf (stderr, " group_segment_size: %u\n",
+ (unsigned) kernel->group_segment_size);
+ fprintf (stderr, " private_segment_size: %u\n",
+ (unsigned) kernel->private_segment_size);
+ fprintf (stderr, " kernarg_segment_size: %u\n",
+ (unsigned) kernel->kernarg_segment_size);
+ }
+ kernel->initialized = true;
+ if (pthread_mutex_unlock (&kernel->init_mutex))
+ GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
+ "mutex");
+}
+
+/* Part of the libgomp plugin interface. Run a kernel on a device N and pass
+ the it an array of pointers in VARS as a parameter. The kernel is
+ identified by FN_PTR which must point to a kernel_info structure. */
+
+void
+GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars)
+{
+ struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+ struct agent_info *agent = kernel->agent;
+ if (pthread_rwlock_rdlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
+
+ create_and_finalize_hsa_program (agent);
+ init_kernel (kernel);
+
+ hsa_status_t status;
+ void *kernarg_addr;
+ /* Allocate the kernel argument buffer from the correct region. */
+ status = hsa_memory_allocate (agent->kernarg_region,
+ kernel->kernarg_segment_size, &kernarg_addr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
+ hsa_signal_t sync_signal;
+ status = hsa_signal_create (1, 0, NULL, &sync_signal);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error creating the HSA sync signal", status);
+
+ uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1);
+ if (debug)
+ fprintf (stderr, "Got AQL index %llu\n", (long long int) index);
+
+ /* Wait until the queue is not full before writing the packet. */
+ while (index - hsa_queue_load_read_index_acquire(agent->command_q)
+ >= agent->command_q->size)
+ ;
+
+ hsa_kernel_dispatch_packet_t *packet;
+ packet = ((hsa_kernel_dispatch_packet_t*) agent->command_q->base_address)
+ + index % agent->command_q->size;
+ hsa_signal_store_relaxed (sync_signal, 1);
+ memset (((uint8_t *)packet) + 4, 0, sizeof (*packet) - 4);
+ packet->setup |= (uint16_t) 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+ packet->grid_size_x = 1;
+ packet->workgroup_size_x = 1;
+ packet->grid_size_y = 1;
+ packet->workgroup_size_y = 1;
+ packet->grid_size_z = 1;
+ packet->workgroup_size_z = 1;
+ packet->private_segment_size = kernel->private_segment_size;
+ packet->group_segment_size = kernel->group_segment_size;
+ packet->kernel_object = kernel->object;
+ packet->kernarg_address = kernarg_addr;
+ packet->completion_signal = sync_signal;
+ memcpy (kernarg_addr, &vars, sizeof(vars));
+
+ uint16_t header;
+ header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+ if (debug)
+ fprintf (stderr, "Going to dispatch kernel %s\n", kernel->name);
+
+ __atomic_store_n ((uint16_t*)(&packet->header), header, __ATOMIC_RELEASE);
+ hsa_signal_store_release (agent->command_q->doorbell_signal, index);
+
+ if (debug)
+ fprintf (stderr, "Kernel dispatched, waiting for completion\n");
+ hsa_signal_wait_acquire(sync_signal, HSA_SIGNAL_CONDITION_LT, 1,
+ UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+ if (debug)
+ fprintf (stderr, "Kernel %s returned\n", kernel->name);
+ hsa_signal_destroy(sync_signal);
+ hsa_memory_free (kernarg_addr);
+ if (pthread_rwlock_unlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+}
+
+/* Deinitialize all information associated with MODULE and kernels within
+ it. */
+
+void
+destroy_module (struct module_info *module)
+{
+ int i;
+ for (i = 0; i < module->kernel_count; i++)
+ if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
+ GOMP_PLUGIN_fatal ("Failed to destroy an HSA kernel initialization mutex");
+}
+
+/* Part of the libgomp plugin interface. Unload BRIG module described by
+ struct brig_image_desc in TARGET_DATA from agent number N. */
+
+void
+GOMP_OFFLOAD_unload_image (int n, void *target_data)
+{
+ struct agent_info *agent;
+ agent = get_agent_info (n);
+ if (pthread_rwlock_wrlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
+
+ struct module_info *module = agent->first_module;
+ while (module)
+ {
+ if (module->image_desc == target_data)
+ break;
+ module = module->next;
+ }
+ if (!module)
+ GOMP_PLUGIN_fatal ("Attempt to unload an image that has never been "
+ "loaded before");
+
+ remove_module_from_agent (agent, module);
+ destroy_module (module);
+ free (module);
+ if (agent->prog_finalized)
+ destroy_hsa_program (agent);
+ if (pthread_rwlock_unlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+}
+
+/* Part of the libgomp plugin interface. Deinitialize all information and
+ status associated with agent number N. We do not attempt any
+ synchronization, assuming the user and libgomp will not attempt
+ deinitialization of a device that is in any way being used at the same
+ time. */
+
+void
+GOMP_OFFLOAD_fini_device (int n)
+{
+ struct agent_info *agent = get_agent_info (n);
+ if (!agent->initialized)
+ return;
+
+ struct module_info *next_module = agent->first_module;
+ while (next_module)
+ {
+ struct module_info *module = next_module;
+ next_module = module->next;
+ destroy_module (module);
+ free (module);
+ }
+ agent->first_module = NULL;
+ if (agent->prog_finalized)
+ destroy_hsa_program (agent);
+
+ hsa_status_t status = hsa_queue_destroy (agent->command_q);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error destroying command queue", status);
+ if (pthread_mutex_destroy (&agent->prog_mutex))
+ GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex");
+ if (pthread_rwlock_destroy (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent rwlock");
+ agent->initialized = false;
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void *
+GOMP_OFFLOAD_alloc (int ord, size_t size)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_alloc is not implemented because "
+ "it should never be called");
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void
+GOMP_OFFLOAD_free (int ord, void *ptr)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_free is not implemented because "
+ "it should never be called");
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void *
+GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
+ "it should never be called");
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void *
+GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
+ "it should never be called");
+}
@@ -992,9 +992,12 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
fn_addr = (void *) tgt_fn->tgt_offset;
}
- struct target_mem_desc *tgt_vars
- = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
- true);
+ struct target_mem_desc *tgt_vars;
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ tgt_vars = NULL;
+ else
+ tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
+ false, true);
struct gomp_thread old_thr, *thr = gomp_thread ();
old_thr = *thr;
memset (thr, '\0', sizeof (*thr));
@@ -1003,10 +1006,12 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
thr->place = old_thr.place;
thr->ts.place_partition_len = gomp_places_list_len;
}
- devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+ devicep->run_func (devicep->target_id, fn_addr,
+ tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs);
gomp_free_thread (thr);
*thr = old_thr;
- gomp_unmap_vars (tgt_vars, true);
+ if (tgt_vars)
+ gomp_unmap_vars (tgt_vars, true);
}
void
@@ -1016,9 +1021,10 @@ GOMP_target_data (int device, const void *unused, size_t mapnum,
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
+ || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
{
- /* Host fallback. */
+ /* Host fallback or accelerators with memory coherent access. */
struct gomp_task_icv *icv = gomp_icv (false);
if (icv->target_data)
{
@@ -111,6 +111,8 @@ FC = @FC@
FCFLAGS = @FCFLAGS@
FGREP = @FGREP@
GREP = @GREP@
+HSA_RUNTIME_INCLUDE = @HSA_RUNTIME_INCLUDE@
+HSA_RUNTIME_LIB = @HSA_RUNTIME_LIB@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
@@ -155,6 +157,10 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
+PLUGIN_HSA = @PLUGIN_HSA@
+PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@
+PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@
+PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@
PLUGIN_NVPTX = @PLUGIN_NVPTX@
PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@