GCN: libgomp+mkoffload.cc: Prepare for reverse offload fn lookup
Add support to GCN for the reverse lookup of function names, in preparation
for 'omp target device(ancestor:1)'.
gcc/ChangeLog:
* config/gcn/mkoffload.cc (process_asm): Create .offload_func_table,
similar to pre-existing .offload_var_table.
libgomp/ChangeLog:
* plugin/plugin-gcn.c (GOMP_OFFLOAD_load_image): Read
.offload_func_table to populate rev_fn_table when requested.
gcc/config/gcn/mkoffload.cc | 11 ++++++++++-
libgomp/plugin/plugin-gcn.c | 26 +++++++++++++++++++++++++-
2 files changed, 35 insertions(+), 2 deletions(-)
@@ -537,63 +537,72 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
case IN_VARS:
{
char *varname;
unsigned varsize;
if (sscanf (buf, " .8byte %ms\n", &varname))
{
fputs (buf, out);
fgets (buf, sizeof (buf), in);
if (!sscanf (buf, " .8byte %u\n", &varsize))
abort ();
var_count++;
}
break;
}
case IN_FUNCS:
{
char *funcname;
if (sscanf (buf, "\t.8byte\t%ms\n", &funcname))
{
+ fputs (buf, out);
obstack_ptr_grow (&fns_os, funcname);
fn_count++;
continue;
}
break;
}
}
char dummy;
if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0)
{
state = IN_VARS;
/* Add a global symbol to allow plugin-gcn.c to locate the table
at runtime. It can't use the "offload_var_table.N" emitted by
the compiler because a) they're not global, and b) there's one
for each input file combined into the binary. */
fputs (buf, out);
fputs ("\t.global .offload_var_table\n"
"\t.type .offload_var_table, @object\n"
".offload_var_table:\n",
out);
}
else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
- state = IN_FUNCS;
+ {
+ state = IN_FUNCS;
+ /* As for .gnu.offload_vars above, emit a global table symbol; used for reverse offload. */
+ fputs (buf, out);
+ fputs ("\t.global .offload_func_table\n"
+ "\t.type .offload_func_table, @object\n"
+ ".offload_func_table:\n",
+ out);
+ }
else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
{
state = IN_METADATA;
regcount.kernel_name = NULL;
regcount.sgpr_count = regcount.vgpr_count = -1;
}
else if (sscanf (buf, " .section %c", &dummy) > 0
|| sscanf (buf, " .text%c", &dummy) > 0
|| sscanf (buf, " .bss%c", &dummy) > 0
|| sscanf (buf, " .data%c", &dummy) > 0
|| sscanf (buf, " .ident %c", &dummy) > 0)
state = IN_CODE;
else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0)
{
state = IN_CODE;
gcc_assert (regcount.kernel_name != NULL
&& regcount.sgpr_count >= 0
&& regcount.vgpr_count >= 0);
obstack_grow (&regcounts_os, &regcount, sizeof (regcount));
@@ -3353,7 +3353,7 @@ GOMP_OFFLOAD_init_device (int n)
int
GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct addr_pair **target_table,
- uint64_t **rev_fn_table __attribute__((unused)))
+ uint64_t **rev_fn_table)
{
if (GOMP_VERSION_DEV (version) != GOMP_VERSION_GCN)
{
@@ -3520,6 +3520,30 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
if (module->fini_array_func)
kernel_count--;
+ if (rev_fn_table != NULL && kernel_count == 0)
+ *rev_fn_table = NULL;
+ else if (rev_fn_table != NULL)
+ {
+ hsa_status_t status;
+ hsa_executable_symbol_t var_symbol;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ ".offload_func_table",
+ agent->id, 0, &var_symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not find symbol for variable in the code object",
+ status);
+ uint64_t fn_table_addr;
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &fn_table_addr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+ *rev_fn_table = GOMP_PLUGIN_malloc (kernel_count * sizeof (uint64_t));
+ GOMP_OFFLOAD_dev2host (agent->device_id, *rev_fn_table,
+ (void*) fn_table_addr,
+ kernel_count * sizeof (uint64_t));
+ }
+
return kernel_count + var_count + other_count;
}