commit ac4269627c5b3f5d5c20fab7517c066ae6dfce74
Author: Julian Brown <julian@codesourcery.com>
Date: Mon Nov 2 06:31:47 2015 -0800
OpenACC host_data support using mapping regions.
@@ -1250,6 +1250,7 @@ static const struct omp_pragma_def oacc_pragmas[] = {
{ "data", PRAGMA_OACC_DATA },
{ "enter", PRAGMA_OACC_ENTER_DATA },
{ "exit", PRAGMA_OACC_EXIT_DATA },
+ { "host_data", PRAGMA_OACC_HOST_DATA },
{ "kernels", PRAGMA_OACC_KERNELS },
{ "loop", PRAGMA_OACC_LOOP },
{ "parallel", PRAGMA_OACC_PARALLEL },
@@ -32,6 +32,7 @@ enum pragma_kind {
PRAGMA_OACC_DATA,
PRAGMA_OACC_ENTER_DATA,
PRAGMA_OACC_EXIT_DATA,
+ PRAGMA_OACC_HOST_DATA,
PRAGMA_OACC_KERNELS,
PRAGMA_OACC_LOOP,
PRAGMA_OACC_PARALLEL,
@@ -165,6 +166,7 @@ enum pragma_omp_clause {
PRAGMA_OACC_CLAUSE_SELF,
PRAGMA_OACC_CLAUSE_SEQ,
PRAGMA_OACC_CLAUSE_TILE,
+ PRAGMA_OACC_CLAUSE_USE_DEVICE,
PRAGMA_OACC_CLAUSE_VECTOR,
PRAGMA_OACC_CLAUSE_VECTOR_LENGTH,
PRAGMA_OACC_CLAUSE_WAIT,
@@ -10139,6 +10139,8 @@ c_parser_omp_clause_name (c_parser *parser)
result = PRAGMA_OMP_CLAUSE_UNTIED;
else if (!strcmp ("use_device_ptr", p))
result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR;
+ else if (!strcmp ("use_device", p))
+ result = PRAGMA_OACC_CLAUSE_USE_DEVICE;
break;
case 'v':
if (!strcmp ("vector", p))
@@ -11485,6 +11487,15 @@ c_parser_oacc_clause_tile (c_parser *parser, tree list)
return c;
}
+/* OpenACC 2.0:
+ use_device ( variable-list ) */
+
+static tree
+c_parser_oacc_clause_use_device (c_parser *parser, tree list)
+{
+ return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list);
+}
+
/* OpenACC:
wait ( int-expr-list ) */
@@ -12786,6 +12797,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask,
clauses = c_parser_oacc_data_clause (parser, c_kind, clauses);
c_name = "self";
break;
+ case PRAGMA_OACC_CLAUSE_USE_DEVICE:
+ clauses = c_parser_oacc_clause_use_device (parser, clauses);
+ c_name = "use_device";
+ break;
case PRAGMA_OACC_CLAUSE_SEQ:
clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ,
clauses);
@@ -13280,6 +13295,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter)
/* OpenACC 2.0:
+ # pragma acc host_data oacc-data-clause[optseq] new-line
+ structured-block
+*/
+
+#define OACC_HOST_DATA_CLAUSE_MASK \
+ ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) )
+
+static tree
+c_parser_oacc_host_data (location_t loc, c_parser *parser)
+{
+ tree stmt, clauses, block;
+
+ clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK,
+ "#pragma acc host_data");
+
+ block = c_begin_omp_parallel ();
+ add_stmt (c_parser_omp_structured_block (parser));
+ stmt = c_finish_oacc_host_data (loc, clauses, block);
+ return stmt;
+}
+
+
+/* OpenACC 2.0:
# pragma acc loop oacc-loop-clause[optseq] new-line
structured-block
@@ -16573,6 +16611,9 @@ c_parser_omp_construct (c_parser *parser)
case PRAGMA_OACC_DATA:
stmt = c_parser_oacc_data (loc, parser);
break;
+ case PRAGMA_OACC_HOST_DATA:
+ stmt = c_parser_oacc_host_data (loc, parser);
+ break;
case PRAGMA_OACC_KERNELS:
case PRAGMA_OACC_PARALLEL:
strcpy (p_name, "#pragma acc");
@@ -642,6 +642,7 @@ extern tree c_finish_goto_ptr (location_t, tree);
extern tree c_expr_to_decl (tree, bool *, bool *);
extern tree c_finish_omp_construct (location_t, enum tree_code, tree, tree);
extern tree c_finish_oacc_data (location_t, tree, tree);
+extern tree c_finish_oacc_host_data (location_t, tree, tree);
extern tree c_begin_omp_parallel (void);
extern tree c_finish_omp_parallel (location_t, tree, tree);
extern tree c_begin_omp_task (void);
@@ -11541,6 +11541,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block)
return add_stmt (stmt);
}
+/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound
+ statement. LOC is the location of the OACC_HOST_DATA. */
+
+tree
+c_finish_oacc_host_data (location_t loc, tree clauses, tree block)
+{
+ tree stmt;
+
+ block = c_end_compound_stmt (loc, block, true);
+
+ stmt = make_node (OACC_HOST_DATA);
+ TREE_TYPE (stmt) = void_type_node;
+ OACC_HOST_DATA_CLAUSES (stmt) = clauses;
+ OACC_HOST_DATA_BODY (stmt) = block;
+ SET_EXPR_LOCATION (stmt, loc);
+
+ return add_stmt (stmt);
+}
+
/* Like c_begin_compound_stmt, except force the retention of the BLOCK. */
tree
@@ -12981,6 +13000,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd)
bitmap_set_bit (&map_head, DECL_UID (t));
goto check_dup_generic;
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_IS_DEVICE_PTR:
case OMP_CLAUSE_USE_DEVICE_PTR:
t = OMP_CLAUSE_DECL (c);
@@ -6317,6 +6317,7 @@ extern void finish_omp_threadprivate (tree);
extern tree begin_omp_structured_block (void);
extern tree finish_omp_structured_block (tree);
extern tree finish_oacc_data (tree, tree);
+extern tree finish_oacc_host_data (tree, tree);
extern tree finish_omp_construct (enum tree_code, tree, tree);
extern tree begin_omp_parallel (void);
extern tree finish_omp_parallel (tree, tree);
@@ -29262,6 +29262,8 @@ cp_parser_omp_clause_name (cp_parser *parser)
result = PRAGMA_OMP_CLAUSE_UNTIED;
else if (!strcmp ("use_device_ptr", p))
result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR;
+ else if (!strcmp ("use_device", p))
+ result = PRAGMA_OACC_CLAUSE_USE_DEVICE;
break;
case 'v':
if (!strcmp ("vector", p))
@@ -31614,6 +31616,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask,
clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses);
c_name = "self";
break;
+ case PRAGMA_OACC_CLAUSE_USE_DEVICE:
+ clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE,
+ clauses);
+ c_name = "use_device";
+ break;
case PRAGMA_OACC_CLAUSE_SEQ:
clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ,
clauses, here);
@@ -34525,6 +34532,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok)
return stmt;
}
+#define OACC_HOST_DATA_CLAUSE_MASK \
+ ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) )
+
+/* OpenACC 2.0:
+ # pragma acc host_data <clauses> new-line
+ structured-block */
+
+static tree
+cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok)
+{
+ tree stmt, clauses, block;
+ unsigned int save;
+
+ clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK,
+ "#pragma acc host_data", pragma_tok);
+
+ block = begin_omp_parallel ();
+ save = cp_parser_begin_omp_structured_block (parser);
+ cp_parser_statement (parser, NULL_TREE, false, NULL);
+ cp_parser_end_omp_structured_block (parser, save);
+ stmt = finish_oacc_host_data (clauses, block);
+ return stmt;
+}
+
/* OpenACC 2.0:
# pragma acc enter data oacc-enter-data-clause[optseq] new-line
@@ -35789,6 +35820,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok)
case PRAGMA_OACC_EXIT_DATA:
stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false);
break;
+ case PRAGMA_OACC_HOST_DATA:
+ stmt = cp_parser_oacc_host_data (parser, pragma_tok);
+ break;
case PRAGMA_OACC_KERNELS:
case PRAGMA_OACC_PARALLEL:
strcpy (p_name, "#pragma acc");
@@ -36363,6 +36397,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context)
case PRAGMA_OACC_DATA:
case PRAGMA_OACC_ENTER_DATA:
case PRAGMA_OACC_EXIT_DATA:
+ case PRAGMA_OACC_HOST_DATA:
case PRAGMA_OACC_KERNELS:
case PRAGMA_OACC_PARALLEL:
case PRAGMA_OACC_LOOP:
@@ -6810,6 +6810,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd)
}
break;
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_IS_DEVICE_PTR:
case OMP_CLAUSE_USE_DEVICE_PTR:
field_ok = allow_fields;
@@ -7365,6 +7366,24 @@ finish_oacc_data (tree clauses, tree block)
return add_stmt (stmt);
}
+/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound
+ statement. */
+
+tree
+finish_oacc_host_data (tree clauses, tree block)
+{
+ tree stmt;
+
+ block = finish_omp_structured_block (block);
+
+ stmt = make_node (OACC_HOST_DATA);
+ TREE_TYPE (stmt) = void_type_node;
+ OACC_HOST_DATA_CLAUSES (stmt) = clauses;
+ OACC_HOST_DATA_BODY (stmt) = block;
+
+ return add_stmt (stmt);
+}
+
/* Generate OMP construct CODE, with BODY and CLAUSES as its compound
statement. */
@@ -1353,6 +1353,9 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs,
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
kind = " oacc_enter_exit_data";
break;
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ kind = " oacc_host_data";
+ break;
default:
gcc_unreachable ();
}
@@ -170,6 +170,7 @@ enum gf_mask {
GF_OMP_TARGET_KIND_OACC_DATA = 7,
GF_OMP_TARGET_KIND_OACC_UPDATE = 8,
GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9,
+ GF_OMP_TARGET_KIND_OACC_HOST_DATA = 10,
/* True on an GIMPLE_OMP_RETURN statement if the return does not require
a thread synchronization via some sort of barrier. The exact barrier
@@ -6004,6 +6005,7 @@ is_gimple_omp_oacc (const gimple *stmt)
case GF_OMP_TARGET_KIND_OACC_DATA:
case GF_OMP_TARGET_KIND_OACC_UPDATE:
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
return true;
default:
return false;
@@ -6294,6 +6294,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
case OMP_TARGET_DATA:
case OMP_TARGET_ENTER_DATA:
case OMP_TARGET_EXIT_DATA:
+ case OACC_HOST_DATA:
ctx->target_firstprivatize_array_bases = true;
default:
break;
@@ -6559,6 +6560,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
case OMP_TARGET_DATA:
case OMP_TARGET_ENTER_DATA:
case OMP_TARGET_EXIT_DATA:
+ case OACC_HOST_DATA:
if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
|| (OMP_CLAUSE_MAP_KIND (c)
== GOMP_MAP_FIRSTPRIVATE_REFERENCE))
@@ -6968,6 +6970,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
}
goto do_notice;
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT;
goto do_add;
@@ -7203,7 +7206,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
break;
case OMP_CLAUSE_DEVICE_RESIDENT:
- case OMP_CLAUSE_USE_DEVICE:
remove = true;
break;
@@ -8961,6 +8963,9 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
case OMP_TEAMS:
ort = OMP_TEAMS_COMBINED (expr) ? ORT_COMBINED_TEAMS : ORT_TEAMS;
break;
+ case OACC_HOST_DATA:
+ ort = ORT_TARGET_DATA;
+ break;
default:
gcc_unreachable ();
}
@@ -8982,6 +8987,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
switch (TREE_CODE (expr))
{
case OACC_DATA:
+ case OACC_HOST_DATA:
end_ix = BUILT_IN_GOACC_DATA_END;
break;
case OMP_TARGET_DATA:
@@ -9013,6 +9019,10 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_KERNELS,
OMP_CLAUSES (expr));
break;
+ case OACC_HOST_DATA:
+ stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_HOST_DATA,
+ OMP_CLAUSES (expr));
+ break;
case OACC_PARALLEL:
stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_PARALLEL,
OMP_CLAUSES (expr));
@@ -10122,12 +10132,12 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
ret = GS_ALL_DONE;
break;
- case OACC_HOST_DATA:
case OACC_DECLARE:
sorry ("directive not yet implemented");
ret = GS_ALL_DONE;
break;
+ case OACC_HOST_DATA:
case OACC_DATA:
case OACC_KERNELS:
case OACC_PARALLEL:
@@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update",
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
BT_FN_VOID_INT_INT_VAR,
ATTR_NOTHROW_LIST)
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_HOST_DATA, "GOACC_host_data",
+ BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
@@ -390,8 +390,8 @@ scan_omp_op (tree *tp, omp_context *ctx)
}
static void lower_omp (gimple_seq *, omp_context *);
-static tree lookup_decl_in_outer_ctx (tree, omp_context *);
-static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
+static tree lookup_decl_in_outer_ctx (tree, omp_context *, bool = false);
+static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *, bool = false);
/* Find an OMP clause of type KIND within CLAUSES. */
@@ -1935,6 +1935,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
install_var_local (decl, ctx);
break;
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
decl = OMP_CLAUSE_DECL (c);
if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
@@ -2134,7 +2135,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
break;
case OMP_CLAUSE_DEVICE_RESIDENT:
- case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE__CACHE_:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
@@ -2288,6 +2288,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_NOGROUP:
case OMP_CLAUSE_DEFAULTMAP:
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
case OMP_CLAUSE__CILK_FOR_COUNT_:
case OMP_CLAUSE_ASYNC:
@@ -2302,7 +2303,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
break;
case OMP_CLAUSE_DEVICE_RESIDENT:
- case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE__CACHE_:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
@@ -3608,6 +3608,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break;
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
stmt_name = "enter/exit data"; break;
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
+ break;
default: gcc_unreachable ();
}
switch (gimple_omp_target_kind (ctx->stmt))
@@ -3619,6 +3621,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
case GF_OMP_TARGET_KIND_OACC_KERNELS:
ctx_stmt_name = "kernels"; break;
case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ ctx_stmt_name = "host_data"; break;
default: gcc_unreachable ();
}
@@ -3941,13 +3945,22 @@ maybe_lookup_ctx (gimple *stmt)
parallelism happens only rarely. */
static tree
-lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
+lookup_decl_in_outer_ctx (tree decl, omp_context *ctx,
+ bool skip_hostdata)
{
tree t;
omp_context *up;
for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
- t = maybe_lookup_decl (decl, up);
+ {
+ if (skip_hostdata
+ && gimple_code (up->stmt) == GIMPLE_OMP_TARGET
+ && gimple_omp_target_kind (up->stmt)
+ == GF_OMP_TARGET_KIND_OACC_HOST_DATA)
+ continue;
+
+ t = maybe_lookup_decl (decl, up);
+ }
gcc_assert (!ctx->is_nested || t || is_global_var (decl));
@@ -3959,13 +3972,22 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
in outer contexts. */
static tree
-maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
+maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx,
+ bool skip_hostdata)
{
tree t = NULL;
omp_context *up;
for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
- t = maybe_lookup_decl (decl, up);
+ {
+ if (skip_hostdata
+ && gimple_code (up->stmt) == GIMPLE_OMP_TARGET
+ && gimple_omp_target_kind (up->stmt)
+ == GF_OMP_TARGET_KIND_OACC_HOST_DATA)
+ continue;
+
+ t = maybe_lookup_decl (decl, up);
+ }
return t ? t : decl;
}
@@ -12458,6 +12480,7 @@ expand_omp_target (struct omp_region *region)
break;
case GF_OMP_TARGET_KIND_DATA:
case GF_OMP_TARGET_KIND_OACC_DATA:
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
data_region = true;
break;
default:
@@ -12697,6 +12720,9 @@ expand_omp_target (struct omp_region *region)
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
break;
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ start_ix = BUILT_IN_GOACC_HOST_DATA;
+ break;
default:
gcc_unreachable ();
}
@@ -12820,6 +12846,7 @@ expand_omp_target (struct omp_region *region)
{
case BUILT_IN_GOACC_DATA_START:
case BUILT_IN_GOMP_TARGET_DATA:
+ case BUILT_IN_GOACC_HOST_DATA:
break;
case BUILT_IN_GOMP_TARGET:
case BUILT_IN_GOMP_TARGET_UPDATE:
@@ -13127,6 +13154,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
case GF_OMP_TARGET_KIND_OACC_KERNELS:
case GF_OMP_TARGET_KIND_OACC_DATA:
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
break;
case GF_OMP_TARGET_KIND_UPDATE:
case GF_OMP_TARGET_KIND_ENTER_DATA:
@@ -14920,6 +14948,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
break;
case GF_OMP_TARGET_KIND_DATA:
case GF_OMP_TARGET_KIND_OACC_DATA:
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
data_region = true;
break;
default:
@@ -15025,7 +15054,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
{
- if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))
+ if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx,
+ true))
&& varpool_node::get_create (var)->offloadable)
continue;
@@ -15124,6 +15154,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
}
break;
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
case OMP_CLAUSE_IS_DEVICE_PTR:
var = OMP_CLAUSE_DECL (c);
@@ -15262,7 +15293,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
talign = DECL_ALIGN_UNIT (ovar);
if (nc)
{
- var = lookup_decl_in_outer_ctx (ovar, ctx);
+ var = lookup_decl_in_outer_ctx (ovar, ctx, true);
x = build_sender_ref (ovar, ctx);
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
@@ -15509,12 +15540,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
build_int_cstu (tkind_type, tkind));
break;
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
case OMP_CLAUSE_IS_DEVICE_PTR:
ovar = OMP_CLAUSE_DECL (c);
var = lookup_decl_in_outer_ctx (ovar, ctx);
x = build_sender_ref (ovar, ctx);
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE)
tkind = GOMP_MAP_USE_DEVICE_PTR;
else
tkind = GOMP_MAP_FIRSTPRIVATE_INT;
@@ -15717,10 +15750,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gimple_build_assign (new_var, x));
}
break;
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
case OMP_CLAUSE_IS_DEVICE_PTR:
var = OMP_CLAUSE_DECL (c);
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE)
x = build_sender_ref (var, ctx);
else
x = build_receiver_ref (var, false, ctx);
@@ -16707,6 +16742,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region,
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
case GF_OMP_TARGET_KIND_OACC_KERNELS:
case GF_OMP_TARGET_KIND_OACC_DATA:
+ case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
break;
case GF_OMP_TARGET_KIND_UPDATE:
case GF_OMP_TARGET_KIND_ENTER_DATA:
@@ -1072,6 +1072,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_SHARED:
case OMP_CLAUSE_TO_DECLARE:
case OMP_CLAUSE_LINK:
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
case OMP_CLAUSE_IS_DEVICE_PTR:
do_decl_clause:
@@ -1719,6 +1720,7 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_SHARED:
case OMP_CLAUSE_TO_DECLARE:
case OMP_CLAUSE_LINK:
+ case OMP_CLAUSE_USE_DEVICE:
case OMP_CLAUSE_USE_DEVICE_PTR:
case OMP_CLAUSE_IS_DEVICE_PTR:
do_decl_clause:
@@ -393,6 +393,7 @@ GOACC_2.0 {
GOACC_2.0.1 {
global:
GOACC_parallel_keyed;
+ GOACC_host_data;
} GOACC_2.0;
GOMP_PLUGIN_1.0 {
@@ -490,6 +490,46 @@ GOACC_wait (int async, int num_waits, ...)
goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
}
+void
+GOACC_host_data (int device, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ struct target_mem_desc *tgt;
+
+#ifdef HAVE_INTTYPES_H
+ gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
+ __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
+#else
+ gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
+ __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
+#endif
+
+ goacc_lazy_initialize ();
+
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ /* Host fallback or 'do nothing'. */
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ || host_fallback)
+ {
+ tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+ GOMP_MAP_VARS_OPENACC);
+ tgt->prev = thr->mapped_data;
+ thr->mapped_data = tgt;
+
+ return;
+ }
+
+ gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
+ tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
+ GOMP_MAP_VARS_OPENACC);
+ gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
+ tgt->prev = thr->mapped_data;
+ thr->mapped_data = tgt;
+}
+
int
GOACC_get_num_threads (void)
{
new file mode 100644
@@ -0,0 +1,118 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+
+void
+saxpy_host (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ y[i] = y[i] + a * x[i];
+}
+
+#pragma acc routine
+void
+saxpy_target (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ y[i] = y[i] + a * x[i];
+}
+
+int
+main(int argc, char **argv)
+{
+#define N 8
+ int i;
+ float x_ref[N], y_ref[N];
+ float x[N], y[N];
+ cublasHandle_t h;
+ float a = 2.0;
+
+#pragma acc data copyin (x[0:N]) copy (y[0:N])
+ {
+ float *xp, *yp;
+#pragma acc host_data use_device (x, y)
+ {
+#pragma acc parallel pcopy (xp, yp)
+ {
+ xp = x;
+ yp = y;
+ }
+ }
+
+ if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ x[i] = x_ref[i] = 4.0 + i;
+ y[i] = y_ref[i] = 3.0;
+ }
+
+ saxpy_host (N, a, x_ref, y_ref);
+
+ cublasCreate (&h);
+
+#pragma acc data copyin (x[0:N]) copy (y[0:N])
+ {
+#pragma acc host_data use_device (x, y)
+ {
+ cublasSaxpy (h, N, &a, x, 1, y, 1);
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (y[i] != y_ref[i])
+ abort ();
+ }
+
+#pragma acc data create (x[0:N]) copyout (y[0:N])
+ {
+#pragma acc kernels
+ for (i = 0; i < N; i++)
+ y[i] = 3.0;
+
+#pragma acc host_data use_device (x, y)
+ {
+ cublasSaxpy (h, N, &a, x, 1, y, 1);
+ }
+ }
+
+ cublasDestroy (h);
+
+ for (i = 0; i < N; i++)
+ {
+ if (y[i] != y_ref[i])
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ y[i] = 3.0;
+
+#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
+ {
+#pragma acc host_data use_device (x, y)
+ {
+#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
+ saxpy_target (N, a, x, y);
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (y[i] != y_ref[i])
+ abort ();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+char *global_in_host;
+
+void foo (char *in)
+{
+ if (!acc_is_present (global_in_host, sizeof (*global_in_host))
+ || in != acc_deviceptr (global_in_host))
+ abort ();
+}
+
+int
+main (int argc, char **argv)
+{
+ char mydata[1024];
+
+ global_in_host = mydata;
+
+#pragma acc data copyin(mydata)
+ {
+#pragma acc host_data use_device (mydata)
+ {
+ foo (mydata);
+ }
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+#define N 1024
+
+int main (int argc, char* argv[])
+{
+ int x[N];
+
+#pragma acc data copyin (x[0:N])
+ {
+ int *xp;
+#pragma acc host_data use_device (x)
+ {
+#pragma acc parallel present (x) copyout (xp)
+ {
+ xp = x;
+ }
+ }
+
+ if (xp != acc_deviceptr (x))
+ abort ();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+#define N 1024
+
+int main (int argc, char* argv[])
+{
+ int x[N], *xp2;
+
+#pragma acc data copyin (x[0:N])
+ {
+ int *xp;
+#pragma acc host_data use_device (x)
+ {
+#pragma acc data present (x)
+ {
+ xp = x;
+ }
+ xp2 = x;
+ }
+
+ if (xp != acc_deviceptr (x) || xp2 != xp)
+ abort ();
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,38 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+#define N 1024
+
+int main (int argc, char* argv[])
+{
+ int x[N], y[N], *yp;
+
+ yp = y + 1;
+
+#pragma acc data copyin (x[0:N])
+ {
+ int *xp, *yp2;
+#pragma acc host_data use_device (x)
+ {
+#pragma acc data present (x) copyin (y)
+ {
+#pragma acc host_data use_device (yp)
+ {
+ xp = x;
+ yp2 = yp;
+ }
+
+ if (yp2 != acc_deviceptr (yp))
+ abort ();
+ }
+ }
+
+ if (xp != acc_deviceptr (x))
+ abort ();
+
+ }
+
+ return 0;
+}