@@ -6385,6 +6385,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
case OMP_CLAUSE_MERGEABLE:
case OMP_CLAUSE_PROC_BIND:
case OMP_CLAUSE_SAFELEN:
+ case OMP_CLAUSE_TILE:
break;
case OMP_CLAUSE_ALIGNED:
@@ -6770,6 +6771,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p)
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_TILE:
break;
default:
@@ -8410,21 +8412,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
break;
case OACC_KERNELS:
- if (OACC_KERNELS_COMBINED (*expr_p))
- sorry ("directive not yet implemented");
- else
- gimplify_omp_workshare (expr_p, pre_p);
- ret = GS_ALL_DONE;
- break;
-
case OACC_PARALLEL:
- if (OACC_PARALLEL_COMBINED (*expr_p))
- sorry ("directive not yet implemented");
- else
- gimplify_omp_workshare (expr_p, pre_p);
- ret = GS_ALL_DONE;
- break;
-
case OACC_DATA:
case OMP_SECTIONS:
case OMP_SINGLE:
@@ -1928,6 +1928,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_BIND:
+ case OMP_CLAUSE_NOHOST:
+ case OMP_CLAUSE_TILE:
sorry ("Clause not supported yet");
break;
@@ -2055,6 +2058,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_BIND:
+ case OMP_CLAUSE_NOHOST:
+ case OMP_CLAUSE_TILE:
sorry ("Clause not supported yet");
break;
@@ -2742,7 +2748,10 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx)
{
for (omp_context *ctx_ = ctx; ctx_ != NULL; ctx_ = ctx_->outer)
if (is_gimple_omp (ctx_->stmt)
- && is_gimple_omp_oacc (ctx_->stmt))
+ && is_gimple_omp_oacc (ctx_->stmt)
+ /* Except for atomic codes that we share with OpenMP. */
+ && ! (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD
+ || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE))
{
error_at (gimple_location (stmt),
"non-OpenACC construct inside of OpenACC region");
@@ -390,7 +390,19 @@ enum omp_clause_code {
OMP_CLAUSE_NUM_WORKERS,
/* OpenACC clause: vector_length (integer-expression). */
- OMP_CLAUSE_VECTOR_LENGTH
+ OMP_CLAUSE_VECTOR_LENGTH,
+
+ /* OpenACC clause: bind ( identifier | string ). */
+ OMP_CLAUSE_BIND,
+
+ /* OpenACC clause: nohost. */
+ OMP_CLAUSE_NOHOST,
+
+ /* OpenACC clause: tile ( size-expr-list ). */
+ OMP_CLAUSE_TILE,
+
+ /* OpenACC clause: device_type ( device-type-list ). */
+ OMP_CLAUSE_DEVICE_TYPE
};
#undef DEFTREESTRUCT
@@ -799,6 +799,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags)
case OMP_CLAUSE_INDEPENDENT:
pp_string (pp, "independent");
break;
+ case OMP_CLAUSE_TILE:
+ pp_string (pp, "tile(");
+ dump_generic_node (pp, OMP_CLAUSE_TILE_LIST (clause),
+ spc, flags, false);
+ pp_right_paren (pp);
+ break;
default:
/* Should never happen. */
@@ -369,6 +369,10 @@ unsigned const char omp_clause_num_ops[] =
1, /* OMP_CLAUSE_NUM_GANGS */
1, /* OMP_CLAUSE_NUM_WORKERS */
1, /* OMP_CLAUSE_VECTOR_LENGTH */
+ 1, /* OMP_CLAUSE_BIND */
+ 0, /* OMP_CLAUSE_NOHOST */
+ 1, /* OMP_CLAUSE_TILE */
+ 2 /* OMP_CLAUSE_DEVICE_TYPE */
};
const char * const omp_clause_code_name[] =
@@ -427,7 +431,11 @@ const char * const omp_clause_code_name[] =
"vector",
"num_gangs",
"num_workers",
- "vector_length"
+ "vector_length",
+ "bind",
+ "nohost",
+ "tile",
+ "device_type"
};
@@ -11237,6 +11245,9 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__SIMDUID_:
case OMP_CLAUSE__CILK_FOR_COUNT_:
+ /* OpenACC clauses whose single operand (bind name, tile size list) must be walked. */
+ case OMP_CLAUSE_BIND:
+ case OMP_CLAUSE_TILE:
WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
/* FALLTHRU */
@@ -11255,6 +11266,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
case OMP_CLAUSE_TASKGROUP:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_NOHOST:
WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
case OMP_CLAUSE_LASTPRIVATE:
case OMP_CLAUSE_LASTPRIVATE:
@@ -1312,15 +1312,6 @@ extern void protected_set_expr_location (tree, location_t);
#define OMP_SECTION_LAST(NODE) \
(OMP_SECTION_CHECK (NODE)->base.private_flag)
-/* True on an OACC_KERNELS statement if is represents combined kernels loop
- directive. */
-#define OACC_KERNELS_COMBINED(NODE) \
- (OACC_KERNELS_CHECK (NODE)->base.private_flag)
-
-/* Like OACC_KERNELS_COMBINED, but for parallel loop directive. */
-#define OACC_PARALLEL_COMBINED(NODE) \
- (OACC_PARALLEL_CHECK (NODE)->base.private_flag)
-
/* True on an OMP_PARALLEL statement if it represents an explicit
combined parallel work-sharing constructs. */
#define OMP_PARALLEL_COMBINED(NODE) \
@@ -1391,6 +1382,9 @@ extern void protected_set_expr_location (tree, location_t);
#define OMP_CLAUSE_VECTOR_LENGTH_EXPR(NODE) \
OMP_CLAUSE_OPERAND ( \
OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_VECTOR_LENGTH), 0)
+#define OMP_CLAUSE_BIND_NAME(NODE) \
+ OMP_CLAUSE_OPERAND ( \
+ OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_BIND), 0)
#define OMP_CLAUSE_DEPEND_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEPEND)->omp_clause.subcode.depend_kind)
@@ -1495,6 +1489,15 @@ extern void protected_set_expr_location (tree, location_t);
#define OMP_CLAUSE_DEFAULT_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEFAULT)->omp_clause.subcode.default_kind)
+#define OMP_CLAUSE_TILE_LIST(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 0)
+
+#define OMP_CLAUSE_DEVICE_TYPE_DEVICES(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE), 0)
+
+#define OMP_CLAUSE_DEVICE_TYPE_CLAUSES(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE), 1)
+
/* SSA_NAME accessors. */
/* Returns the IDENTIFIER_NODE giving the SSA name a name or NULL_TREE
@@ -70,6 +70,10 @@ enum gomp_map_kind
/* Is a device pointer. OMP_CLAUSE_SIZE for these is unused; is implicitly
POINTER_SIZE_UNITS. */
GOMP_MAP_FORCE_DEVICEPTR = (GOMP_MAP_FLAG_SPECIAL_1 | 0),
+ /* OpenACC device_resident. */
+ GOMP_MAP_DEVICE_RESIDENT = (GOMP_MAP_FLAG_SPECIAL_1 | 1),
+ /* OpenACC link. */
+ GOMP_MAP_LINK = (GOMP_MAP_FLAG_SPECIAL_1 | 2),
/* Allocate. */
GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC),
/* ..., and copy to device. */
@@ -479,6 +479,9 @@ update_dev_host (int is_dev, void *h, size_t s)
{
splay_tree_key n;
void *d;
+
+ goacc_lazy_initialize ();
+
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
@@ -424,3 +424,31 @@
"st.param.u32 [%out_retval],%retval;\n" \
"ret;\n" \
"}\n"
+
+ #define GOMP_ATOMIC_PTX \
+ ".version 3.1\n" \
+ ".target sm_30\n" \
+ ".address_size 64\n" \
+ ".global .align 4 .u32 libgomp_ptx_lock;\n" \
+ ".visible .func GOMP_atomic_start;\n" \
+ ".visible .func GOMP_atomic_start\n" \
+ "{\n" \
+ " .reg .pred %p<2>;\n" \
+ " .reg .s32 %r<2>;\n" \
+ " .reg .s64 %rd<2>;\n" \
+ "BB5_1:\n" \
+ " mov.u64 %rd1, libgomp_ptx_lock;\n" \
+ " atom.global.cas.b32 %r1, [%rd1], 0, 1;\n" \
+ " setp.ne.s32 %p1, %r1, 0;\n" \
+ " @%p1 bra BB5_1;\n" \
+ " ret;\n" \
+ "}\n" \
+ ".visible .func GOMP_atomic_end;\n" \
+ ".visible .func GOMP_atomic_end\n" \
+ "{\n" \
+ " .reg .s32 %r<2>;\n" \
+ " .reg .s64 %rd<2>;\n" \
+ " mov.u64 %rd1, libgomp_ptx_lock;\n" \
+ " atom.global.exch.b32 %r1, [%rd1], 0;\n" \
+ " ret;\n" \
+ "}\n"
@@ -863,6 +863,16 @@ link_ptx (CUmodule *module, char *ptx_code)
cuda_error (r));
}
+ char *gomp_atomic_ptx = GOMP_ATOMIC_PTX;
+ r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, gomp_atomic_ptx,
+ strlen (gomp_atomic_ptx) + 1, 0, 0, 0, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+ GOMP_PLUGIN_fatal ("cuLinkAddData (gomp_atomic_ptx) error: %s",
+ cuda_error (r));
+ }
+
r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code,
strlen (ptx_code) + 1, 0, 0, 0, 0);
if (r != CUDA_SUCCESS)