@@ -1,3 +1,10 @@
+2019-08-13 Julian Brown <julian@codesourcery.com>
+
+ * omp-oacc-kernels.c (add_wait): New function, split out of...
+ (add_async_clauses_and_wait): ...here. Call new outlined function.
+ (decompose_kernels_region_body): Add wait at the end of
+ explicitly-asynchronous kernels regions.
+
2019-08-08 Julian Brown <julian@codesourcery.com>
* config/gcn/gcn.c (gcn_goacc_validate_dims): Ensure
@@ -900,6 +900,18 @@ maybe_build_inner_data_region (location_t loc, gimple *body,
return body;
}
+static void
+add_wait (location_t loc, gimple_seq *region_body)
+{
+ /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */
+ tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
+ tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
+ gimple *wait_call = gimple_build_call (wait_fn, 2,
+ sync_arg, integer_zero_node);
+ gimple_set_location (wait_call, loc);
+ gimple_seq_add_stmt (region_body, wait_call);
+}
+
/* Helper function of decompose_kernels_region_body. The statements in
REGION_BODY are expected to be decomposed parallel regions; add an
"async" clause to each. Also add a "wait" pragma at the end of the
@@ -923,13 +935,7 @@ add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
target_clauses);
}
- /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */
- tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
- tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
- gimple *wait_call = gimple_build_call (wait_fn, 2,
- sync_arg, integer_zero_node);
- gimple_set_location (wait_call, loc);
- gimple_seq_add_stmt (region_body, wait_call);
+ add_wait (loc, region_body);
}
/* Auxiliary analysis of the body of a kernels region, to determine for each
@@ -1378,6 +1384,14 @@ decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
a wait directive at the end. */
if (async_clause == NULL)
add_async_clauses_and_wait (loc, ®ion_body);
+ else
+ /* !!! If we have asynchronous parallel blocks inside a (synchronous) data
+ region, then target memory will get unmapped at the point the data
+ region ends, even if the inner asynchronous parallels have not yet
+ completed. For kernels marked "async", we might want to use "enter data
+ async(...)" and "exit data async(...)" instead.
+ For now, insert a (synchronous) wait at the end of the block. */
+ add_wait (loc, ®ion_body);
tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body));
gimple *body = gimple_build_bind (kernels_locals, region_body,