@@ -1,3 +1,17 @@
+2019-08-13 Julian Brown <julian@codesourcery.com>
+
+ * plugin/plugin-gcn.c (struct copy_data): Add using_src_copy field.
+ (copy_data): Free temporary buffer if using.
+ (queue_push_copy): Add using_src_copy parameter.
+ (GOMP_OFFLOAD_dev2dev, GOMP_OFFLOAD_async_dev2host): Update calls to
+ queue_push_copy.
+ (GOMP_OFFLOAD_async_host2dev): Likewise. Allocate temporary buffer and
+ copy source data to it immediately.
+ * target.c (gomp_copy_host2dev): Update function comment.
+ (copy_host2dev_immediate): Remove.
+ (gomp_map_pointer, gomp_map_vars_internal): Replace calls to
+ copy_host2dev_immediate with calls to gomp_copy_host2dev.
+
2019-08-08 Julian Brown <julian@codesourcery.com>
* plugin/plugin-gcn.c (gcn_exec): Use 1 for the default number of
@@ -3063,6 +3063,7 @@ struct copy_data
const void *src;
size_t len;
bool use_hsa_memory_copy;
+ bool using_src_copy;
struct goacc_asyncqueue *aq;
};
@@ -3077,12 +3078,14 @@ copy_data (void *data_)
hsa_fns.hsa_memory_copy_fn (data->dst, data->src, data->len);
else
memcpy (data->dst, data->src, data->len);
+ if (data->using_src_copy)
+ free ((void *) data->src);
free (data);
}
static void
queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src,
- size_t len, bool use_hsa_memory_copy)
+ size_t len, bool use_hsa_memory_copy, bool using_src_copy)
{
if (DEBUG_QUEUES)
HSA_DEBUG ("queue_push_copy %d:%d: %zu bytes from (%p) to (%p)\n",
@@ -3093,6 +3096,7 @@ queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src,
data->src = src;
data->len = len;
data->use_hsa_memory_copy = use_hsa_memory_copy;
+ data->using_src_copy = using_src_copy;
data->aq = aq;
queue_push_callback (aq, copy_data, data);
}
@@ -3137,7 +3141,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst, const void *src, size_t n)
{
struct agent_info *agent = get_agent_info (device);
maybe_init_omp_async (agent);
- queue_push_copy (agent->omp_async_queue, dst, src, n, false);
+ queue_push_copy (agent->omp_async_queue, dst, src, n, false, false);
return true;
}
@@ -3469,7 +3473,15 @@ GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
{
struct agent_info *agent = get_agent_info (device);
assert (agent == aq->agent);
- queue_push_copy (aq, dst, src, n, image_address_p (agent, dst));
+ /* The source data does not necessarily remain live until the deferred
+ copy happens. Taking a snapshot of the data here avoids reading
+ uninitialised data later, but means that (a) data is copied twice and
+ (b) modifications to the copied data between the "spawning" point of
+ the asynchronous kernel and when it is executed will not be seen.
+ But, that is probably correct. */
+ void *src_copy = GOMP_PLUGIN_malloc (n);
+ memcpy (src_copy, src, n);
+ queue_push_copy (aq, dst, src_copy, n, image_address_p (agent, dst), true);
return true;
}
@@ -3479,7 +3491,7 @@ GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src,
{
struct agent_info *agent = get_agent_info (device);
assert (agent == aq->agent);
- queue_push_copy (aq, dst, src, n, image_address_p (agent, src));
+ queue_push_copy (aq, dst, src, n, image_address_p (agent, src), false);
return true;
}
@@ -303,10 +303,9 @@ gomp_to_device_kind_p (int kind)
}
/* Copy host memory to an offload device. In asynchronous mode (if AQ is
- non-NULL), this is only safe when the source memory is a global or heap
- location (otherwise a copy may take place from a dangling pointer to an
- expired stack frame). Use copy_host2dev_immediate for copies from stack
- locations. */
+ non-NULL), H may point to a stack location. It is up to the underlying
+ plugin to ensure that this data is read immediately, rather than at some
+ later point when the stack frame will likely have been destroyed. */
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
@@ -346,17 +345,6 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
-/* Use this variant for host-to-device copies from stack locations that may not
- be live at the time an asynchronous copy operation takes place. */
-
-static void
-copy_host2dev_immediate (struct gomp_device_descr *devicep, void *d,
- const void *h, size_t sz,
- struct gomp_coalesce_buf *cbuf)
-{
- gomp_copy_host2dev (devicep, NULL, d, h, sz, cbuf);
-}
-
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq,
@@ -617,10 +605,10 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
if (cur_node.host_start == (uintptr_t) NULL)
{
cur_node.tgt_offset = (uintptr_t) NULL;
- copy_host2dev_immediate (devicep,
- (void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset,
- sizeof (void *), cbuf);
+ gomp_copy_host2dev (devicep, aq,
+ (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset, sizeof (void *),
+ cbuf);
return;
}
/* Add bias to the pointer value. */
@@ -639,9 +627,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
array section. Now subtract bias to get what we want
to initialize the pointer with. */
cur_node.tgt_offset -= bias;
- copy_host2dev_immediate (devicep, (void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset, sizeof (void *),
- cbuf);
+ gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
@@ -1460,13 +1447,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
if (cur_node.tgt_offset)
cur_node.tgt_offset -= sizes[i];
- copy_host2dev_immediate (devicep,
- (void *) (n->tgt->tgt_start
- + n->tgt_offset
- + cur_node.host_start
- - n->host_start),
- (void *) &cur_node.tgt_offset,
- sizeof (void *), cbufp);
+ gomp_copy_host2dev (devicep, aq,
+ (void *) (n->tgt->tgt_start
+ + n->tgt_offset
+ + cur_node.host_start
+ - n->host_start),
+ (void *) &cur_node.tgt_offset,
+ sizeof (void *), cbufp);
cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start - n->host_start;
continue;
@@ -1705,8 +1692,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
/* We intentionally do not use coalescing here, as it's not
data allocated by the current call to this function. */
- copy_host2dev_immediate (devicep, (void *) n->tgt_offset,
- &tgt_addr, sizeof (void *), NULL);
+ gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
+ &tgt_addr, sizeof (void *), NULL);
}
array++;
}
@@ -1828,9 +1815,10 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
for (i = 0; i < mapnum; i++)
{
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
- copy_host2dev_immediate (devicep,
- (void *) (tgt->tgt_start + i * sizeof (void *)),
- (void *) &cur_node.tgt_offset, sizeof (void *), cbufp);
+ gomp_copy_host2dev (devicep, aq,
+ (void *) (tgt->tgt_start + i * sizeof (void *)),
+ (void *) &cur_node.tgt_offset, sizeof (void *),
+ cbufp);
}
}