@@ -1002,6 +1002,7 @@ struct gomp_device_descr
enum gomp_map_vars_kind
{
GOMP_MAP_VARS_OPENACC,
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA,
GOMP_MAP_VARS_TARGET,
GOMP_MAP_VARS_DATA,
GOMP_MAP_VARS_ENTER_DATA
@@ -1010,7 +1011,8 @@ enum gomp_map_vars_kind
struct gomp_coalesce_buf;
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
-extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
+extern void gomp_acc_remove_pointer (void **, size_t *, unsigned short *,
+ int, void *, bool, int);
extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
unsigned short *);
struct gomp_coalesce_buf;
@@ -1039,10 +1041,12 @@ extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
size_t, void **, void **,
size_t *, void *, bool,
enum gomp_map_vars_kind);
+extern void gomp_acc_data_env_remove_tgt (struct target_mem_desc **,
+ struct target_mem_desc *);
extern void gomp_unmap_tgt (struct target_mem_desc *);
-extern void gomp_unmap_vars (struct target_mem_desc *, bool, bool);
+extern void gomp_unmap_vars (struct target_mem_desc *, bool);
extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
- struct goacc_asyncqueue *, bool);
+ struct goacc_asyncqueue *);
extern void gomp_init_device (struct gomp_device_descr *);
extern bool gomp_fini_device (struct gomp_device_descr *);
extern void gomp_unload_device (struct gomp_device_descr *);
@@ -373,14 +373,14 @@ goacc_async_unmap_tgt (void *ptr)
attribute_hidden void
goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
- struct goacc_asyncqueue *aq, bool finalize)
+ struct goacc_asyncqueue *aq)
{
struct gomp_device_descr *devicep = tgt->device_descr;
/* Increment reference to delay freeing of device memory until callback
has triggered. */
tgt->refcount++;
- gomp_unmap_vars_async (tgt, true, aq, finalize);
+ gomp_unmap_vars_async (tgt, true, aq);
devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
(void *) tgt);
}
@@ -391,7 +391,7 @@ acc_shutdown_1 (acc_device_t d)
{
struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt;
- gomp_unmap_vars (tgt, false, false);
+ gomp_unmap_vars (tgt, false);
}
walk->dev = NULL;
@@ -112,7 +112,7 @@ void goacc_host_init (void);
void goacc_init_asyncqueues (struct gomp_device_descr *);
bool goacc_fini_asyncqueues (struct gomp_device_descr *);
void goacc_async_copyout_unmap_vars (struct target_mem_desc *,
- struct goacc_asyncqueue *, bool);
+ struct goacc_asyncqueue *);
void goacc_async_free (struct gomp_device_descr *,
struct goacc_asyncqueue *, void *);
struct goacc_asyncqueue *get_goacc_asyncqueue (int);
@@ -52,6 +52,25 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
return key;
}
+/* Helper for lookup_dev. Iterate over splay tree. */
+
+static splay_tree_key
+lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
+{
+ splay_tree_key k = &node->key;
+ struct target_mem_desc *t = k->tgt;
+
+ if (d >= t->tgt_start && d + s <= t->tgt_end)
+ return k;
+
+ if (node->left)
+ return lookup_dev_1 (node->left, d, s);
+ if (node->right)
+ return lookup_dev_1 (node->right, d, s);
+
+ return NULL;
+}
+
/* Return block containing [D->S), or NULL if not contained.
The list isn't ordered by device address, so we have to iterate
over the whole array. This is not expected to be a common
@@ -59,35 +78,12 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
remains locked on exit. */
static splay_tree_key
-lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
+lookup_dev (splay_tree mem_map, void *d, size_t s)
{
- int i;
- struct target_mem_desc *t;
-
- if (!tgt)
- return NULL;
-
- for (t = tgt; t != NULL; t = t->prev)
- {
- if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
- break;
- }
-
- if (!t)
+ if (!mem_map || !mem_map->root)
return NULL;
- for (i = 0; i < t->list_count; i++)
- {
- void * offset;
-
- splay_tree_key k = &t->array[i].key;
- offset = d - t->tgt_start + k->tgt_offset;
-
- if (k->host_start + offset <= (void *) k->host_end)
- return k;
- }
-
- return NULL;
+ return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
}
/* OpenACC is silent on how memory exhaustion is indicated. We return
@@ -165,7 +161,7 @@ acc_free (void *d)
/* We don't have to call lazy open here, as the ptr value must have
been returned by acc_malloc. It's not permitted to pass NULL in
(unless you got that null from acc_malloc). */
- if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
+ if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
{
void *offset;
@@ -325,7 +321,7 @@ acc_hostptr (void *d)
gomp_mutex_lock (&acc_dev->lock);
- n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
+ n = lookup_dev (&acc_dev->mem_map, d, 1);
if (!n)
{
@@ -422,7 +418,7 @@ acc_map_data (void *h, void *d, size_t s)
(int)s);
}
- if (lookup_dev (thr->dev->openacc.data_environ, d, s))
+ if (lookup_dev (&thr->dev->mem_map, d, s))
{
gomp_mutex_unlock (&acc_dev->lock);
gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
@@ -436,11 +432,6 @@ acc_map_data (void *h, void *d, size_t s)
tgt->list[0].key->refcount = REFCOUNT_INFINITY;
}
- gomp_mutex_lock (&acc_dev->lock);
- tgt->prev = acc_dev->openacc.data_environ;
- acc_dev->openacc.data_environ = tgt;
- gomp_mutex_unlock (&acc_dev->lock);
-
if (profiling_setup_p)
{
thr->prof_info = NULL;
@@ -448,11 +439,83 @@ acc_map_data (void *h, void *d, size_t s)
}
}
+/* Remove the target_mem_desc holding the mapping for MAPNUM HOSTADDRS from
+ the OpenACC data environment pointed to by DATA_ENV. The device lock
+ should be held before calling, and remains locked on exit. */
+
+static void
+gomp_acc_data_env_remove (struct gomp_device_descr *acc_dev,
+ struct target_mem_desc **data_env, void **hostaddrs,
+ int mapnum)
+{
+ struct target_mem_desc *t, *tp;
+
+ for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
+ {
+ bool all_match = true;
+
+ /* We must locate the target descriptor by "value", matching each
+ hostaddr that it describes. */
+ if (t->list_count != mapnum)
+ continue;
+
+ for (int i = 0; i < t->list_count; i++)
+ if (t->list[i].key
+ && (t->list[i].key->host_start + t->list[i].offset
+ != (uintptr_t) hostaddrs[i]))
+ {
+ all_match = false;
+ break;
+ }
+
+ if (all_match)
+ {
+ if (t->refcount > 1)
+ t->refcount--;
+ else
+ {
+ if (tp)
+ tp->prev = t->prev;
+ else
+ *data_env = t->prev;
+ }
+ return;
+ }
+ }
+
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("cannot find data mapping to remove in data environment");
+}
+
+/* Similar, but removes target_mem_desc REMOVE from the DATA_ENV, in case its
+ reference count drops to zero resulting in it being unmapped (in
+ target.c:gomp_unmap_tgt). Unlike the above function it is not an error if
+ REMOVE is not present in the environment. The device lock should be held
+ before calling, and remains locked on exit. */
+
+attribute_hidden void
+gomp_acc_data_env_remove_tgt (struct target_mem_desc **data_env,
+ struct target_mem_desc *remove)
+{
+ struct target_mem_desc *t, *tp;
+
+ for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
+ if (t == remove)
+ {
+ if (tp)
+ tp->prev = t->prev;
+ else
+ *data_env = t->prev;
+ return;
+ }
+}
+
void
acc_unmap_data (void *h)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ struct splay_tree_key_s cur_node;
/* No need to call lazy open, as the address must have been mapped. */
@@ -466,12 +529,11 @@ acc_unmap_data (void *h)
= __builtin_expect (goacc_profiling_setup_p (thr, &prof_info, &api_info),
false);
- size_t host_size;
-
gomp_mutex_lock (&acc_dev->lock);
- splay_tree_key n = lookup_host (acc_dev, h, 1);
- struct target_mem_desc *t;
+ cur_node.host_start = (uintptr_t) h;
+ cur_node.host_end = cur_node.host_start + 1;
+ splay_tree_key n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
if (!n)
{
@@ -479,47 +541,28 @@ acc_unmap_data (void *h)
gomp_fatal ("%p is not a mapped block", (void *)h);
}
- host_size = n->host_end - n->host_start;
-
if (n->host_start != (uintptr_t) h)
{
+ size_t host_size = n->host_end - n->host_start;
gomp_mutex_unlock (&acc_dev->lock);
gomp_fatal ("[%p,%d] surrounds %p",
(void *) n->host_start, (int) host_size, (void *) h);
}
- /* Mark for removal. */
- n->refcount = 1;
+ splay_tree_remove (&acc_dev->mem_map, n);
- t = n->tgt;
+ struct target_mem_desc *tgt = n->tgt;
- if (t->refcount == 2)
+ if (tgt->refcount > 0)
+ tgt->refcount--;
+ else
{
- struct target_mem_desc *tp;
-
- /* This is the last reference, so pull the descriptor off the
- chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
- freeing the device memory. */
- t->tgt_end = 0;
- t->to_free = 0;
-
- for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
- tp = t, t = t->prev)
- if (n->tgt == t)
- {
- if (tp)
- tp->prev = t->prev;
- else
- acc_dev->openacc.data_environ = t->prev;
-
- break;
- }
+ free (tgt->array);
+ free (tgt);
}
gomp_mutex_unlock (&acc_dev->lock);
- gomp_unmap_vars (t, true, false);
-
if (profiling_setup_p)
{
thr->prof_info = NULL;
@@ -585,6 +628,24 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
n->refcount++;
n->dynamic_refcount++;
}
+
+ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)
+ + sizeof (tgt->list[0]));
+ tgt->refcount = 1;
+ tgt->tgt_start = 0;
+ tgt->tgt_end = 0;
+ tgt->to_free = NULL;
+ tgt->prev = acc_dev->openacc.data_environ;
+ tgt->list_count = 1;
+ tgt->device_descr = acc_dev;
+ tgt->list[0].key = n;
+ tgt->list[0].copy_from = false;
+ tgt->list[0].always_copy_from = false;
+ tgt->list[0].do_detach = false;
+ tgt->list[0].offset = (uintptr_t) h - n->host_start;
+ tgt->list[0].length = 0;
+ acc_dev->openacc.data_environ = tgt;
+
gomp_mutex_unlock (&acc_dev->lock);
}
else if (!(f & FLAG_CREATE))
@@ -609,18 +670,19 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
goacc_aq aq = get_goacc_asyncqueue (async);
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
- &kinds, true, GOMP_MAP_VARS_OPENACC);
- /* Initialize dynamic refcount. */
- tgt->list[0].key->dynamic_refcount = 1;
- tgt->list[0].key->attach_count = NULL;
+ &kinds, true,
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA);
- gomp_mutex_lock (&acc_dev->lock);
+ for (int i = 0; i < tgt->list_count; i++)
+ if (tgt->list[i].key)
+ tgt->list[i].key->dynamic_refcount++;
- d = tgt->to_free;
+ gomp_mutex_lock (&acc_dev->lock);
tgt->prev = acc_dev->openacc.data_environ;
acc_dev->openacc.data_environ = tgt;
-
gomp_mutex_unlock (&acc_dev->lock);
+
+ d = tgt->to_free;
}
if (profiling_setup_p)
@@ -753,11 +815,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
n->dynamic_refcount = 0;
n->attach_count = NULL;
}
- if (n->refcount < n->dynamic_refcount)
- {
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("Dynamic reference counting assert fail\n");
- }
if (f & FLAG_FINALIZE)
{
@@ -772,21 +829,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
if (n->refcount == 0)
{
- if (n->tgt->refcount == 2)
- {
- struct target_mem_desc *tp, *t;
- for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
- tp = t, t = t->prev)
- if (n->tgt == t)
- {
- if (tp)
- tp->prev = t->prev;
- else
- acc_dev->openacc.data_environ = t->prev;
- break;
- }
- }
-
if (f & FLAG_COPYOUT)
{
goacc_aq aq = get_goacc_asyncqueue (async);
@@ -968,38 +1010,16 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
- if (acc_is_present (*hostaddrs, *sizes))
- {
- splay_tree_key n;
- gomp_mutex_lock (&acc_dev->lock);
- n = lookup_host (acc_dev, *hostaddrs, *sizes);
- gomp_mutex_unlock (&acc_dev->lock);
-
- tgt = n->tgt;
- for (size_t i = 0; i < tgt->list_count; i++)
- if (tgt->list[i].key == n)
- {
- for (size_t j = 0; j < mapnum; j++)
- if (i + j < tgt->list_count && tgt->list[i + j].key)
- {
- tgt->list[i + j].key->refcount++;
- tgt->list[i + j].key->dynamic_refcount++;
- }
- return;
- }
- /* Should not reach here. */
- gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
- }
-
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
goacc_aq aq = get_goacc_asyncqueue (async);
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
- NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+ NULL, sizes, kinds, true,
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA);
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
- /* Initialize dynamic refcount. */
- tgt->list[0].key->dynamic_refcount = 1;
- tgt->list[0].key->attach_count = NULL;
+ for (size_t i = 0; i < tgt->list_count; i++)
+ if (tgt->list[i].key)
+ tgt->list[i].key->dynamic_refcount++;
gomp_mutex_lock (&acc_dev->lock);
tgt->prev = acc_dev->openacc.data_environ;
@@ -1008,96 +1028,83 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
}
void
-gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
- int finalize, int mapnum)
+gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ int async, void *detach_from, bool finalize,
+ int mapnum)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ struct splay_tree_key_s cur_node;
splay_tree_key n;
- struct target_mem_desc *t;
- int minrefs = (mapnum == 1) ? 2 : 3;
-
- if (!acc_is_present (h, s))
- return;
gomp_mutex_lock (&acc_dev->lock);
- n = lookup_host (acc_dev, h, 1);
-
- if (!n)
+ if (detach_from)
{
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("%p is not a mapped block", (void *)h);
+ splay_tree_key n2 = lookup_host (acc_dev, detach_from, 1);
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ gomp_detach_pointer (acc_dev, aq, n2, (uintptr_t) detach_from, finalize,
+ NULL);
}
- gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
-
- t = n->tgt;
+ gomp_acc_data_env_remove (acc_dev, &acc_dev->openacc.data_environ, hostaddrs,
+ mapnum);
- if (n->refcount < n->dynamic_refcount)
+ for (int i = 0; i < mapnum; i++)
{
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("Dynamic reference counting assert fail\n");
- }
-
- if (finalize)
- {
- n->refcount -= n->dynamic_refcount;
- n->dynamic_refcount = 0;
- }
- else if (n->dynamic_refcount)
- {
- n->dynamic_refcount--;
- n->refcount--;
- }
+ int kind = kinds[i] & 0xff;
+ bool copyfrom = false;
- gomp_mutex_unlock (&acc_dev->lock);
-
- if (n->refcount == 0)
- {
- if (t->refcount == minrefs)
- {
- /* This is the last reference, so pull the descriptor off the
- chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
- freeing the device memory. */
- struct target_mem_desc *tp;
- for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
- tp = t, t = t->prev)
+ switch (kind)
+ {
+ case GOMP_MAP_FROM:
+ case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_ALWAYS_FROM:
+ copyfrom = true;
+ /* Fallthrough. */
+ case GOMP_MAP_TO_PSET:
+ case GOMP_MAP_POINTER:
+ case GOMP_MAP_DELETE:
+ case GOMP_MAP_RELEASE:
+ case GOMP_MAP_DETACH:
+ case GOMP_MAP_FORCE_DETACH:
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start
+ + ((kind == GOMP_MAP_DETACH
+ || kind == GOMP_MAP_FORCE_DETACH
+ || kind == GOMP_MAP_POINTER)
+ ? sizeof (void *) : sizes[i]);
+ n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+ if (n == NULL)
+ continue;
+ if (finalize)
{
- if (n->tgt == t)
- {
- if (tp)
- tp->prev = t->prev;
- else
- acc_dev->openacc.data_environ = t->prev;
- break;
- }
+ n->refcount -= n->dynamic_refcount;
+ n->dynamic_refcount = 0;
}
- }
-
- /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */
- n->refcount = 1;
- t->refcount = minrefs;
- for (size_t i = 0; i < t->list_count; i++)
- if (t->list[i].key == n)
- {
- t->list[i].copy_from = force_copyfrom ? 1 : 0;
- break;
- }
-
- /* If running synchronously, unmap immediately. */
- if (async < acc_async_noval)
- gomp_unmap_vars (t, true, finalize);
- else
- {
- goacc_aq aq = get_goacc_asyncqueue (async);
- goacc_async_copyout_unmap_vars (t, aq, finalize);
+ else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
+ {
+ n->refcount--;
+ n->dynamic_refcount--;
+ }
+ if (copyfrom)
+ gomp_copy_dev2host (acc_dev, NULL, (void *) cur_node.host_start,
+ (void *) (n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start
+ - n->host_start),
+ cur_node.host_end - cur_node.host_start);
+ if (n->refcount == 0)
+ gomp_remove_var (acc_dev, n);
+ break;
+ default:
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("gomp_acc_remove_pointer unhandled kind 0x%.2x",
+ kind);
}
}
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
+ gomp_mutex_unlock (&acc_dev->lock);
}
@@ -58,8 +58,12 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds)
case GOMP_MAP_FORCE_TO:
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_TOFROM:
+ case GOMP_MAP_FORCE_TOFROM:
case GOMP_MAP_ALLOC:
case GOMP_MAP_RELEASE:
+ case GOMP_MAP_DECLARE_ALLOCATE:
+ case GOMP_MAP_DECLARE_DEALLOCATE:
{
unsigned char kind1 = kinds[pos + 1] & 0xff;
if (kind1 == GOMP_MAP_POINTER
@@ -392,7 +396,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
&api_info);
}
/* If running synchronously, unmap immediately. */
- gomp_unmap_vars (tgt, true, false);
+ gomp_unmap_vars (tgt, true);
if (profiling_dispatch_p)
{
prof_info.event_type = acc_ev_exit_data_end;
@@ -410,7 +414,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
else
acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs,
devaddrs, dims, tgt, aq);
- goacc_async_copyout_unmap_vars (tgt, aq, false);
+ goacc_async_copyout_unmap_vars (tgt, aq);
}
out:
@@ -647,7 +651,7 @@ GOACC_data_end (void)
gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
thr->mapped_data = tgt->prev;
- gomp_unmap_vars (tgt, true, false);
+ gomp_unmap_vars (tgt, true);
gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
if (profiling_dispatch_p)
@@ -845,18 +849,39 @@ GOACC_enter_exit_data (int device, size_t mapnum,
int elems = sizes[i];
struct splay_tree_key_s k;
splay_tree_key str;
- k.host_start = (uintptr_t) hostaddrs[i];
- k.host_end = k.host_start + 1;
+ uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
+ uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
+ + sizes[i + elems];
+ k.host_start = elems_lo;
+ k.host_end = elems_hi;
gomp_mutex_lock (&acc_dev->lock);
str = splay_tree_lookup (&acc_dev->mem_map, &k);
gomp_mutex_unlock (&acc_dev->lock);
- /* We increment the dynamic reference count for the struct
- itself by the number of struct elements that we
- mapped. */
- if (str->refcount != REFCOUNT_INFINITY)
+ if (str == NULL)
{
- str->refcount += elems;
- str->dynamic_refcount += elems;
+ size_t mapsize = elems_hi - elems_lo;
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ struct target_mem_desc *tgt;
+ unsigned short thiskind = GOMP_MAP_ALLOC;
+ int j;
+ for (j = 0; j < elems; j++)
+ if ((kinds[i + j] & 0xff) != GOMP_MAP_ALLOC)
+ {
+ thiskind = GOMP_MAP_TO;
+ break;
+ }
+ tgt = gomp_map_vars_async (acc_dev, aq, 1,
+ &hostaddrs[i + 1], NULL, &mapsize, &thiskind,
+ true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+
+ for (j = 0; j < tgt->list_count; j++)
+ if (tgt->list[j].key)
+ tgt->list[j].key->dynamic_refcount++;
+
+ gomp_mutex_lock (&acc_dev->lock);
+ tgt->prev = acc_dev->openacc.data_environ;
+ acc_dev->openacc.data_environ = tgt;
+ gomp_mutex_unlock (&acc_dev->lock);
}
i += elems;
}
@@ -962,18 +987,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
int elems = sizes[i];
struct splay_tree_key_s k;
splay_tree_key str;
- k.host_start = (uintptr_t) hostaddrs[i];
- k.host_end = k.host_start + 1;
+ uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
+ uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
+ + sizes[i + elems];
+ k.host_start = elems_lo;
+ k.host_end = elems_hi;
gomp_mutex_lock (&acc_dev->lock);
str = splay_tree_lookup (&acc_dev->mem_map, &k);
gomp_mutex_unlock (&acc_dev->lock);
- /* Decrement dynamic reference count for the struct by the
- number of elements that we are unmapping. */
- if (str->dynamic_refcount >= elems)
- {
- str->dynamic_refcount -= elems;
- str->refcount -= elems;
- }
+ if (str == NULL)
+ gomp_fatal ("[%p,%ld] is not mapped", (void *) elems_lo,
+ (unsigned long) (elems_hi - elems_lo));
i += elems;
}
break;
@@ -989,10 +1013,14 @@ GOACC_enter_exit_data (int device, size_t mapnum,
&sizes[i], &kinds[i]);
else
{
- bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
- || kind == GOMP_MAP_FROM);
- gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom,
- async, finalize, pointer);
+ unsigned short ptrkind = kinds[i + pointer - 1] & 0xff;
+ bool detach = (ptrkind == GOMP_MAP_DETACH
+ || ptrkind == GOMP_MAP_FORCE_DETACH);
+ void *detach_from = detach ? hostaddrs[i + pointer - 1]
+ : NULL;
+ gomp_acc_remove_pointer (&hostaddrs[i], &sizes[i], &kinds[i],
+ async, detach_from, finalize,
+ pointer);
/* See the above comment. */
}
i += pointer - 1;
@@ -629,7 +629,10 @@ gomp_detach_pointer (struct gomp_device_descr *devicep,
idx = (detach_from - n->host_start) / sizeof (void *);
if (!n->attach_count)
- gomp_fatal ("no attachment counters for struct");
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("no attachment counters for struct");
+ }
if (finalize)
n->attach_count[idx] = 1;
@@ -1013,7 +1016,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
continue;
}
cur_node.host_start = (uintptr_t) hostaddrs[i];
- if (!GOMP_MAP_POINTER_P (kind & typemask))
+ if (!GOMP_MAP_POINTER_P (kind & typemask)
+ && (kind & typemask) != GOMP_MAP_ATTACH)
cur_node.host_end = cur_node.host_start + sizes[i];
else
cur_node.host_end = cur_node.host_start + sizeof (void *);
@@ -1281,7 +1285,9 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
tgt->list[i].length = n->host_end - n->host_start;
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
- tgt->list[i].do_detach = true;
+ tgt->list[i].do_detach
+ = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+ n->refcount++;
}
else
{
@@ -1622,6 +1628,8 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
if (tgt->tgt_end)
gomp_free_device_memory (tgt->device_descr, tgt->to_free);
+ gomp_acc_data_env_remove_tgt (&tgt->device_descr->openacc.data_environ, tgt);
+
free (tgt->array);
free (tgt);
}
@@ -1650,17 +1658,18 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
has been done already. */
attribute_hidden void
-gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, bool finalize)
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
- gomp_unmap_vars_async (tgt, do_copyfrom, NULL, finalize);
+ gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
}
attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
- struct goacc_asyncqueue *aq, bool finalize)
+ struct goacc_asyncqueue *aq)
{
struct gomp_device_descr *devicep = tgt->device_descr;
+
if (tgt->list_count == 0)
{
free (tgt);
@@ -1685,15 +1694,15 @@ gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
if (k != NULL && tgt->list[i].do_detach)
gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
- + tgt->list[i].offset, finalize,
- NULL);
+ + tgt->list[i].offset,
+ k->refcount == 1, NULL);
}
for (i = 0; i < tgt->list_count; i++)
{
splay_tree_key k = tgt->list[i].key;
- if (k == NULL || tgt->list[i].do_detach)
+ if (k == NULL)
continue;
bool do_unmap = false;
@@ -2314,7 +2323,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
NULL);
- gomp_unmap_vars (tgt_vars, true, false);
+ gomp_unmap_vars (tgt_vars, true);
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
@@ -2458,7 +2467,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
args);
if (tgt_vars)
- gomp_unmap_vars (tgt_vars, true, false);
+ gomp_unmap_vars (tgt_vars, true);
}
/* Host fallback for GOMP_target_data{,_ext} routines. */
@@ -2527,7 +2536,7 @@ GOMP_target_end_data (void)
{
struct target_mem_desc *tgt = icv->target_data;
icv->target_data = tgt->prev;
- gomp_unmap_vars (tgt, true, false);
+ gomp_unmap_vars (tgt, true);
}
}
@@ -2762,7 +2771,7 @@ gomp_target_task_fn (void *data)
if (ttask->state == GOMP_TARGET_TASK_FINISHED)
{
if (ttask->tgt)
- gomp_unmap_vars (ttask->tgt, true, false);
+ gomp_unmap_vars (ttask->tgt, true);
return false;
}
@@ -182,13 +182,13 @@ main (int argc, char **argv)
exit (EXIT_FAILURE);
}
+ acc_delete (&h_X[0], N * sizeof (float));
+ acc_delete (&h_Y1[0], N * sizeof (float));
+
free (h_X);
free (h_Y1);
free (h_Y2);
- acc_free (d_X);
- acc_free (d_Y);
-
context_check (pctx);
s = cublasDestroy (h);
@@ -176,13 +176,13 @@ main (int argc, char **argv)
exit (EXIT_FAILURE);
}
+ acc_delete (&h_X[0], N * sizeof (float));
+ acc_delete (&h_Y1[0], N * sizeof (float));
+
free (h_X);
free (h_Y1);
free (h_Y2);
- acc_free (d_X);
- acc_free (d_Y);
-
context_check (pctx);
s = cublasDestroy (h);
new file mode 100644
@@ -0,0 +1,59 @@
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+ int a;
+ int **b;
+};
+
+int
+main ()
+{
+ int n = 100, i, j, k;
+ struct dc v = { .a = 3 };
+
+ v.b = (int **) malloc (sizeof (int *) * n);
+ for (i = 0; i < n; i++)
+ v.b[i] = (int *) malloc (sizeof (int) * n);
+
+ for (k = 0; k < 16; k++)
+ {
+#pragma acc data copy(v)
+ {
+#pragma acc data copy(v.b[:n])
+ {
+ for (i = 0; i < n; i++)
+ {
+ acc_copyin (v.b[i], sizeof (int) * n);
+ acc_attach ((void **) &v.b[i]);
+ }
+
+#pragma acc parallel loop
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ v.b[i][j] = v.a + i + j;
+
+ for (i = 0; i < n; i++)
+ {
+ acc_detach ((void **) &v.b[i]);
+ acc_copyout (v.b[i], sizeof (int) * n);
+ }
+ }
+ }
+
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ assert (v.b[i][j] == v.a + i + j);
+
+ assert (!acc_is_present (&v, sizeof (v)));
+ assert (!acc_is_present (v.b, sizeof (int *) * n));
+ for (i = 0; i < n; i++)
+ assert (!acc_is_present (v.b[i], sizeof (int) * n));
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+ int a;
+ int *b;
+};
+
+int
+main ()
+{
+ int n = 100, i, j, k;
+ struct dc v = { .a = 3 };
+
+ v.b = (int *) malloc (sizeof (int) * n);
+
+ for (k = 0; k < 16; k++)
+ {
+#pragma acc enter data copyin(v.a, v.b[0:n])
+
+#pragma acc enter data pcopyin(v.b[0:n])
+
+#pragma acc parallel loop attach(v.b)
+ for (i = 0; i < n; i++)
+ v.b[i] = v.a + i;
+
+#pragma acc exit data copyout(v.b[:n])
+#pragma acc exit data delete(v) finalize
+
+ for (i = 0; i < n; i++)
+ assert (v.b[i] == v.a + i);
+
+ assert (!acc_is_present (&v, sizeof (v)));
+ assert (!acc_is_present (v.b, sizeof (int *) * n));
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,53 @@
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+ int a;
+ int *b;
+ int *c;
+ int *d;
+};
+
+int
+main ()
+{
+ int n = 100, i, j, k;
+ struct dc v = { .a = 3 };
+
+ v.b = (int *) malloc (sizeof (int) * n);
+ v.c = (int *) malloc (sizeof (int) * n);
+ v.d = (int *) malloc (sizeof (int) * n);
+
+#pragma acc enter data copyin(v)
+
+ for (k = 0; k < 16; k++)
+ {
+#pragma acc enter data copyin(v.a, v.b[:n], v.c[:n], v.d[:n])
+
+#pragma acc parallel loop
+ for (i = 0; i < n; i++)
+ v.b[i] = v.a + i;
+
+#pragma acc exit data copyout(v.b[:n])
+#pragma acc exit data copyout(v.c[:n])
+#pragma acc exit data copyout(v.d[:n])
+
+ for (i = 0; i < n; i++)
+ assert (v.b[i] == v.a + i);
+
+ assert (acc_is_present (&v, sizeof (v)));
+ assert (!acc_is_present (v.b, sizeof (int *) * n));
+ assert (!acc_is_present (v.c, sizeof (int *) * n));
+ assert (!acc_is_present (v.d, sizeof (int *) * n));
+ }
+
+#pragma acc exit data copyout(v)
+
+ assert (!acc_is_present (&v, sizeof (v)));
+
+ return 0;
+}
@@ -92,10 +92,6 @@ program test
if (acc_is_present (c) .eqv. .TRUE.) call abort
- !$acc exit data delete (c(0:N))
-
- if (acc_is_present (c) .eqv. .TRUE.) call abort
-
do i = 1, N
if (c(i) .ne. 3.0) call abort
end do
@@ -113,11 +109,6 @@ program test
if (acc_is_present (c) .eqv. .TRUE.) call abort
if (acc_is_present (d) .eqv. .TRUE.) call abort
- !$acc exit data delete (c(0:N), d(0:N))
-
- if (acc_is_present (c) .eqv. .TRUE.) call abort
- if (acc_is_present (d) .eqv. .TRUE.) call abort
-
do i = 1, N
if (c(i) .ne. 5.0) call abort
if (d(i) .ne. 9.0) call abort
@@ -177,8 +168,8 @@ program test
!$acc exit data delete (c(0:N), d(0:N))
- !if (acc_is_present (c) .eqv. .TRUE.) call abort
- !if (acc_is_present (d) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .FALSE.) call abort
+ if (acc_is_present (d) .eqv. .FALSE.) call abort
!$acc exit data delete (c(0:N), d(0:N))
@@ -190,12 +181,7 @@ program test
if (acc_is_present (c) .eqv. .FALSE.) call abort
if (acc_is_present (d) .eqv. .TRUE.) call abort
- !$acc exit data delete (c(0:N), d(0:N))
-
- if (acc_is_present (c) .eqv. .TRUE.) call abort
- if (acc_is_present (d) .eqv. .TRUE.) call abort
-
- !$acc exit data delete (c(0:N), d(0:N))
+ !$acc exit data delete (c(0:N))
if (acc_is_present (c) .eqv. .TRUE.) call abort
if (acc_is_present (d) .eqv. .TRUE.) call abort