@@ -860,8 +860,11 @@ struct splay_tree_key_s {
uintptr_t tgt_offset;
/* Reference count. */
uintptr_t refcount;
- /* Dynamic reference count. */
- uintptr_t dynamic_refcount;
+ /* Reference counts beyond those that represent genuine references in the
+ linked splay tree key/target memory structures, e.g. for multiple OpenACC
+ "present increment" operations (via "acc enter data") refering to the same
+ host-memory block. */
+ uintptr_t virtual_refcount;
/* For a block with attached pointers, the attachment counters for each. */
unsigned short *attach_count;
/* Pointer to the original mapping of "omp declare target link" object. */
@@ -887,13 +890,6 @@ splay_compare (splay_tree_key x, splay_tree_key y)
typedef struct acc_dispatch_t
{
- /* This is a linked list of data mapped using the
- acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas.
- Unlike mapped_data in the goacc_thread struct, unmapping can
- happen out-of-order with respect to mapping. */
- /* This is guarded by the lock in the "outer" struct gomp_device_descr. */
- struct target_mem_desc *data_environ;
-
/* Execute. */
__typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
__typeof (GOMP_OFFLOAD_openacc_exec_params) *exec_params_func;
@@ -1010,9 +1006,9 @@ enum gomp_map_vars_kind
struct gomp_coalesce_buf;
-extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
-extern void gomp_acc_remove_pointer (void **, size_t *, unsigned short *,
- int, void *, bool, int);
+extern void gomp_acc_remove_pointer (struct gomp_device_descr *, void **,
+ size_t *, unsigned short *, int, bool,
+ int);
extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
unsigned short *);
struct gomp_coalesce_buf;
@@ -1041,8 +1037,6 @@ extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
size_t, void **, void **,
size_t *, void *, bool,
enum gomp_map_vars_kind);
-extern void gomp_acc_data_env_remove_tgt (struct target_mem_desc **,
- struct target_mem_desc *);
extern void gomp_unmap_tgt (struct target_mem_desc *);
extern void gomp_unmap_vars (struct target_mem_desc *, bool);
extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
@@ -385,6 +385,24 @@ goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
(void *) tgt);
}
+/* Remove a variable asynchronously. This actually removes the variable
+ mapping immediately, but retains the linked target_mem_desc until the
+ asynchronous operation has completed (as it may still refer to target
+ memory). The device lock must be held before entry, and remains locked on
+ exit. */
+
+attribute_hidden void
+goacc_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key n,
+ struct goacc_asyncqueue *aq)
+{
+ struct target_mem_desc *tgt = n->tgt;
+ assert (tgt);
+ tgt->refcount++;
+ gomp_remove_var (devicep, n);
+ devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
+ (void *) tgt);
+}
+
attribute_hidden void
goacc_async_free (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, void *ptr)
@@ -286,8 +286,6 @@ static struct gomp_device_descr host_dispatch =
.state = GOMP_DEVICE_UNINITIALIZED,
.openacc = {
- .data_environ = NULL,
-
.exec_func = host_openacc_exec,
.exec_params_func = host_openacc_exec_params,
@@ -389,9 +389,9 @@ acc_shutdown_1 (acc_device_t d)
{
while (walk->dev->mem_map.root)
{
- struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt;
-
- gomp_unmap_vars (tgt, false);
+ splay_tree_key k = &walk->dev->mem_map.root->key;
+ k->link_key = NULL;
+ gomp_remove_var (walk->dev, k);
}
walk->dev = NULL;
@@ -109,10 +109,15 @@ void goacc_restore_bind (void);
void goacc_lazy_initialize (void);
void goacc_host_init (void);
+struct splay_tree_key_s;
+
void goacc_init_asyncqueues (struct gomp_device_descr *);
bool goacc_fini_asyncqueues (struct gomp_device_descr *);
void goacc_async_copyout_unmap_vars (struct target_mem_desc *,
struct goacc_asyncqueue *);
+void goacc_remove_var_async (struct gomp_device_descr *devicep,
+ struct splay_tree_key_s *n,
+ struct goacc_asyncqueue *aq);
void goacc_async_free (struct gomp_device_descr *,
struct goacc_asyncqueue *, void *);
struct goacc_asyncqueue *get_goacc_asyncqueue (int);
@@ -439,77 +439,6 @@ acc_map_data (void *h, void *d, size_t s)
}
}
-/* Remove the target_mem_desc holding the mapping for MAPNUM HOSTADDRS from
- the OpenACC data environment pointed to by DATA_ENV. The device lock
- should be held before calling, and remains locked on exit. */
-
-static void
-gomp_acc_data_env_remove (struct gomp_device_descr *acc_dev,
- struct target_mem_desc **data_env, void **hostaddrs,
- int mapnum)
-{
- struct target_mem_desc *t, *tp;
-
- for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
- {
- bool all_match = true;
-
- /* We must locate the target descriptor by "value", matching each
- hostaddr that it describes. */
- if (t->list_count != mapnum)
- continue;
-
- for (int i = 0; i < t->list_count; i++)
- if (t->list[i].key
- && (t->list[i].key->host_start + t->list[i].offset
- != (uintptr_t) hostaddrs[i]))
- {
- all_match = false;
- break;
- }
-
- if (all_match)
- {
- if (t->refcount > 1)
- t->refcount--;
- else
- {
- if (tp)
- tp->prev = t->prev;
- else
- *data_env = t->prev;
- }
- return;
- }
- }
-
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("cannot find data mapping to remove in data environment");
-}
-
-/* Similar, but removes target_mem_desc REMOVE from the DATA_ENV, in case its
- reference count drops to zero resulting in it being unmapped (in
- target.c:gomp_unmap_tgt). Unlike the above function it is not an error if
- REMOVE is not present in the environment. The device lock should be held
- before calling, and remains locked on exit. */
-
-attribute_hidden void
-gomp_acc_data_env_remove_tgt (struct target_mem_desc **data_env,
- struct target_mem_desc *remove)
-{
- struct target_mem_desc *t, *tp;
-
- for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
- if (t == remove)
- {
- if (tp)
- tp->prev = t->prev;
- else
- *data_env = t->prev;
- return;
- }
-}
-
void
acc_unmap_data (void *h)
{
@@ -626,26 +555,9 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
if (n->refcount != REFCOUNT_INFINITY)
{
n->refcount++;
- n->dynamic_refcount++;
+ n->virtual_refcount++;
}
- struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)
- + sizeof (tgt->list[0]));
- tgt->refcount = 1;
- tgt->tgt_start = 0;
- tgt->tgt_end = 0;
- tgt->to_free = NULL;
- tgt->prev = acc_dev->openacc.data_environ;
- tgt->list_count = 1;
- tgt->device_descr = acc_dev;
- tgt->list[0].key = n;
- tgt->list[0].copy_from = false;
- tgt->list[0].always_copy_from = false;
- tgt->list[0].do_detach = false;
- tgt->list[0].offset = (uintptr_t) h - n->host_start;
- tgt->list[0].length = 0;
- acc_dev->openacc.data_environ = tgt;
-
gomp_mutex_unlock (&acc_dev->lock);
}
else if (!(f & FLAG_CREATE))
@@ -655,7 +567,6 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
}
else
{
- struct target_mem_desc *tgt;
size_t mapnum = 1;
unsigned short kinds;
void *hostaddrs = h;
@@ -669,20 +580,15 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
goacc_aq aq = get_goacc_asyncqueue (async);
- tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
- &kinds, true,
- GOMP_MAP_VARS_OPENACC_ENTER_DATA);
-
- for (int i = 0; i < tgt->list_count; i++)
- if (tgt->list[i].key)
- tgt->list[i].key->dynamic_refcount++;
+ gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, &kinds,
+ true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
gomp_mutex_lock (&acc_dev->lock);
- tgt->prev = acc_dev->openacc.data_environ;
- acc_dev->openacc.data_environ = tgt;
+ n = lookup_host (acc_dev, h, s);
+ assert (n != NULL);
+ d = (void *) (n->tgt->tgt_start + n->tgt_offset + (uintptr_t) h
+ - n->host_start);
gomp_mutex_unlock (&acc_dev->lock);
-
- d = tgt->to_free;
}
if (profiling_setup_p)
@@ -765,7 +671,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
{
size_t host_size;
splay_tree_key n;
- void *d;
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
@@ -797,9 +702,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
}
- d = (void *) (n->tgt->tgt_start + n->tgt_offset
- + (uintptr_t) h - n->host_start);
-
host_size = n->host_end - n->host_start;
if (n->host_start != (uintptr_t) h || host_size != s)
@@ -812,29 +714,37 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
if (n->refcount == REFCOUNT_INFINITY)
{
n->refcount = 0;
- n->dynamic_refcount = 0;
- n->attach_count = NULL;
+ n->virtual_refcount = 0;
}
if (f & FLAG_FINALIZE)
{
- n->refcount -= n->dynamic_refcount;
- n->dynamic_refcount = 0;
+ n->refcount -= n->virtual_refcount;
+ n->virtual_refcount = 0;
}
- else if (n->dynamic_refcount)
+
+ if (n->virtual_refcount > 0)
{
- n->dynamic_refcount--;
n->refcount--;
+ n->virtual_refcount--;
}
+ else if (n->refcount > 0)
+ n->refcount--;
if (n->refcount == 0)
{
+ goacc_aq aq = get_goacc_asyncqueue (async);
+
if (f & FLAG_COPYOUT)
- {
- goacc_aq aq = get_goacc_asyncqueue (async);
+ {
+ void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + (uintptr_t) h - n->host_start);
gomp_copy_dev2host (acc_dev, aq, h, d, s);
}
- gomp_remove_var (acc_dev, n);
+ if (aq)
+ goacc_remove_var_async (acc_dev, n, aq);
+ else
+ gomp_remove_var (acc_dev, n);
}
gomp_mutex_unlock (&acc_dev->lock);
@@ -1003,53 +913,15 @@ gomp_acc_declare_allocate (bool allocate, size_t mapnum, void **hostaddrs,
}
void
-gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
- void *kinds, int async)
+gomp_acc_remove_pointer (struct gomp_device_descr *acc_dev, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds, int async,
+ bool finalize, int mapnum)
{
- struct target_mem_desc *tgt;
- struct goacc_thread *thr = goacc_thread ();
- struct gomp_device_descr *acc_dev = thr->dev;
-
- gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
- goacc_aq aq = get_goacc_asyncqueue (async);
- tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
- NULL, sizes, kinds, true,
- GOMP_MAP_VARS_OPENACC_ENTER_DATA);
- gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
-
- for (size_t i = 0; i < tgt->list_count; i++)
- if (tgt->list[i].key)
- tgt->list[i].key->dynamic_refcount++;
-
- gomp_mutex_lock (&acc_dev->lock);
- tgt->prev = acc_dev->openacc.data_environ;
- acc_dev->openacc.data_environ = tgt;
- gomp_mutex_unlock (&acc_dev->lock);
-}
-
-void
-gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds,
- int async, void *detach_from, bool finalize,
- int mapnum)
-{
- struct goacc_thread *thr = goacc_thread ();
- struct gomp_device_descr *acc_dev = thr->dev;
struct splay_tree_key_s cur_node;
splay_tree_key n;
gomp_mutex_lock (&acc_dev->lock);
- if (detach_from)
- {
- splay_tree_key n2 = lookup_host (acc_dev, detach_from, 1);
- goacc_aq aq = get_goacc_asyncqueue (async);
- gomp_detach_pointer (acc_dev, aq, n2, (uintptr_t) detach_from, finalize,
- NULL);
- }
-
- gomp_acc_data_env_remove (acc_dev, &acc_dev->openacc.data_environ, hostaddrs,
- mapnum);
-
for (int i = 0; i < mapnum; i++)
{
int kind = kinds[i] & 0xff;
@@ -1062,6 +934,7 @@ gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds,
case GOMP_MAP_ALWAYS_FROM:
copyfrom = true;
/* Fallthrough. */
+
case GOMP_MAP_TO_PSET:
case GOMP_MAP_POINTER:
case GOMP_MAP_DELETE:
@@ -1075,27 +948,41 @@ gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds,
|| kind == GOMP_MAP_POINTER)
? sizeof (void *) : sizes[i]);
n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
if (n == NULL)
continue;
+
+ if (n->refcount == REFCOUNT_INFINITY)
+ {
+ n->refcount = 1;
+ n->virtual_refcount = 0;
+ }
+
if (finalize)
{
- n->refcount -= n->dynamic_refcount;
- n->dynamic_refcount = 0;
+ n->refcount -= n->virtual_refcount;
+ n->virtual_refcount = 0;
}
- else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
+
+ if (n->virtual_refcount > 0)
{
n->refcount--;
- n->dynamic_refcount--;
+ n->virtual_refcount--;
}
+ else if (n->refcount > 0)
+ n->refcount--;
+
if (copyfrom)
gomp_copy_dev2host (acc_dev, NULL, (void *) cur_node.host_start,
(void *) (n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start
- n->host_start),
cur_node.host_end - cur_node.host_start);
+
if (n->refcount == 0)
gomp_remove_var (acc_dev, n);
break;
+
default:
gomp_mutex_unlock (&acc_dev->lock);
gomp_fatal ("gomp_acc_remove_pointer unhandled kind 0x%.2x",
@@ -1103,7 +990,6 @@ gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds,
}
}
-
gomp_mutex_unlock (&acc_dev->lock);
}
@@ -69,7 +69,8 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds)
if (kind1 == GOMP_MAP_POINTER
|| kind1 == GOMP_MAP_ALWAYS_POINTER
|| kind1 == GOMP_MAP_ATTACH
- || kind1 == GOMP_MAP_DETACH)
+ || kind1 == GOMP_MAP_DETACH
+ || kind1 == GOMP_MAP_FORCE_DETACH)
return 2;
else if (kind1 == GOMP_MAP_TO_PSET)
return 3;
@@ -847,42 +848,10 @@ GOACC_enter_exit_data (int device, size_t mapnum,
case GOMP_MAP_STRUCT:
{
int elems = sizes[i];
- struct splay_tree_key_s k;
- splay_tree_key str;
- uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
- uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
- + sizes[i + elems];
- k.host_start = elems_lo;
- k.host_end = elems_hi;
- gomp_mutex_lock (&acc_dev->lock);
- str = splay_tree_lookup (&acc_dev->mem_map, &k);
- gomp_mutex_unlock (&acc_dev->lock);
- if (str == NULL)
- {
- size_t mapsize = elems_hi - elems_lo;
- goacc_aq aq = get_goacc_asyncqueue (async);
- struct target_mem_desc *tgt;
- unsigned short thiskind = GOMP_MAP_ALLOC;
- int j;
- for (j = 0; j < elems; j++)
- if ((kinds[i + j] & 0xff) != GOMP_MAP_ALLOC)
- {
- thiskind = GOMP_MAP_TO;
- break;
- }
- tgt = gomp_map_vars_async (acc_dev, aq, 1,
- &hostaddrs[i + 1], NULL, &mapsize, &thiskind,
- true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
-
- for (j = 0; j < tgt->list_count; j++)
- if (tgt->list[j].key)
- tgt->list[j].key->dynamic_refcount++;
-
- gomp_mutex_lock (&acc_dev->lock);
- tgt->prev = acc_dev->openacc.data_environ;
- acc_dev->openacc.data_environ = tgt;
- gomp_mutex_unlock (&acc_dev->lock);
- }
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ gomp_map_vars_async (acc_dev, aq, elems + 1, &hostaddrs[i],
+ NULL, &sizes[i], &kinds[i], true,
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA);
i += elems;
}
break;
@@ -898,8 +867,15 @@ GOACC_enter_exit_data (int device, size_t mapnum,
gomp_acc_declare_allocate (true, pointer, &hostaddrs[i],
&sizes[i], &kinds[i]);
else
- gomp_acc_insert_pointer (pointer, &hostaddrs[i],
- &sizes[i], &kinds[i], async);
+ {
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ for (int j = 0; j < 2; j++)
+ gomp_map_vars_async (acc_dev, aq,
+ (j == 0 || pointer == 2) ? 1 : 2,
+ &hostaddrs[i + j], NULL,
+ &sizes[i + j], &kinds[i + j], true,
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+ }
/* Increment 'i' by two because OpenACC requires fortran
arrays to be contiguous, so each PSET is associated with
one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
@@ -930,8 +906,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
}
else
{
- /* This loop only handles explicit "detach" clauses that are not an
- implicit part of a copy{,in,out}, etc. mapping. */
+ /* Handle "detach" before copyback/deletion of mapped data. */
for (i = 0; i < mapnum; i++)
{
unsigned char kind = kinds[i] & 0xff;
@@ -948,7 +923,16 @@ GOACC_enter_exit_data (int device, size_t mapnum,
i += sizes[i];
}
else
- i += pointer - 1;
+ {
+ unsigned char kind2 = kinds[i + pointer - 1] & 0xff;
+
+ if (kind2 == GOMP_MAP_DETACH)
+ acc_detach (hostaddrs[i + pointer - 1]);
+ else if (kind2 == GOMP_MAP_FORCE_DETACH)
+ acc_detach_finalize (hostaddrs[i + pointer - 1]);
+
+ i += pointer - 1;
+ }
}
for (i = 0; i < mapnum; ++i)
@@ -985,19 +969,39 @@ GOACC_enter_exit_data (int device, size_t mapnum,
case GOMP_MAP_STRUCT:
{
int elems = sizes[i];
- struct splay_tree_key_s k;
- splay_tree_key str;
- uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
- uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
- + sizes[i + elems];
- k.host_start = elems_lo;
- k.host_end = elems_hi;
- gomp_mutex_lock (&acc_dev->lock);
- str = splay_tree_lookup (&acc_dev->mem_map, &k);
- gomp_mutex_unlock (&acc_dev->lock);
- if (str == NULL)
- gomp_fatal ("[%p,%ld] is not mapped", (void *) elems_lo,
- (unsigned long) (elems_hi - elems_lo));
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ for (int j = 1; j <= elems; j++)
+ {
+ struct splay_tree_key_s k;
+ k.host_start = (uintptr_t) hostaddrs[i + j];
+ k.host_end = k.host_start + sizes[i + j];
+ splay_tree_key str;
+ gomp_mutex_lock (&acc_dev->lock);
+ str = splay_tree_lookup (&acc_dev->mem_map, &k);
+ gomp_mutex_unlock (&acc_dev->lock);
+ if (str)
+ {
+ if (finalize)
+ {
+ str->refcount -= str->virtual_refcount;
+ str->virtual_refcount = 0;
+ }
+ if (str->virtual_refcount > 0)
+ {
+ str->refcount--;
+ str->virtual_refcount--;
+ }
+ else if (str->refcount > 0)
+ str->refcount--;
+ if (str->refcount == 0)
+ {
+ if (aq)
+ goacc_remove_var_async (acc_dev, str, aq);
+ else
+ gomp_remove_var (acc_dev, str);
+ }
+ }
+ }
i += elems;
}
break;
@@ -1012,17 +1016,8 @@ GOACC_enter_exit_data (int device, size_t mapnum,
gomp_acc_declare_allocate (false, pointer, &hostaddrs[i],
&sizes[i], &kinds[i]);
else
- {
- unsigned short ptrkind = kinds[i + pointer - 1] & 0xff;
- bool detach = (ptrkind == GOMP_MAP_DETACH
- || ptrkind == GOMP_MAP_FORCE_DETACH);
- void *detach_from = detach ? hostaddrs[i + pointer - 1]
- : NULL;
- gomp_acc_remove_pointer (&hostaddrs[i], &sizes[i], &kinds[i],
- async, detach_from, finalize,
- pointer);
- /* See the above comment. */
- }
+ gomp_acc_remove_pointer (acc_dev, &hostaddrs[i], &sizes[i],
+ &kinds[i], async, finalize, pointer);
i += pointer - 1;
}
}
@@ -374,7 +374,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
tgt_var->key = oldn;
tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
- tgt_var->do_detach = false;
+ tgt_var->do_detach = kind == GOMP_MAP_ATTACH;
tgt_var->offset = newn->host_start - oldn->host_start;
tgt_var->length = newn->host_end - newn->host_start;
@@ -841,8 +841,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, size_t mapnum,
- void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
- bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+ void **hostaddrs, void **devaddrs, size_t *sizes,
+ void *kinds, bool short_mapkind,
+ enum gomp_map_vars_kind pragma_kind)
{
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
bool has_firstprivate = false;
@@ -873,7 +874,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
tgt = gomp_malloc (sizeof (*tgt)
+ sizeof (tgt->list[0]) * (mapnum + da_data_row_num));
tgt->list_count = mapnum + da_data_row_num;
- tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
+ tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
+ || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
tgt->device_descr = devicep;
struct gomp_coalesce_buf cbuf, *cbufp = NULL;
@@ -1307,6 +1309,10 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
{
tgt->list[i].key = &array->key;
tgt->list[i].key->tgt = tgt;
+ tgt->list[i].key->refcount = REFCOUNT_INFINITY;
+ tgt->list[i].key->virtual_refcount = 0;
+ tgt->list[i].key->attach_count = NULL;
+ tgt->list[i].key->link_key = NULL;
array++;
continue;
}
@@ -1356,7 +1362,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
tgt->list[i].offset = 0;
tgt->list[i].length = k->host_end - k->host_start;
k->refcount = 1;
- k->dynamic_refcount = 0;
+ k->virtual_refcount = 0;
k->attach_count = NULL;
tgt->refcount++;
array->left = NULL;
@@ -1528,7 +1534,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
k->tgt = tgt;
k->refcount = 1;
- k->dynamic_refcount = 0;
+ k->virtual_refcount = 0;
k->attach_count = NULL;
k->link_key = NULL;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
@@ -1611,8 +1617,20 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
/* If the variable from "omp target enter data" map-list was already mapped,
tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
gomp_exit_data. */
- if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
- {
+ if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
+ || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
+ && tgt->refcount == 0)
+ {
+ /* If we're about to discard a target_mem_desc with no "structural"
+ references (tgt->refcount == 0), any splay keys linked in the tgt's
+ list must have their virtual refcount incremented to represent that
+ "lost" reference in order to implement the semantics of the OpenACC
+ "present increment" operation properly. */
+ if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
+ for (i = 0; i < tgt->list_count; i++)
+ if (tgt->list[i].key)
+ tgt->list[i].key->virtual_refcount++;
+
free (tgt);
tgt = NULL;
}
@@ -1628,8 +1646,6 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
if (tgt->tgt_end)
gomp_free_device_memory (tgt->device_descr, tgt->to_free);
- gomp_acc_data_env_remove_tgt (&tgt->device_descr->openacc.data_environ, tgt);
-
free (tgt->array);
free (tgt);
}
@@ -1641,6 +1657,8 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
splay_tree_remove (&devicep->mem_map, k);
if (k->link_key)
splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
+ if (k->attach_count)
+ free (k->attach_count);
if (k->tgt->refcount > 1)
k->tgt->refcount--;
else
@@ -1648,8 +1666,6 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
is_tgt_unmapped = true;
gomp_unmap_tgt (k->tgt);
}
- if (k->attach_count)
- free (k->attach_count);
return is_tgt_unmapped;
}
@@ -1706,7 +1722,14 @@ gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
continue;
bool do_unmap = false;
- if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+ if (k->tgt == tgt
+ && k->virtual_refcount > 0
+ && k->refcount != REFCOUNT_INFINITY)
+ {
+ k->virtual_refcount--;
+ k->refcount--;
+ }
+ else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
k->refcount--;
else if (k->refcount == 1)
{
@@ -1830,17 +1853,14 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
}
/* Insert host-target address mapping into splay tree. */
- struct target_mem_desc *tgt =
- gomp_malloc (sizeof (*tgt)
- + sizeof (tgt->list[0])
- * (num_funcs + num_vars) * sizeof (*tgt->array));
+ struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
tgt->refcount = REFCOUNT_INFINITY;
tgt->tgt_start = 0;
tgt->tgt_end = 0;
tgt->to_free = NULL;
tgt->prev = NULL;
- tgt->list_count = num_funcs + num_vars;
+ tgt->list_count = 0;
tgt->device_descr = devicep;
splay_tree_node array = tgt->array;
@@ -1852,10 +1872,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
k->tgt = tgt;
k->tgt_offset = target_table[i].start;
k->refcount = REFCOUNT_INFINITY;
+ k->virtual_refcount = 0;
k->attach_count = NULL;
k->link_key = NULL;
- tgt->list[i].key = k;
- tgt->refcount++;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
@@ -1887,10 +1906,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
k->tgt = tgt;
k->tgt_offset = target_var->start;
k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
+ k->virtual_refcount = 0;
k->attach_count = NULL;
k->link_key = NULL;
- tgt->list[i].key = k;
- tgt->refcount++;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
@@ -3604,7 +3622,6 @@ gomp_target_init (void)
current_device.type = current_device.get_type_func ();
current_device.mem_map.root = NULL;
current_device.state = GOMP_DEVICE_UNINITIALIZED;
- current_device.openacc.data_environ = NULL;
/* Augment DEVICES and NUM_DEVICES. */
devices = gomp_realloc (devices,
@@ -20,16 +20,19 @@ main ()
for (k = 0; k < 16; k++)
{
+ /* Here, we do not explicitly copy the enclosing structure, but work
+ with fields directly. Make sure attachment counters and reference
+ counters work properly in that case. */
#pragma acc enter data copyin(v.a, v.b[0:n])
-
+#pragma acc enter data pcopyin(v.b[0:n])
#pragma acc enter data pcopyin(v.b[0:n])
-#pragma acc parallel loop attach(v.b)
+#pragma acc parallel loop present(v.a, v.b)
for (i = 0; i < n; i++)
v.b[i] = v.a + i;
-#pragma acc exit data copyout(v.b[:n])
-#pragma acc exit data delete(v) finalize
+#pragma acc exit data copyout(v.b[:n]) finalize
+#pragma acc exit data delete(v.a)
for (i = 0; i < n; i++)
assert (v.b[i] == v.a + i);
@@ -35,6 +35,7 @@ main ()
#pragma acc exit data copyout(v.b[:n])
#pragma acc exit data copyout(v.c[:n])
#pragma acc exit data copyout(v.d[:n])
+#pragma acc exit data copyout(v.a)
for (i = 0; i < n; i++)
assert (v.b[i] == v.a + i);