@@ -814,9 +814,26 @@ struct target_var_desc {
uintptr_t length;
};
+/* Uncomment to enable reference-count consistency checking (for development
+ use only). */
+/*#define RC_CHECKING 1*/
+
+#ifdef RC_CHECKING
+struct target_mem_desc;
+
+struct async_tgt_use {
+ struct target_mem_desc *tgt;
+ struct async_tgt_use *next;
+};
+#endif
+
struct target_mem_desc {
/* Reference count. */
uintptr_t refcount;
+#ifdef RC_CHECKING
+ uintptr_t refcount_chk;
+ bool mark;
+#endif
/* All the splay nodes allocated together. */
splay_tree_node array;
/* Start of the target region. */
@@ -865,6 +882,10 @@ struct splay_tree_key_s {
"present increment" operations (via "acc enter data") refering to the same
host-memory block. */
uintptr_t virtual_refcount;
+#ifdef RC_CHECKING
+ /* The recalculated reference count, for verification. */
+ uintptr_t refcount_chk;
+#endif
/* For a block with attached pointers, the attachment counters for each. */
unsigned short *attach_count;
/* Pointer to the original mapping of "omp declare target link" object. */
@@ -899,7 +920,11 @@ typedef struct acc_dispatch_t
int nasyncqueue;
struct goacc_asyncqueue **asyncqueue;
struct goacc_asyncqueue_list *active;
-
+#ifdef RC_CHECKING
+ struct async_tgt_use *tgt_uses;
+ gomp_mutex_t au_lock;
+#endif
+
__typeof (GOMP_OFFLOAD_openacc_async_construct) *construct_func;
__typeof (GOMP_OFFLOAD_openacc_async_destruct) *destruct_func;
__typeof (GOMP_OFFLOAD_openacc_async_test) *test_func;
@@ -1028,6 +1053,12 @@ extern void gomp_detach_pointer (struct gomp_device_descr *,
struct goacc_asyncqueue *, splay_tree_key,
uintptr_t, bool, struct gomp_coalesce_buf *);
+#ifdef RC_CHECKING
+extern void dump_tgt (const char *, struct target_mem_desc *);
+extern void gomp_rc_check (struct gomp_device_descr *,
+ struct target_mem_desc *);
+#endif
+
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,
size_t *, void *, bool,
@@ -365,6 +365,29 @@ goacc_async_unmap_tgt (void *ptr)
{
struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+#ifdef RC_CHECKING
+ {
+ struct gomp_device_descr *devicep = tgt->device_descr;
+ struct async_tgt_use *aup, *au;
+ gomp_mutex_lock (&devicep->openacc.async.au_lock);
+ /* Remove tgt from asynchronous-use list. */
+ for (aup = NULL, au = devicep->openacc.async.tgt_uses; au;
+ aup = au, au = au->next)
+ if (au->tgt == tgt)
+ {
+ if (aup)
+ aup->next = au->next;
+ else
+ devicep->openacc.async.tgt_uses = au->next;
+ free (au);
+ break;
+ }
+ if (!au)
+ gomp_fatal ("can't find tgt %p to remove in async list", tgt);
+ gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+ }
+#endif
+
if (tgt->refcount > 1)
tgt->refcount--;
else
@@ -380,6 +403,18 @@ goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
/* Increment reference to delay freeing of device memory until callback
has triggered. */
tgt->refcount++;
+
+#ifdef RC_CHECKING
+ {
+ struct async_tgt_use *au = malloc (sizeof (struct async_tgt_use));
+ gomp_mutex_lock (&devicep->openacc.async.au_lock);
+ /* Record the asynchronous use of this target_mem_desc. */
+ au->next = devicep->openacc.async.tgt_uses;
+ au->tgt = tgt;
+ devicep->openacc.async.tgt_uses = au;
+ gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+ }
+#endif
gomp_unmap_vars_async (tgt, true, aq);
devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
(void *) tgt);
@@ -398,6 +433,17 @@ goacc_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key n,
struct target_mem_desc *tgt = n->tgt;
assert (tgt);
tgt->refcount++;
+#ifdef RC_CHECKING
+ {
+ gomp_mutex_lock (&devicep->openacc.async.au_lock);
+ struct async_tgt_use *au = malloc (sizeof (struct async_tgt_use));
+ /* Record the asynchronous use of this target_mem_desc. */
+ au->next = devicep->openacc.async.tgt_uses;
+ au->tgt = tgt;
+ devicep->openacc.async.tgt_uses = au;
+ gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+ }
+#endif
gomp_remove_var (devicep, n);
devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
(void *) tgt);
@@ -375,6 +375,15 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
&api_info);
}
+#ifdef RC_CHECKING
+ gomp_mutex_lock (&acc_dev->lock);
+ assert (tgt);
+ dump_tgt (__FUNCTION__, tgt);
+ tgt->prev = thr->mapped_data;
+ gomp_rc_check (acc_dev, tgt);
+ gomp_mutex_unlock (&acc_dev->lock);
+#endif
+
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
for (i = 0; i < mapnum; i++)
devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
@@ -418,6 +427,12 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
goacc_async_copyout_unmap_vars (tgt, aq);
}
+#ifdef RC_CHECKING
+ gomp_mutex_lock (&acc_dev->lock);
+ gomp_rc_check (acc_dev, thr->mapped_data);
+ gomp_mutex_unlock (&acc_dev->lock);
+#endif
+
out:
if (profiling_dispatch_p)
{
@@ -589,6 +604,12 @@ GOACC_data_start (int device, size_t mapnum,
thr->prof_info = NULL;
thr->api_info = NULL;
}
+
+#ifdef RC_CHECKING
+ gomp_mutex_lock (&acc_dev->lock);
+ gomp_rc_check (acc_dev, thr->mapped_data);
+ gomp_mutex_unlock (&acc_dev->lock);
+#endif
}
void
@@ -664,6 +685,12 @@ GOACC_data_end (void)
thr->prof_info = NULL;
thr->api_info = NULL;
}
+
+#ifdef RC_CHECKING
+ gomp_mutex_lock (&acc_dev->lock);
+ gomp_rc_check (acc_dev, thr->mapped_data);
+ gomp_mutex_unlock (&acc_dev->lock);
+#endif
}
void
@@ -1023,6 +1050,12 @@ GOACC_enter_exit_data (int device, size_t mapnum,
}
}
+#ifdef RC_CHECKING
+ gomp_mutex_lock (&acc_dev->lock);
+ gomp_rc_check (acc_dev, thr->mapped_data);
+ gomp_mutex_unlock (&acc_dev->lock);
+#endif
+
out:
if (profiling_dispatch_p)
{
@@ -40,6 +40,9 @@
#include <assert.h>
#include <errno.h>
#include <limits.h>
+#ifdef RC_CHECKING
+#include <stdio.h>
+#endif
#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
@@ -361,6 +364,198 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
}
}
+#ifdef RC_CHECKING
+void
+dump_tgt (const char *where, struct target_mem_desc *tgt)
+{
+ if (!getenv ("GOMP_DEBUG_TGT"))
+ return;
+
+ fprintf (stderr, "%s: %s: tgt=%p\n", __FUNCTION__, where, (void*) tgt);
+ fprintf (stderr, "refcount=%d\n", (int) tgt->refcount);
+ fprintf (stderr, "tgt_start=%p\n", (void*) tgt->tgt_start);
+ fprintf (stderr, "tgt_end=%p\n", (void*) tgt->tgt_end);
+ fprintf (stderr, "to_free=%p\n", tgt->to_free);
+ fprintf (stderr, "list_count=%d\n", (int) tgt->list_count);
+ for (int i = 0; i < tgt->list_count; i++)
+ {
+ fprintf (stderr, "list item %d:\n", i);
+ fprintf (stderr, " key: %p\n", (void*) tgt->list[i].key);
+ if (tgt->list[i].key)
+ {
+ fprintf (stderr, " key.host_start=%p\n",
+ (void*) tgt->list[i].key->host_start);
+ fprintf (stderr, " key.host_end=%p\n",
+ (void*) tgt->list[i].key->host_end);
+ fprintf (stderr, " key.tgt=%p\n", (void*) tgt->list[i].key->tgt);
+ fprintf (stderr, " key.offset=%d\n",
+ (int) tgt->list[i].key->tgt_offset);
+ fprintf (stderr, " key.refcount=%d\n",
+ (int) tgt->list[i].key->refcount);
+ fprintf (stderr, " key.virtual_refcount=%d\n",
+ (int) tgt->list[i].key->virtual_refcount);
+ fprintf (stderr, " key.attach_count=%p\n",
+ (void*) tgt->list[i].key->attach_count);
+ fprintf (stderr, " key.link_key=%p\n",
+ (void*) tgt->list[i].key->link_key);
+ }
+ }
+ fprintf (stderr, "\n");
+}
+
+static void
+rc_check_clear (splay_tree_node node)
+{
+ splay_tree_key k = &node->key;
+
+ k->refcount_chk = 0;
+ k->tgt->refcount_chk = 0;
+ k->tgt->mark = false;
+
+ if (node->left)
+ rc_check_clear (node->left);
+ if (node->right)
+ rc_check_clear (node->right);
+}
+
+static void
+rc_check_count (splay_tree_node node)
+{
+ splay_tree_key k = &node->key;
+ struct target_mem_desc *t;
+
+ /* Add virtual reference counts ("acc enter data", etc.) for this key. */
+ k->refcount_chk += k->virtual_refcount;
+
+ t = k->tgt;
+ t->refcount_chk++;
+
+ if (!t->mark)
+ {
+ for (int i = 0; i < t->list_count; i++)
+ if (t->list[i].key)
+ t->list[i].key->refcount_chk++;
+
+ t->mark = true;
+ }
+
+ if (node->left)
+ rc_check_count (node->left);
+ if (node->right)
+ rc_check_count (node->right);
+}
+
+static bool
+rc_check_verify (splay_tree_node node, bool noisy, bool errors)
+{
+ splay_tree_key k = &node->key;
+ struct target_mem_desc *t;
+
+ if (k->refcount != REFCOUNT_INFINITY)
+ {
+ if (noisy)
+ fprintf (stderr, "key %p (%p..+%d): rc=%d/%d, virt_rc=%d\n", k,
+ (void *) k->host_start, (int) (k->host_end - k->host_start),
+ (int) k->refcount, (int) k->refcount_chk,
+ (int) k->virtual_refcount);
+
+ if (k->refcount != k->refcount_chk)
+ {
+ if (noisy)
+ fprintf (stderr, " -- key refcount mismatch!\n");
+ errors = true;
+ }
+
+ t = k->tgt;
+
+ if (noisy)
+ fprintf (stderr, "tgt %p: rc=%d/%d\n", t, (int) t->refcount,
+ (int) t->refcount_chk);
+
+ if (t->refcount != t->refcount_chk)
+ {
+ if (noisy)
+ fprintf (stderr,
+ " -- target memory descriptor refcount mismatch!\n");
+ errors = true;
+ }
+ }
+
+ if (node->left)
+ errors |= rc_check_verify (node->left, noisy, errors);
+ if (node->right)
+ errors |= rc_check_verify (node->right, noisy, errors);
+
+ return errors;
+}
+
+/* Call with device locked. */
+
+attribute_hidden void
+gomp_rc_check (struct gomp_device_descr *devicep, struct target_mem_desc *tgt)
+{
+ splay_tree sp = &devicep->mem_map;
+
+ bool noisy = getenv ("GOMP_DEBUG_TGT") != 0;
+
+ if (noisy)
+ fprintf (stderr, "\n*** GOMP_RC_CHECK ***\n\n");
+
+ if (sp->root)
+ {
+ gomp_mutex_lock (&devicep->openacc.async.au_lock);
+ struct async_tgt_use *async_uses = devicep->openacc.async.tgt_uses;
+
+ rc_check_clear (sp->root);
+
+ for (struct target_mem_desc *t = tgt; t; t = t->prev)
+ {
+ t->refcount_chk = 0;
+ t->mark = false;
+ }
+ for (struct async_tgt_use *au = async_uses; au; au = au->next)
+ {
+ struct target_mem_desc *t = au->tgt;
+ t->refcount_chk = 0;
+ t->mark = false;
+ }
+
+ /* Add references for interconnected splay-tree keys. */
+ rc_check_count (sp->root);
+
+ /* Add references for the tgt for a currently-executing kernel and/or
+ any enclosing data directives. */
+ for (struct target_mem_desc *t = tgt; t; t = t->prev)
+ {
+ t->refcount_chk++;
+
+ if (!t->mark)
+ {
+ for (int i = 0; i < t->list_count; i++)
+ if (t->list[i].key)
+ t->list[i].key->refcount_chk++;
+
+ t->mark = true;
+ }
+ }
+
+ /* Add references from in-progress asynchronous operations. */
+ for (struct async_tgt_use *au = async_uses; au; au = au->next)
+ {
+ struct target_mem_desc *t = au->tgt;
+ t->refcount_chk++;
+ }
+
+ if (rc_check_verify (sp->root, noisy, false))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("refcount checking failure");
+ }
+ gomp_mutex_unlock (&devicep->openacc.async.au_lock);
+ }
+}
+#endif
+
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
gomp_map_0len_lookup found oldn for newn.
Helper function of gomp_map_vars. */
@@ -3622,6 +3817,10 @@ gomp_target_init (void)
current_device.type = current_device.get_type_func ();
current_device.mem_map.root = NULL;
current_device.state = GOMP_DEVICE_UNINITIALIZED;
+#ifdef RC_CHECKING
+ current_device.openacc.async.tgt_uses = NULL;
+ gomp_mutex_init (¤t_device.openacc.async.au_lock);
+#endif
/* Augment DEVICES and NUM_DEVICES. */
devices = gomp_realloc (devices,