diff mbox series

[6/6,og8] OpenACC refcounting refresh

Message ID 32a5b94840558034f99b260bf5f02e9c5a083020.1542748807.git.julian@codesourcery.com
State New
Headers show
Series OpenACC attach/detach | expand

Commit Message

Julian Brown Nov. 20, 2018, 9:56 p.m. UTC
This patch represents a mild overhaul of reference counting for OpenACC
in libgomp.  It's been partly automatically checked (using code not yet
quite finished nor submitted upstream), but it's already more precise
than the pre-patch implementation (as demonstrated by adjustments to
previously-erroneous tests, included).

I have a few more changes planned, but those are still tbd.

	libgomp/
	* libgomp.h (gomp_device_descr): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA.
	(gomp_acc_remove_pointer): Update prototype.
	(gomp_acc_data_env_remove_tgt): Add prototype.
	(gomp_unmap_vars, gomp_map_vars_async): Update prototype.
	* oacc-int.h (goacc_async_copyout_unmap_vars): Update prototype.
	* oacc-async.c (goacc_async_copyout_unmap_vars): Remove finalize
	parameter.
	* oacc-init.c (acc_shutdown_1): Remove finalize argument to
	gomp_unmap_vars call.
	* oacc-mem.c (lookup_dev_1): New helper function.
	(lookup_dev): Rewrite in terms of above.
	(acc_free): Update calls to lookup_dev.
	(acc_map_data): Likewise.  Don't add data mapped this way to OpenACC
	data environment list.
	(gomp_acc_data_env_remove, gomp_acc_data_env_remove_tgt): New functions.
	(acc_unmap_data): Rewrite using splay tree functions directly.  Don't
	call gomp_unmap_vars.  Fix refcount handling.
	(present_create_copy): Use GOMP_MAP_VARS_OPENACC_ENTER_DATA in
	gomp_map_vars_async call.  Adjust refcount handling.
	(delete_copyout): Remove dubious handling of target_mem_desc refcount.
	(gomp_acc_insert_pointer): Use GOMP_MAP_VARS_OPENACC_ENTER_DATA in
	gomp_map_vars_async call.  Update refcount handling.
	(gomp_acc_remove_pointer): Reimplement.  Fix detach and refcount
	handling.
	* oacc-parallel.c (find_pointer): Handle more mapping types.  Update
	calls to gomp_unmap_vars and goacc_async_copyout_unmap_vars.
	(GOACC_enter_exit_data): Update refcount handling.

	libgomp/
	* target.c (gomp_detach_pointer): Unlock device on error path.
	(gomp_map_vars_async): Support GOMP_MAP_VARS_OPENACC_ENTER_DATA and
	mapping size fix GOMP_MAP_ATTACH.
	(gomp_unmap_tgt): Call gomp_acc_data_env_remove_tgt.
	(gomp_unmap_vars): Remove finalize parameter.
	(gomp_unmap_vars_async): Likewise.  Adjust detach handling.
	(GOMP_target, GOMP_target_ext, GOMP_target_end_data)
	(gomp_target_task_fn): Update calls to gomp_unmap_vars.
	* testsuite/libgomp.oacc-c-c++-common/context-2.c: Use correct API to
	unmap data.
	* testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c: New test.
	* testsuite/libgomp.oacc-fortran/data-2.f90: Fix for unmap semantics.
---
 libgomp/libgomp.h                                  |   10 +-
 libgomp/oacc-async.c                               |    4 +-
 libgomp/oacc-init.c                                |    2 +-
 libgomp/oacc-int.h                                 |    2 +-
 libgomp/oacc-mem.c                                 |  387 ++++++++++----------
 libgomp/oacc-parallel.c                            |   76 +++--
 libgomp/target.c                                   |   35 ++-
 .../libgomp.oacc-c-c++-common/context-2.c          |    6 +-
 .../libgomp.oacc-c-c++-common/context-4.c          |    6 +-
 .../libgomp.oacc-c-c++-common/deep-copy-6.c        |   59 +++
 .../libgomp.oacc-c-c++-common/deep-copy-7.c        |   42 +++
 .../libgomp.oacc-c-c++-common/deep-copy-8.c        |   53 +++
 libgomp/testsuite/libgomp.oacc-fortran/data-2.f90  |   20 +-
 13 files changed, 445 insertions(+), 257 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
diff mbox series

Patch

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 17fe0d3..568e260 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1002,6 +1002,7 @@  struct gomp_device_descr
 enum gomp_map_vars_kind
 {
   GOMP_MAP_VARS_OPENACC,
+  GOMP_MAP_VARS_OPENACC_ENTER_DATA,
   GOMP_MAP_VARS_TARGET,
   GOMP_MAP_VARS_DATA,
   GOMP_MAP_VARS_ENTER_DATA
@@ -1010,7 +1011,8 @@  enum gomp_map_vars_kind
 struct gomp_coalesce_buf;
 
 extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
-extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
+extern void gomp_acc_remove_pointer (void **, size_t *, unsigned short *,
+				     int, void *, bool, int);
 extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
 				       unsigned short *);
 struct gomp_coalesce_buf;
@@ -1039,10 +1041,12 @@  extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
 						    size_t, void **, void **,
 						    size_t *, void *, bool,
 						    enum gomp_map_vars_kind);
+extern void gomp_acc_data_env_remove_tgt (struct target_mem_desc **,
+					  struct target_mem_desc *);
 extern void gomp_unmap_tgt (struct target_mem_desc *);
-extern void gomp_unmap_vars (struct target_mem_desc *, bool, bool);
+extern void gomp_unmap_vars (struct target_mem_desc *, bool);
 extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
-				   struct goacc_asyncqueue *, bool);
+				   struct goacc_asyncqueue *);
 extern void gomp_init_device (struct gomp_device_descr *);
 extern bool gomp_fini_device (struct gomp_device_descr *);
 extern void gomp_unload_device (struct gomp_device_descr *);
diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index 6c12c82..bb00279 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -373,14 +373,14 @@  goacc_async_unmap_tgt (void *ptr)
 
 attribute_hidden void
 goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
-				struct goacc_asyncqueue *aq, bool finalize)
+				struct goacc_asyncqueue *aq)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
   /* Increment reference to delay freeing of device memory until callback
      has triggered.  */
   tgt->refcount++;
-  gomp_unmap_vars_async (tgt, true, aq, finalize);
+  gomp_unmap_vars_async (tgt, true, aq);
   devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
 					      (void *) tgt);
 }
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index e1938c5..48c9646 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -391,7 +391,7 @@  acc_shutdown_1 (acc_device_t d)
 	    {
 	      struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt;
 
-	      gomp_unmap_vars (tgt, false, false);
+	      gomp_unmap_vars (tgt, false);
 	    }
 
 	  walk->dev = NULL;
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
index 878f0f4..1f6c62c 100644
--- a/libgomp/oacc-int.h
+++ b/libgomp/oacc-int.h
@@ -112,7 +112,7 @@  void goacc_host_init (void);
 void goacc_init_asyncqueues (struct gomp_device_descr *);
 bool goacc_fini_asyncqueues (struct gomp_device_descr *);
 void goacc_async_copyout_unmap_vars (struct target_mem_desc *,
-				     struct goacc_asyncqueue *, bool);
+				     struct goacc_asyncqueue *);
 void goacc_async_free (struct gomp_device_descr *,
 		       struct goacc_asyncqueue *, void *);
 struct goacc_asyncqueue *get_goacc_asyncqueue (int);
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 76ba914..3202f06 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -52,6 +52,25 @@  lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
   return key;
 }
 
+/* Helper for lookup_dev.  Iterate over splay tree.  */
+
+static splay_tree_key
+lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
+{
+  splay_tree_key k = &node->key;
+  struct target_mem_desc *t = k->tgt;
+
+  if (d >= t->tgt_start && d + s <= t->tgt_end)
+    return k;
+
+  if (node->left)
+    return lookup_dev_1 (node->left, d, s);
+  if (node->right)
+    return lookup_dev_1 (node->right, d, s);
+
+  return NULL;
+}
+
 /* Return block containing [D->S), or NULL if not contained.
    The list isn't ordered by device address, so we have to iterate
    over the whole array.  This is not expected to be a common
@@ -59,35 +78,12 @@  lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
    remains locked on exit.  */
 
 static splay_tree_key
-lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
+lookup_dev (splay_tree mem_map, void *d, size_t s)
 {
-  int i;
-  struct target_mem_desc *t;
-
-  if (!tgt)
-    return NULL;
-
-  for (t = tgt; t != NULL; t = t->prev)
-    {
-      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
-        break;
-    }
-
-  if (!t)
+  if (!mem_map || !mem_map->root)
     return NULL;
 
-  for (i = 0; i < t->list_count; i++)
-    {
-      void * offset;
-
-      splay_tree_key k = &t->array[i].key;
-      offset = d - t->tgt_start + k->tgt_offset;
-
-      if (k->host_start + offset <= (void *) k->host_end)
-        return k;
-    }
-
-  return NULL;
+  return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
 }
 
 /* OpenACC is silent on how memory exhaustion is indicated.  We return
@@ -165,7 +161,7 @@  acc_free (void *d)
   /* We don't have to call lazy open here, as the ptr value must have
      been returned by acc_malloc.  It's not permitted to pass NULL in
      (unless you got that null from acc_malloc).  */
-  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
+  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
     {
       void *offset;
 
@@ -325,7 +321,7 @@  acc_hostptr (void *d)
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
+  n = lookup_dev (&acc_dev->mem_map, d, 1);
 
   if (!n)
     {
@@ -422,7 +418,7 @@  acc_map_data (void *h, void *d, size_t s)
 		      (int)s);
 	}
 
-      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
+      if (lookup_dev (&thr->dev->mem_map, d, s))
         {
 	  gomp_mutex_unlock (&acc_dev->lock);
 	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
@@ -436,11 +432,6 @@  acc_map_data (void *h, void *d, size_t s)
       tgt->list[0].key->refcount = REFCOUNT_INFINITY;
     }
 
-  gomp_mutex_lock (&acc_dev->lock);
-  tgt->prev = acc_dev->openacc.data_environ;
-  acc_dev->openacc.data_environ = tgt;
-  gomp_mutex_unlock (&acc_dev->lock);
-
   if (profiling_setup_p)
     {
       thr->prof_info = NULL;
@@ -448,11 +439,83 @@  acc_map_data (void *h, void *d, size_t s)
     }
 }
 
+/* Remove the target_mem_desc holding the mapping for MAPNUM HOSTADDRS from
+   the OpenACC data environment pointed to by DATA_ENV.  The device lock
+   should be held before calling, and remains locked on exit.  */
+
+static void
+gomp_acc_data_env_remove (struct gomp_device_descr *acc_dev,
+			  struct target_mem_desc **data_env, void **hostaddrs,
+			  int mapnum)
+{
+  struct target_mem_desc *t, *tp;
+
+  for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
+    {
+      bool all_match = true;
+
+      /* We must locate the target descriptor by "value", matching each
+	 hostaddr that it describes.  */
+      if (t->list_count != mapnum)
+        continue;
+
+      for (int i = 0; i < t->list_count; i++)
+	if (t->list[i].key
+	    && (t->list[i].key->host_start + t->list[i].offset
+		!= (uintptr_t) hostaddrs[i]))
+	  {
+	    all_match = false;
+	    break;
+	  }
+
+      if (all_match)
+	{
+	  if (t->refcount > 1)
+	    t->refcount--;
+	  else
+	    {
+	      if (tp)
+		tp->prev = t->prev;
+	      else
+		*data_env = t->prev;
+	    }
+	  return;
+	}
+    }
+
+  gomp_mutex_unlock (&acc_dev->lock);
+  gomp_fatal ("cannot find data mapping to remove in data environment");
+}
+
+/* Similar, but removes target_mem_desc REMOVE from the DATA_ENV, in case its
+   reference count drops to zero resulting in it being unmapped (in
+   target.c:gomp_unmap_tgt).  Unlike the above function it is not an error if
+   REMOVE is not present in the environment.  The device lock should be held
+   before calling, and remains locked on exit.  */
+
+attribute_hidden void
+gomp_acc_data_env_remove_tgt (struct target_mem_desc **data_env,
+			      struct target_mem_desc *remove)
+{
+  struct target_mem_desc *t, *tp;
+
+  for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
+    if (t == remove)
+      {
+	if (tp)
+	  tp->prev = t->prev;
+	else
+	  *data_env = t->prev;
+	return;
+      }
+}
+
 void
 acc_unmap_data (void *h)
 {
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
 
   /* No need to call lazy open, as the address must have been mapped.  */
 
@@ -466,12 +529,11 @@  acc_unmap_data (void *h)
     = __builtin_expect (goacc_profiling_setup_p (thr, &prof_info, &api_info),
 			false);
 
-  size_t host_size;
-
   gomp_mutex_lock (&acc_dev->lock);
 
-  splay_tree_key n = lookup_host (acc_dev, h, 1);
-  struct target_mem_desc *t;
+  cur_node.host_start = (uintptr_t) h;
+  cur_node.host_end = cur_node.host_start + 1;
+  splay_tree_key n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
 
   if (!n)
     {
@@ -479,47 +541,28 @@  acc_unmap_data (void *h)
       gomp_fatal ("%p is not a mapped block", (void *)h);
     }
 
-  host_size = n->host_end - n->host_start;
-
   if (n->host_start != (uintptr_t) h)
     {
+      size_t host_size = n->host_end - n->host_start;
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("[%p,%d] surrounds %p",
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
 
-  /* Mark for removal.  */
-  n->refcount = 1;
+  splay_tree_remove (&acc_dev->mem_map, n);
 
-  t = n->tgt;
+  struct target_mem_desc *tgt = n->tgt;
 
-  if (t->refcount == 2)
+  if (tgt->refcount > 0)
+    tgt->refcount--;
+  else
     {
-      struct target_mem_desc *tp;
-
-      /* This is the last reference, so pull the descriptor off the
-         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
-         freeing the device memory. */
-      t->tgt_end = 0;
-      t->to_free = 0;
-
-      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	   tp = t, t = t->prev)
-	if (n->tgt == t)
-	  {
-	    if (tp)
-	      tp->prev = t->prev;
-	    else
-	      acc_dev->openacc.data_environ = t->prev;
-
-	    break;
-	  }
+      free (tgt->array);
+      free (tgt);
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
 
-  gomp_unmap_vars (t, true, false);
-
   if (profiling_setup_p)
     {
       thr->prof_info = NULL;
@@ -585,6 +628,24 @@  present_create_copy (unsigned f, void *h, size_t s, int async)
 	  n->refcount++;
 	  n->dynamic_refcount++;
 	}
+
+      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)
+						 + sizeof (tgt->list[0]));
+      tgt->refcount = 1;
+      tgt->tgt_start = 0;
+      tgt->tgt_end = 0;
+      tgt->to_free = NULL;
+      tgt->prev = acc_dev->openacc.data_environ;
+      tgt->list_count = 1;
+      tgt->device_descr = acc_dev;
+      tgt->list[0].key = n;
+      tgt->list[0].copy_from = false;
+      tgt->list[0].always_copy_from = false;
+      tgt->list[0].do_detach = false;
+      tgt->list[0].offset = (uintptr_t) h - n->host_start;
+      tgt->list[0].length = 0;
+      acc_dev->openacc.data_environ = tgt;
+
       gomp_mutex_unlock (&acc_dev->lock);
     }
   else if (!(f & FLAG_CREATE))
@@ -609,18 +670,19 @@  present_create_copy (unsigned f, void *h, size_t s, int async)
       goacc_aq aq = get_goacc_asyncqueue (async);
 
       tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
-				 &kinds, true, GOMP_MAP_VARS_OPENACC);
-      /* Initialize dynamic refcount.  */
-      tgt->list[0].key->dynamic_refcount = 1;
-      tgt->list[0].key->attach_count = NULL;
+				 &kinds, true,
+				 GOMP_MAP_VARS_OPENACC_ENTER_DATA);
 
-      gomp_mutex_lock (&acc_dev->lock);
+      for (int i = 0; i < tgt->list_count; i++)
+        if (tgt->list[i].key)
+	  tgt->list[i].key->dynamic_refcount++;
 
-      d = tgt->to_free;
+      gomp_mutex_lock (&acc_dev->lock);
       tgt->prev = acc_dev->openacc.data_environ;
       acc_dev->openacc.data_environ = tgt;
-
       gomp_mutex_unlock (&acc_dev->lock);
+
+      d = tgt->to_free;
     }
 
   if (profiling_setup_p)
@@ -753,11 +815,6 @@  delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
       n->dynamic_refcount = 0;
       n->attach_count = NULL;
     }
-  if (n->refcount < n->dynamic_refcount)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("Dynamic reference counting assert fail\n");
-    }
 
   if (f & FLAG_FINALIZE)
     {
@@ -772,21 +829,6 @@  delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 
   if (n->refcount == 0)
     {
-      if (n->tgt->refcount == 2)
-	{
-	  struct target_mem_desc *tp, *t;
-	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	       tp = t, t = t->prev)
-	    if (n->tgt == t)
-	      {
-		if (tp)
-		  tp->prev = t->prev;
-		else
-		  acc_dev->openacc.data_environ = t->prev;
-		break;
-	      }
-	}
-
       if (f & FLAG_COPYOUT)
 	{
 	  goacc_aq aq = get_goacc_asyncqueue (async);
@@ -968,38 +1010,16 @@  gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
-  if (acc_is_present (*hostaddrs, *sizes))
-    {
-      splay_tree_key n;
-      gomp_mutex_lock (&acc_dev->lock);
-      n = lookup_host (acc_dev, *hostaddrs, *sizes);
-      gomp_mutex_unlock (&acc_dev->lock);
-
-      tgt = n->tgt;
-      for (size_t i = 0; i < tgt->list_count; i++)
-	if (tgt->list[i].key == n)
-	  {
-	    for (size_t j = 0; j < mapnum; j++)
-	      if (i + j < tgt->list_count && tgt->list[i + j].key)
-		{
-		  tgt->list[i + j].key->refcount++;
-		  tgt->list[i + j].key->dynamic_refcount++;
-		}
-	    return;
-	  }
-      /* Should not reach here.  */
-      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
-    }
-
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
   goacc_aq aq = get_goacc_asyncqueue (async);
   tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
-			     NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+			     NULL, sizes, kinds, true,
+			     GOMP_MAP_VARS_OPENACC_ENTER_DATA);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
 
-  /* Initialize dynamic refcount.  */
-  tgt->list[0].key->dynamic_refcount = 1;
-  tgt->list[0].key->attach_count = NULL;
+  for (size_t i = 0; i < tgt->list_count; i++)
+    if (tgt->list[i].key)
+      tgt->list[i].key->dynamic_refcount++;
 
   gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
@@ -1008,96 +1028,83 @@  gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
 }
 
 void
-gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
-			 int finalize, int mapnum)
+gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds,
+			 int async, void *detach_from, bool finalize,
+			 int mapnum)
 {
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
   splay_tree_key n;
-  struct target_mem_desc *t;
-  int minrefs = (mapnum == 1) ? 2 : 3;
-
-  if (!acc_is_present (h, s))
-    return;
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  n = lookup_host (acc_dev, h, 1);
-
-  if (!n)
+  if (detach_from)
     {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("%p is not a mapped block", (void *)h);
+      splay_tree_key n2 = lookup_host (acc_dev, detach_from, 1);
+      goacc_aq aq = get_goacc_asyncqueue (async);
+      gomp_detach_pointer (acc_dev, aq, n2, (uintptr_t) detach_from, finalize,
+			   NULL);
     }
 
-  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
-
-  t = n->tgt;
+  gomp_acc_data_env_remove (acc_dev, &acc_dev->openacc.data_environ, hostaddrs,
+			    mapnum);
 
-  if (n->refcount < n->dynamic_refcount)
+  for (int i = 0; i < mapnum; i++)
     {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("Dynamic reference counting assert fail\n");
-    }
-
-  if (finalize)
-    {
-      n->refcount -= n->dynamic_refcount;
-      n->dynamic_refcount = 0;
-    }
-  else if (n->dynamic_refcount)
-    {
-      n->dynamic_refcount--;
-      n->refcount--;
-    }
+      int kind = kinds[i] & 0xff;
+      bool copyfrom = false;
 
-  gomp_mutex_unlock (&acc_dev->lock);
-
-  if (n->refcount == 0)
-    {
-      if (t->refcount == minrefs)
-	{
-	  /* This is the last reference, so pull the descriptor off the
-	     chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
-	     freeing the device memory. */
-	  struct target_mem_desc *tp;
-	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	       tp = t, t = t->prev)
+      switch (kind)
+        {
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_FORCE_FROM:
+	case GOMP_MAP_ALWAYS_FROM:
+	  copyfrom = true;
+	  /* Fallthrough.  */
+	case GOMP_MAP_TO_PSET:
+	case GOMP_MAP_POINTER:
+	case GOMP_MAP_DELETE:
+	case GOMP_MAP_RELEASE:
+	case GOMP_MAP_DETACH:
+	case GOMP_MAP_FORCE_DETACH:
+	  cur_node.host_start = (uintptr_t) hostaddrs[i];
+	  cur_node.host_end = cur_node.host_start
+			      + ((kind == GOMP_MAP_DETACH
+				  || kind == GOMP_MAP_FORCE_DETACH
+				  || kind == GOMP_MAP_POINTER)
+				 ? sizeof (void *) : sizes[i]);
+	  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+	  if (n == NULL)
+	    continue;
+	  if (finalize)
 	    {
-	      if (n->tgt == t)
-		{
-		  if (tp)
-		    tp->prev = t->prev;
-		  else
-		    acc_dev->openacc.data_environ = t->prev;
-		  break;
-		}
+	      n->refcount -= n->dynamic_refcount;
+	      n->dynamic_refcount = 0;
 	    }
-	}
-
-      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
-      n->refcount = 1;
-      t->refcount = minrefs;
-      for (size_t i = 0; i < t->list_count; i++)
-	if (t->list[i].key == n)
-	  {
-	    t->list[i].copy_from = force_copyfrom ? 1 : 0;
-	    break;
-	  }
-
-      /* If running synchronously, unmap immediately.  */
-      if (async < acc_async_noval)
-	gomp_unmap_vars (t, true, finalize);
-      else
-	{
-	  goacc_aq aq = get_goacc_asyncqueue (async);        
-	  goacc_async_copyout_unmap_vars (t, aq, finalize);
+	  else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
+	    {
+	      n->refcount--;
+	      n->dynamic_refcount--;
+	    }
+	  if (copyfrom)
+	    gomp_copy_dev2host (acc_dev, NULL, (void *) cur_node.host_start,
+				(void *) (n->tgt->tgt_start + n->tgt_offset
+					  + cur_node.host_start
+					  - n->host_start),
+				cur_node.host_end - cur_node.host_start);
+	  if (n->refcount == 0)
+	    gomp_remove_var (acc_dev, n);
+	  break;
+	default:
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("gomp_acc_remove_pointer unhandled kind 0x%.2x",
+		      kind);
 	}
     }
 
-  gomp_mutex_unlock (&acc_dev->lock);
 
-  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
+  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index f6c9114..8a3c65b 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -58,8 +58,12 @@  find_pointer (int pos, size_t mapnum, unsigned short *kinds)
     case GOMP_MAP_FORCE_TO:
     case GOMP_MAP_FROM:
     case GOMP_MAP_FORCE_FROM:
+    case GOMP_MAP_TOFROM:
+    case GOMP_MAP_FORCE_TOFROM:
     case GOMP_MAP_ALLOC:
     case GOMP_MAP_RELEASE:
+    case GOMP_MAP_DECLARE_ALLOCATE:
+    case GOMP_MAP_DECLARE_DEALLOCATE:
       {
 	unsigned char kind1 = kinds[pos + 1] & 0xff;
 	if (kind1 == GOMP_MAP_POINTER
@@ -392,7 +396,7 @@  GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
 				    &api_info);
 	}
       /* If running synchronously, unmap immediately.  */
-      gomp_unmap_vars (tgt, true, false);
+      gomp_unmap_vars (tgt, true);
       if (profiling_dispatch_p)
 	{
 	  prof_info.event_type = acc_ev_exit_data_end;
@@ -410,7 +414,7 @@  GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
       else
 	acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs,
 					  devaddrs, dims, tgt, aq);
-      goacc_async_copyout_unmap_vars (tgt, aq, false);
+      goacc_async_copyout_unmap_vars (tgt, aq);
     }
 
  out:
@@ -647,7 +651,7 @@  GOACC_data_end (void)
 
   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
   thr->mapped_data = tgt->prev;
-  gomp_unmap_vars (tgt, true, false);
+  gomp_unmap_vars (tgt, true);
   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
 
   if (profiling_dispatch_p)
@@ -845,18 +849,39 @@  GOACC_enter_exit_data (int device, size_t mapnum,
 		    int elems = sizes[i];
 		    struct splay_tree_key_s k;
 		    splay_tree_key str;
-		    k.host_start = (uintptr_t) hostaddrs[i];
-		    k.host_end = k.host_start + 1;
+		    uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
+		    uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
+					 + sizes[i + elems];
+		    k.host_start = elems_lo;
+		    k.host_end = elems_hi;
 		    gomp_mutex_lock (&acc_dev->lock);
 		    str = splay_tree_lookup (&acc_dev->mem_map, &k);
 		    gomp_mutex_unlock (&acc_dev->lock);
-		    /* We increment the dynamic reference count for the struct
-		       itself by the number of struct elements that we
-		       mapped.  */
-		    if (str->refcount != REFCOUNT_INFINITY)
+		    if (str == NULL)
 		      {
-		        str->refcount += elems;
-			str->dynamic_refcount += elems;
+		        size_t mapsize = elems_hi - elems_lo;
+			goacc_aq aq = get_goacc_asyncqueue (async);
+			struct target_mem_desc *tgt;
+			unsigned short thiskind = GOMP_MAP_ALLOC;
+			int j;
+			for (j = 0; j < elems; j++)
+			  if ((kinds[i + j] & 0xff) != GOMP_MAP_ALLOC)
+			    {
+			      thiskind = GOMP_MAP_TO;
+			      break;
+			    }
+			tgt = gomp_map_vars_async (acc_dev, aq, 1,
+				&hostaddrs[i + 1], NULL, &mapsize, &thiskind,
+				true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+
+			for (j = 0; j < tgt->list_count; j++)
+			  if (tgt->list[j].key)
+			    tgt->list[j].key->dynamic_refcount++;
+
+			gomp_mutex_lock (&acc_dev->lock);
+			tgt->prev = acc_dev->openacc.data_environ;
+			acc_dev->openacc.data_environ = tgt;
+			gomp_mutex_unlock (&acc_dev->lock);
 		      }
 		    i += elems;
 		  }
@@ -962,18 +987,17 @@  GOACC_enter_exit_data (int device, size_t mapnum,
 		  int elems = sizes[i];
 		  struct splay_tree_key_s k;
 		  splay_tree_key str;
-		  k.host_start = (uintptr_t) hostaddrs[i];
-		  k.host_end = k.host_start + 1;
+		  uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
+		  uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
+				       + sizes[i + elems];
+		  k.host_start = elems_lo;
+		  k.host_end = elems_hi;
 		  gomp_mutex_lock (&acc_dev->lock);
 		  str = splay_tree_lookup (&acc_dev->mem_map, &k);
 		  gomp_mutex_unlock (&acc_dev->lock);
-		  /* Decrement dynamic reference count for the struct by the
-		     number of elements that we are unmapping.  */
-		  if (str->dynamic_refcount >= elems)
-		    {
-		      str->dynamic_refcount -= elems;
-		      str->refcount -= elems;
-		    }
+		  if (str == NULL)
+		    gomp_fatal ("[%p,%ld] is not mapped", (void *) elems_lo,
+				(unsigned long) (elems_hi - elems_lo));
 		  i += elems;
 		}
 		break;
@@ -989,10 +1013,14 @@  GOACC_enter_exit_data (int device, size_t mapnum,
 					   &sizes[i], &kinds[i]);
 	      else
 		{
-		  bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
-				   || kind == GOMP_MAP_FROM);
-		  gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom,
-					   async, finalize, pointer);
+		  unsigned short ptrkind = kinds[i + pointer - 1] & 0xff;
+		  bool detach = (ptrkind == GOMP_MAP_DETACH
+				 || ptrkind == GOMP_MAP_FORCE_DETACH);
+		  void *detach_from = detach ? hostaddrs[i + pointer - 1]
+					     : NULL;
+		  gomp_acc_remove_pointer (&hostaddrs[i], &sizes[i], &kinds[i],
+					   async, detach_from, finalize,
+					   pointer);
 		  /* See the above comment.  */
 		}
 	      i += pointer - 1;
diff --git a/libgomp/target.c b/libgomp/target.c
index da51291..bb5e1e9 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -629,7 +629,10 @@  gomp_detach_pointer (struct gomp_device_descr *devicep,
   idx = (detach_from - n->host_start) / sizeof (void *);
 
   if (!n->attach_count)
-    gomp_fatal ("no attachment counters for struct");
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("no attachment counters for struct");
+    }
 
   if (finalize)
     n->attach_count[idx] = 1;
@@ -1013,7 +1016,8 @@  gomp_map_vars_async (struct gomp_device_descr *devicep,
 	  continue;
 	}
       cur_node.host_start = (uintptr_t) hostaddrs[i];
-      if (!GOMP_MAP_POINTER_P (kind & typemask))
+      if (!GOMP_MAP_POINTER_P (kind & typemask)
+          && (kind & typemask) != GOMP_MAP_ATTACH)
 	cur_node.host_end = cur_node.host_start + sizes[i];
       else
 	cur_node.host_end = cur_node.host_start + sizeof (void *);
@@ -1281,7 +1285,9 @@  gomp_map_vars_async (struct gomp_device_descr *devicep,
 		      tgt->list[i].length = n->host_end - n->host_start;
 		      tgt->list[i].copy_from = false;
 		      tgt->list[i].always_copy_from = false;
-		      tgt->list[i].do_detach = true;
+		      tgt->list[i].do_detach
+		        = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+		      n->refcount++;
 		    }
 		  else
 		    {
@@ -1622,6 +1628,8 @@  gomp_unmap_tgt (struct target_mem_desc *tgt)
   if (tgt->tgt_end)
     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
 
+  gomp_acc_data_env_remove_tgt (&tgt->device_descr->openacc.data_environ, tgt);
+
   free (tgt->array);
   free (tgt);
 }
@@ -1650,17 +1658,18 @@  gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
    has been done already.  */
 
 attribute_hidden void
-gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, bool finalize)
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 {
-  gomp_unmap_vars_async (tgt, do_copyfrom, NULL, finalize);
+  gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
 }
 
 attribute_hidden void
 gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
-		       struct goacc_asyncqueue *aq, bool finalize)
+		       struct goacc_asyncqueue *aq)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
+
   if (tgt->list_count == 0)
     {
       free (tgt);
@@ -1685,15 +1694,15 @@  gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
 
       if (k != NULL && tgt->list[i].do_detach)
 	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
-					     + tgt->list[i].offset, finalize,
-			     NULL);
+					     + tgt->list[i].offset,
+			     k->refcount == 1, NULL);
     }
 
   for (i = 0; i < tgt->list_count; i++)
     {
       splay_tree_key k = tgt->list[i].key;
 
-      if (k == NULL || tgt->list[i].do_detach)
+      if (k == NULL)
 	continue;
 
       bool do_unmap = false;
@@ -2314,7 +2323,7 @@  GOMP_target (int device, void (*fn) (void *), const void *unused,
 		     GOMP_MAP_VARS_TARGET);
   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
 		     NULL);
-  gomp_unmap_vars (tgt_vars, true, false);
+  gomp_unmap_vars (tgt_vars, true);
 }
 
 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
@@ -2458,7 +2467,7 @@  GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
 		     args);
   if (tgt_vars)
-    gomp_unmap_vars (tgt_vars, true, false);
+    gomp_unmap_vars (tgt_vars, true);
 }
 
 /* Host fallback for GOMP_target_data{,_ext} routines.  */
@@ -2527,7 +2536,7 @@  GOMP_target_end_data (void)
     {
       struct target_mem_desc *tgt = icv->target_data;
       icv->target_data = tgt->prev;
-      gomp_unmap_vars (tgt, true, false);
+      gomp_unmap_vars (tgt, true);
     }
 }
 
@@ -2762,7 +2771,7 @@  gomp_target_task_fn (void *data)
       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
 	{
 	  if (ttask->tgt)
-	    gomp_unmap_vars (ttask->tgt, true, false);
+	    gomp_unmap_vars (ttask->tgt, true);
 	  return false;
 	}
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
index 6a52f74..6bdcfe7 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
@@ -182,13 +182,13 @@  main (int argc, char **argv)
         exit (EXIT_FAILURE);
     }
 
+    acc_delete (&h_X[0], N * sizeof (float));
+    acc_delete (&h_Y1[0], N * sizeof (float));
+
     free (h_X);
     free (h_Y1);
     free (h_Y2);
 
-    acc_free (d_X);
-    acc_free (d_Y);
-
     context_check (pctx);
 
     s = cublasDestroy (h);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
index 71365e8..b403a5c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
@@ -176,13 +176,13 @@  main (int argc, char **argv)
         exit (EXIT_FAILURE);
     }
 
+    acc_delete (&h_X[0], N * sizeof (float));
+    acc_delete (&h_Y1[0], N * sizeof (float));
+
     free (h_X);
     free (h_Y1);
     free (h_Y2);
 
-    acc_free (d_X);
-    acc_free (d_Y);
-
     context_check (pctx);
 
     s = cublasDestroy (h);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
new file mode 100644
index 0000000..81c1c5e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
@@ -0,0 +1,59 @@ 
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+  int a;
+  int **b;
+};
+
+int
+main ()
+{
+  int n = 100, i, j, k;
+  struct dc v = { .a = 3 };
+
+  v.b = (int **) malloc (sizeof (int *) * n);
+  for (i = 0; i < n; i++)
+    v.b[i] = (int *) malloc (sizeof (int) * n);
+
+  for (k = 0; k < 16; k++)
+    {
+#pragma acc data copy(v)
+      {
+#pragma acc data copy(v.b[:n])
+	{
+	  for (i = 0; i < n; i++)
+	    {
+	      acc_copyin (v.b[i], sizeof (int) * n);
+	      acc_attach ((void **) &v.b[i]);
+	    }
+
+#pragma acc parallel loop
+	  for (i = 0; i < n; i++)
+	    for (j = 0; j < n; j++)
+	      v.b[i][j] = v.a + i + j;
+
+	  for (i = 0; i < n; i++)
+	    {
+	      acc_detach ((void **) &v.b[i]);
+	      acc_copyout (v.b[i], sizeof (int) * n);
+	    }
+	}
+      }
+
+      for (i = 0; i < n; i++)
+	for (j = 0; j < n; j++)
+	  assert (v.b[i][j] == v.a + i + j);
+
+      assert (!acc_is_present (&v, sizeof (v)));
+      assert (!acc_is_present (v.b, sizeof (int *) * n));
+      for (i = 0; i < n; i++)
+        assert (!acc_is_present (v.b[i], sizeof (int) * n));
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
new file mode 100644
index 0000000..3a970a0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+  int a;
+  int *b;
+};
+
+int
+main ()
+{
+  int n = 100, i, j, k;
+  struct dc v = { .a = 3 };
+
+  v.b = (int *) malloc (sizeof (int) * n);
+
+  for (k = 0; k < 16; k++)
+    {
+#pragma acc enter data copyin(v.a, v.b[0:n])
+
+#pragma acc enter data pcopyin(v.b[0:n])
+
+#pragma acc parallel loop attach(v.b)
+      for (i = 0; i < n; i++)
+	v.b[i] = v.a + i;
+
+#pragma acc exit data copyout(v.b[:n])
+#pragma acc exit data delete(v) finalize
+
+      for (i = 0; i < n; i++)
+	assert (v.b[i] == v.a + i);
+
+      assert (!acc_is_present (&v, sizeof (v)));
+      assert (!acc_is_present (v.b, sizeof (int *) * n));
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
new file mode 100644
index 0000000..54f553b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+  int a;
+  int *b;
+  int *c;
+  int *d;
+};
+
+int
+main ()
+{
+  int n = 100, i, j, k;
+  struct dc v = { .a = 3 };
+
+  v.b = (int *) malloc (sizeof (int) * n);
+  v.c = (int *) malloc (sizeof (int) * n);
+  v.d = (int *) malloc (sizeof (int) * n);
+
+#pragma acc enter data copyin(v)
+
+  for (k = 0; k < 16; k++)
+    {
+#pragma acc enter data copyin(v.a, v.b[:n], v.c[:n], v.d[:n])
+
+#pragma acc parallel loop
+      for (i = 0; i < n; i++)
+	v.b[i] = v.a + i;
+
+#pragma acc exit data copyout(v.b[:n])
+#pragma acc exit data copyout(v.c[:n])
+#pragma acc exit data copyout(v.d[:n])
+
+      for (i = 0; i < n; i++)
+	assert (v.b[i] == v.a + i);
+
+      assert (acc_is_present (&v, sizeof (v)));
+      assert (!acc_is_present (v.b, sizeof (int *) * n));
+      assert (!acc_is_present (v.c, sizeof (int *) * n));
+      assert (!acc_is_present (v.d, sizeof (int *) * n));
+    }
+
+#pragma acc exit data copyout(v)
+
+  assert (!acc_is_present (&v, sizeof (v)));
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
index db80413..a58e465 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
@@ -92,10 +92,6 @@  program test
 
   if (acc_is_present (c) .eqv. .TRUE.) call abort
 
-  !$acc exit data delete (c(0:N))
-
-  if (acc_is_present (c) .eqv. .TRUE.) call abort
-
   do i = 1, N
     if (c(i) .ne. 3.0) call abort
   end do
@@ -113,11 +109,6 @@  program test
   if (acc_is_present (c) .eqv. .TRUE.) call abort
   if (acc_is_present (d) .eqv. .TRUE.) call abort
 
-  !$acc exit data delete (c(0:N), d(0:N))
-
-  if (acc_is_present (c) .eqv. .TRUE.) call abort
-  if (acc_is_present (d) .eqv. .TRUE.) call abort
-
   do i = 1, N
     if (c(i) .ne. 5.0) call abort
     if (d(i) .ne. 9.0) call abort
@@ -177,8 +168,8 @@  program test
 
   !$acc exit data delete (c(0:N), d(0:N))
 
-  !if (acc_is_present (c) .eqv. .TRUE.) call abort
-  !if (acc_is_present (d) .eqv. .TRUE.) call abort
+  if (acc_is_present (c) .eqv. .FALSE.) call abort
+  if (acc_is_present (d) .eqv. .FALSE.) call abort
 
   !$acc exit data delete (c(0:N), d(0:N))
 
@@ -190,12 +181,7 @@  program test
   if (acc_is_present (c) .eqv. .FALSE.) call abort
   if (acc_is_present (d) .eqv. .TRUE.) call abort
 
-  !$acc exit data delete (c(0:N), d(0:N))
-
-  if (acc_is_present (c) .eqv. .TRUE.) call abort
-  if (acc_is_present (d) .eqv. .TRUE.) call abort
-
-  !$acc exit data delete (c(0:N), d(0:N))
+  !$acc exit data delete (c(0:N))
 
   if (acc_is_present (c) .eqv. .TRUE.) call abort
   if (acc_is_present (d) .eqv. .TRUE.) call abort