diff mbox series

Handle OpenACC 'self' clause for compute constructs in OpenACC 'kernels' decomposition (was: Extend test suite coverage for OpenACC 'self' clause for compute constructs (was: [PATCH, OpenACC 2.7] Implement self clause for compute constructs))

Message ID 87bkcnvyct.fsf@euler.schwinge.homeip.net
State New
Headers show
Series Handle OpenACC 'self' clause for compute constructs in OpenACC 'kernels' decomposition (was: Extend test suite coverage for OpenACC 'self' clause for compute constructs (was: [PATCH, OpenACC 2.7] Implement self clause for compute constructs)) | expand

Commit Message

Thomas Schwinge Oct. 25, 2023, 9:35 a.m. UTC
Hi!

On 2023-10-25T11:29:52+0200, I wrote:
> On 2023-10-25T10:57:06+0200, I wrote:
>> With minor textual conflicts resolved, I've pushed this to master branch
>> in commit 3a3596389c2e539cb8fd5dc5784a4e2afe193a2a
>> "OpenACC 2.7: Implement self clause for compute constructs", see
>> attached.
>>
>>
>> I'll then apply/submit a number of follow-on commits.
>
>> From 3a3596389c2e539cb8fd5dc5784a4e2afe193a2a Mon Sep 17 00:00:00 2001
>> From: Chung-Lin Tang <cltang@codesourcery.com>
>> Date: Tue, 13 Jun 2023 08:44:31 -0700
>> Subject: [PATCH] OpenACC 2.7: Implement self clause for compute constructs
>
>>  .../c-c++-common/goacc/self-clause-1.c        |  22 +
>>  .../c-c++-common/goacc/self-clause-2.c        |  17 +
>>  gcc/testsuite/gfortran.dg/goacc/self.f95      |  53 +
>
>>  .../libgomp.oacc-c-c++-common/self-1.c        | 962 ++++++++++++++++++
>
> I found that insufficient, and added some more.  Pushed to
> master branch commit 047841a68ebf5f991e842961f9e54f3c10b94f2c
> "Extend test suite coverage for OpenACC 'self' clause for compute constructs",
> see attached.  This is mostly just adapting and cross-linking some
> existing 'if' clause test cases.  (..., which turned up a problem when
> the 'self' clause is used with OpenACC 'kernels'.)

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/self-1.f90
> @@ -0,0 +1,996 @@
> +! OpenACC 'self' clause.
> +
> +! This is 'if-1.f90' with 'self(!cond)' instead of 'if(cond)' on compute
> +! constructs.
> +! ..., which the exception of certain 'kernels' constructs.

..., which I've then fixed up per master branch
commit 7b2ae64b68132c1c643cb34d58cd5eab6f9de652
"Handle OpenACC 'self' clause for compute constructs in OpenACC 'kernels' decomposition",
see attached.


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
diff mbox series

Patch

From 7b2ae64b68132c1c643cb34d58cd5eab6f9de652 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Mon, 23 Oct 2023 15:28:30 +0200
Subject: [PATCH] Handle OpenACC 'self' clause for compute constructs in
 OpenACC 'kernels' decomposition

... to fix up recent commit 3a3596389c2e539cb8fd5dc5784a4e2afe193a2a
"OpenACC 2.7: Implement self clause for compute constructs" for that case.

	gcc/
	* omp-oacc-kernels-decompose.cc (omp_oacc_kernels_decompose_1):
	Handle 'OMP_CLAUSE_SELF' like 'OMP_CLAUSE_IF'.
	* omp-expand.cc (expand_omp_target): Handle 'OMP_CLAUSE_SELF' for
	'GF_OMP_TARGET_KIND_OACC_DATA_KERNELS'.
	gcc/testsuite/
	* c-c++-common/goacc/self-clause-2.c: Verify
	'--param=openacc-kernels=decompose'.
	* gfortran.dg/goacc/kernels-tree.f95: Adjust.
	libgomp/
	* oacc-parallel.c (GOACC_data_start): Handle
	'GOACC_FLAG_LOCAL_DEVICE'.
	(GOACC_parallel_keyed): Simplify accordingly.
	* testsuite/libgomp.oacc-fortran/self-1.f90: Adjust.
---
 gcc/omp-expand.cc                               | 14 ++++++++++++--
 gcc/omp-oacc-kernels-decompose.cc               | 15 ++++++++-------
 .../c-c++-common/goacc/self-clause-2.c          |  6 ++++++
 .../gfortran.dg/goacc/kernels-tree.f95          |  2 +-
 libgomp/oacc-parallel.c                         | 17 +++++------------
 .../testsuite/libgomp.oacc-fortran/self-1.f90   | 15 +++++++--------
 6 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
index 8576b938102..5c6a7f2e381 100644
--- a/gcc/omp-expand.cc
+++ b/gcc/omp-expand.cc
@@ -10334,9 +10334,19 @@  expand_omp_target (struct omp_region *region)
 
   if ((c = omp_find_clause (clauses, OMP_CLAUSE_SELF)) != NULL_TREE)
     {
-      gcc_assert (is_gimple_omp_oacc (entry_stmt) && offloaded);
+      gcc_assert ((is_gimple_omp_oacc (entry_stmt) && offloaded)
+		  || (gimple_omp_target_kind (entry_stmt)
+		      == GF_OMP_TARGET_KIND_OACC_DATA_KERNELS));
 
-      edge e = split_block_after_labels (new_bb);
+      edge e;
+      if (offloaded)
+	e = split_block_after_labels (new_bb);
+      else
+	{
+	  gsi = gsi_last_nondebug_bb (new_bb);
+	  gsi_prev (&gsi);
+	  e = split_block (new_bb, gsi_stmt (gsi));
+	}
       basic_block cond_bb = e->src;
       new_bb = e->dest;
       remove_edge (e);
diff --git a/gcc/omp-oacc-kernels-decompose.cc b/gcc/omp-oacc-kernels-decompose.cc
index ffc0a8f813e..dfbb34935d0 100644
--- a/gcc/omp-oacc-kernels-decompose.cc
+++ b/gcc/omp-oacc-kernels-decompose.cc
@@ -1519,17 +1519,18 @@  omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
 	      break;
 	    }
 	}
-      else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
+      else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF
+	       || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SELF)
 	{
-	  /* If there is an 'if' clause, it must be duplicated to the
-	     enclosing data region.  Temporarily remove the if clause's
-	     chain to avoid copying it.  */
+	  /* If there is an 'if' or 'self' clause, it must be duplicated to the
+	     enclosing data region.  Temporarily remove its chain to avoid
+	     copying it.  */
 	  tree saved_chain = OMP_CLAUSE_CHAIN (c);
 	  OMP_CLAUSE_CHAIN (c) = NULL;
-	  tree new_if_clause = unshare_expr (c);
+	  tree new_clause = unshare_expr (c);
 	  OMP_CLAUSE_CHAIN (c) = saved_chain;
-	  OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
-	  data_clauses = new_if_clause;
+	  OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
+	  data_clauses = new_clause;
 	}
     }
   /* Restore the original order of the clauses.  */
diff --git a/gcc/testsuite/c-c++-common/goacc/self-clause-2.c b/gcc/testsuite/c-c++-common/goacc/self-clause-2.c
index 769694baec9..3ac29a03bc4 100644
--- a/gcc/testsuite/c-c++-common/goacc/self-clause-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/self-clause-2.c
@@ -1,6 +1,8 @@ 
 /* See also 'if-clause-2.c'.  */
 
 /* { dg-additional-options "-fdump-tree-gimple" } */
+/* { dg-additional-options "--param=openacc-kernels=decompose" }
+   { dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" } */
 
 void
 f (short c)
@@ -11,6 +13,8 @@  f (short c)
 
 #pragma acc kernels self(c) copy(c)
   /* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(tofrom:c \[len: [0-9]+\]\) self\(_[0-9]+\)$} 1 "gimple" } } */
+  /* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(tofrom:c \[len: [0-9]+\]\) self\(_[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } }
+     { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:c \[len: [0-9]+\]\) self\(_[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } } */
   ++c;
 
 #pragma acc serial self(c) copy(c)
@@ -29,6 +33,8 @@  g (short d)
 
 #pragma acc kernels self copy(d)
   /* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(tofrom:d \[len: [0-9]+\]\) self\(1\)$} 1 "gimple" } } */
+  /* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(tofrom:d \[len: [0-9]+\]\) self\(1\)$} 1 "omp_oacc_kernels_decompose" } }
+     { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:d \[len: [0-9]+\]\) self\(1+\)$} 1 "omp_oacc_kernels_decompose" } } */
   ++d;
 
 #pragma acc serial self copy(d)
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
index 1ba04a84e12..2ee578f7f32 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
@@ -42,5 +42,5 @@  end program test
 
 ! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } } 
 
-! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels if\((?:D\.|_)[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } }
+! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels if\((?:D\.|_)[0-9]+\) self\(1\)$} 1 "omp_oacc_kernels_decompose" } }
 ! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single num_gangs\(1\) if\((?:D\.|_)[0-9]+\) self\(1\) async\(-1\)$} 1 "omp_oacc_kernels_decompose" } }
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index cf37a1bdd7d..16cf3948e2d 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -184,19 +184,11 @@  GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
 
   /* Host fallback if "if" clause is false or if the current device is set to
      the host.  */
-  if (flags & GOACC_FLAG_HOST_FALLBACK)
-    {
-      prof_info.device_type = acc_device_host;
-      api_info.device_type = prof_info.device_type;
-      goacc_save_and_set_bind (acc_device_host);
-      fn (hostaddrs);
-      goacc_restore_bind ();
-      goto out_prof;
-    }
-  else if (flags & GOACC_FLAG_LOCAL_DEVICE)
-    {
+  if ((flags & GOACC_FLAG_HOST_FALLBACK)
       /* TODO: a proper pthreads based "multi-core CPU" local device
 	 implementation. Currently, this is still the same as host-fallback.  */
+      || (flags & GOACC_FLAG_LOCAL_DEVICE))
+    {
       prof_info.device_type = acc_device_host;
       api_info.device_type = prof_info.device_type;
       goacc_save_and_set_bind (acc_device_host);
@@ -457,7 +449,8 @@  GOACC_data_start (int flags_m, size_t mapnum,
 
   /* Host fallback or 'do nothing'.  */
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
-      || (flags & GOACC_FLAG_HOST_FALLBACK))
+      || (flags & GOACC_FLAG_HOST_FALLBACK)
+      || (flags & GOACC_FLAG_LOCAL_DEVICE))
     {
       prof_info.device_type = acc_device_host;
       api_info.device_type = prof_info.device_type;
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/self-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/self-1.f90
index b9ec9de08d9..6c1233d6cf5 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/self-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/self-1.f90
@@ -2,7 +2,6 @@ 
 
 ! This is 'if-1.f90' with 'self(!cond)' instead of 'if(cond)' on compute
 ! constructs.
-! ..., which the exception of certain 'kernels' constructs.
 
 ! { dg-do run }
 ! { dg-additional-options "-cpp" }
@@ -523,7 +522,7 @@  program main
 
   a(:) = 16.0
 
-  !$acc kernels if (0 == 1) ! { dg-line l_compute[incr c_compute] }
+  !$acc kernels self (0 /= 1) ! { dg-line l_compute[incr c_compute] }
   ! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
   !   { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -569,7 +568,7 @@  program main
 
   a(:) = 22.0
 
-  !$acc kernels if (zero == 1) ! { dg-line l_compute[incr c_compute] }
+  !$acc kernels self (zero /= 1) ! { dg-line l_compute[incr c_compute] }
   ! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
   !   { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -615,7 +614,7 @@  program main
 
   a(:) = 76.0
 
-  !$acc kernels if (.FALSE.) ! { dg-line l_compute[incr c_compute] }
+  !$acc kernels self (.TRUE.) ! { dg-line l_compute[incr c_compute] }
   ! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
   !   { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -665,7 +664,7 @@  program main
 
   nn = 0
 
-  !$acc kernels if (nn == 1) ! { dg-line l_compute[incr c_compute] }
+  !$acc kernels self (nn /= 1) ! { dg-line l_compute[incr c_compute] }
   ! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
   !   { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -715,7 +714,7 @@  program main
 
   nn = 0;
 
-  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) ! { dg-line l_compute[incr c_compute] }
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) self (.NOT. ((nn + nn) > 0)) ! { dg-line l_compute[incr c_compute] }
   ! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
   !   { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -735,7 +734,7 @@  program main
 
   a(:) = 91.0
 
-  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0) ! { dg-line l_compute[incr c_compute] }
+  !$acc kernels copyin (a(1:N)) copyout (b(1:N)) self (.NOT. (-2 > 0)) ! { dg-line l_compute[incr c_compute] }
   ! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
   !   { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -781,7 +780,7 @@  program main
 
   a(:) = 87.0
 
-  !$acc kernels if (one == 0) ! { dg-line l_compute[incr c_compute] }
+  !$acc kernels self (one /= 0) ! { dg-line l_compute[incr c_compute] }
   ! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
   !   { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
-- 
2.34.1