@@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
+ break;
+
case OMP_CLAUSE_TILE:
+ /* We're not yet making use of the information provided by OpenACC
+ tile clauses. Discard these here, to simplify later middle end
+ processing. */
+ remove = true;
break;
default:
@@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
- case OMP_CLAUSE_TILE:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
@@ -2201,6 +2200,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
break;
case OMP_CLAUSE__CACHE_:
+ case OMP_CLAUSE_TILE:
default:
gcc_unreachable ();
}
@@ -2357,7 +2357,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
- case OMP_CLAUSE_TILE:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
@@ -2365,6 +2364,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
break;
case OMP_CLAUSE__CACHE_:
+ case OMP_CLAUSE_TILE:
default:
gcc_unreachable ();
}
@@ -111,6 +111,7 @@ test ()
// { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
// { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
new file mode 100644
@@ -0,0 +1,100 @@
+/* Exercise how tree-nested.c handles OpenACC clauses in nested functions. */
+/* See gcc/testsuite/gfortran.dg/goacc/subroutines.f90 for the Fortran
+ version of this test. */
+
+int main ()
+{
+#define N 100
+ int nonlocal_arg; /* The nonlocal_* variables are used by nonlocal (). */
+ int nonlocal_a[N];
+ int nonlocal_i;
+ int nonlocal_j;
+
+ for (int i = 0; i < N; ++i)
+ nonlocal_a[i] = 5;
+ nonlocal_arg = 5;
+
+ void local () /* Nested; OpenACC constructs use its own locals only. */
+ {
+ int local_i;
+ int local_arg;
+ int local_a[N];
+ int local_j;
+
+ for (int i = 0; i < N; ++i)
+ local_a[i] = 5;
+ local_arg = 5;
+
+#pragma acc kernels loop \
+ gang(num:local_arg) worker(local_arg) vector(local_arg) \
+ wait async(local_arg)
+ for (local_i = 0; local_i < N; ++local_i)
+ {
+#pragma acc cache (local_a[local_i:5])
+ local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+#pragma acc loop auto independent tile(1)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+ }
+
+#pragma acc kernels loop \
+ gang(static:local_arg) worker(local_arg) vector(local_arg) \
+ wait(local_arg, local_arg + 1, local_arg + 2) async
+ for (local_i = 0; local_i < N; ++local_i)
+ {
+#pragma acc cache (local_a[local_i:4])
+ local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+#pragma acc loop auto independent tile(*)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+ }
+ }
+
+ void nonlocal () /* Nested; OpenACC constructs use main's nonlocal_* variables. */
+ {
+ for (int i = 0; i < N; ++i)
+ nonlocal_a[i] = 5;
+ nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+ gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+ wait async(nonlocal_arg)
+ for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+ {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+ nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+#pragma acc loop auto independent tile(3)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+ }
+
+#pragma acc kernels loop \
+ gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+ wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+ for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+ {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+ nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+#pragma acc loop auto independent tile(*)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+ }
+ }
+
+ local (); /* Call both nested functions. */
+ nonlocal ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage. */
+/* Due to missing right braces '}', the following functions are parsed as
+ nested functions. This used to trigger an internal compiler error (ICE). */
+
+void foo (void)
+{
+ #pragma acc parallel
+ {
+ #pragma acc loop independent
+ for (int i = 0; i < 16; i++)
+ ;
+ // Note: the right brace '}' meant to close the block above is commented out.
+ //}
+} // As parsed, this closes the 'acc parallel' block, not foo.
+void bar (void) // Hence parsed as a function nested inside foo.
+{
+}
+
+// Adding right brace '}' here to close foo, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note: the right brace '}' meant to close BAR is commented out here.
+//}
+
+void FOO (void) // Hence parsed as a function nested inside BAR.
+{
+ #pragma acc parallel
+ {
+ #pragma acc loop independent
+ for (int i = 0; i < 16; i++)
+ ;
+ }
+}
+
+// Adding right brace '}' here to close BAR, to make this compile.
+}
@@ -143,7 +143,8 @@ end subroutine test
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
@@ -1,6 +1,5 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile }
+! Exercise how tree-nested.c handles OpenACC clauses.
+! See gcc/testsuite/c-c++-common/goacc/nested.c for the C version.
program main
integer, parameter :: N = 100
@@ -8,10 +7,10 @@ program main
integer :: nonlocal_a(N)
integer :: nonlocal_i
integer :: nonlocal_j
-
+
nonlocal_a (:) = 5
nonlocal_arg = 5
-
+
call local ()
call nonlocal ()
@@ -22,24 +21,35 @@ contains
integer :: local_arg
integer :: local_a(N)
integer :: local_j
-
+
local_a (:) = 5
local_arg = 5
- !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
+ !$acc kernels loop &
+ !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+ !$acc wait async(local_arg)
do local_i = 1, N
+ !$acc cache (local_a(local_i:local_i + 5))
local_a(local_i) = 100
- !$acc loop seq
+ !$acc loop seq tile(*)
+ do local_j = 1, N
+ enddo
+ !$acc loop auto independent tile(1)
do local_j = 1, N
enddo
enddo
!$acc end kernels loop
- !$acc kernels loop gang(static:local_arg) worker(local_arg) &
- !$acc vector(local_arg)
+ !$acc kernels loop &
+ !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+ !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
do local_i = 1, N
+ !$acc cache (local_a(local_i:local_i + 4))
local_a(local_i) = 100
- !$acc loop seq
+ !$acc loop seq tile(1)
+ do local_j = 1, N
+ enddo
+ !$acc loop auto independent tile(*)
do local_j = 1, N
enddo
enddo
@@ -49,22 +59,32 @@ contains
subroutine nonlocal ()
nonlocal_a (:) = 5
nonlocal_arg = 5
-
- !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
- !$acc vector(nonlocal_arg)
+
+ !$acc kernels loop &
+ !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+ !$acc wait async(nonlocal_arg)
do nonlocal_i = 1, N
+ !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
nonlocal_a(nonlocal_i) = 100
- !$acc loop seq
+ !$acc loop seq tile(2)
+ do nonlocal_j = 1, N
+ enddo
+ !$acc loop auto independent tile(3)
do nonlocal_j = 1, N
enddo
enddo
!$acc end kernels loop
- !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
- !$acc vector(nonlocal_arg)
+ !$acc kernels loop &
+ !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+ !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
do nonlocal_i = 1, N
+ !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
nonlocal_a(nonlocal_i) = 100
- !$acc loop seq
+ !$acc loop seq tile(*)
+ do nonlocal_j = 1, N
+ enddo
+ !$acc loop auto independent tile(*)
do nonlocal_j = 1, N
enddo
enddo
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_ASYNC:
+ case OMP_CLAUSE_WAIT:
/* Several OpenACC clauses have optional arguments. Check if they
are present. */
if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_DEFAULTMAP:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
break;
+ case OMP_CLAUSE__CACHE_:
+ /* These clauses belong to the OpenACC cache directive, which is
+ discarded during gimplification, so we don't expect to see
+ anything here. */
+ gcc_unreachable ();
+
+ case OMP_CLAUSE_TILE:
+ /* OpenACC tile clauses are discarded during gimplification, so we
+ don't expect to see anything here. */
+ gcc_unreachable ();
+
default:
gcc_unreachable ();
}
@@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_ASYNC:
+ case OMP_CLAUSE_WAIT:
/* Several OpenACC clauses have optional arguments. Check if they
are present. */
if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_DEFAULTMAP:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
break;
+ case OMP_CLAUSE__CACHE_:
+ /* These clauses belong to the OpenACC cache directive, which is
+ discarded during gimplification, so we don't expect to see
+ anything here. */
+ gcc_unreachable ();
+
+ case OMP_CLAUSE_TILE:
+ /* OpenACC tile clauses are discarded during gimplification, so we
+ don't expect to see anything here. */
+ gcc_unreachable ();
+
default:
gcc_unreachable ();
}