Handle nested loops in kernels regions
2015-07-12 Tom de Vries <tom@codesourcery.com>
* omp-low.c (build_receiver_ref): Mark *.omp_data_i as non-trapping.
* tree-parloops.c (gen_parallel_loop): Add LOOPS_NEED_FIXUP to loop
state.
(parallelize_loops): Allow nested loops.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c: New test.
* c-c++-common/goacc/kernels-loop-nest.c: New test.
---
gcc/omp-low.c | 1 +
.../c-c++-common/goacc/kernels-loop-nest.c | 42 ++++++++++++++++++++++
gcc/tree-parloops.c | 5 +--
.../libgomp.oacc-c-c++-common/kernels-loop-nest.c | 26 ++++++++++++++
4 files changed, 70 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c
@@ -1147,6 +1147,7 @@ build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
field = x;
x = build_simple_mem_ref (ctx->receiver_decl);
+ TREE_THIS_NOTRAP (x) = 1;
x = omp_build_component_ref (x, field);
if (by_ref)
x = build_simple_mem_ref (x);
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+/* Based on autopar/outer-1.c. */
+
+#include <stdlib.h>
+
+#define N 1000
+
+int
+main (void)
+{
+ int x[N][N];
+
+#pragma acc kernels copyout (x)
+ {
+ for (int ii = 0; ii < N; ii++)
+ for (int jj = 0; jj < N; jj++)
+ x[ii][jj] = ii + jj + 3;
+ }
+
+ for (int i = 0; i < N; i++)
+ for (int j = 0; j < N; j++)
+ if (x[i][j] != i + j + 3)
+ abort ();
+
+ return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
@@ -2442,6 +2442,7 @@ gen_parallel_loop (struct loop *loop,
/* Cancel the loop (it is simpler to do it here rather than to teach the
expander to do it). */
cancel_loop_tree (loop);
+ loops_state_set (LOOPS_NEED_FIXUP);
/* Free loop bound estimations that could contain references to
removed statements. */
@@ -2761,10 +2762,6 @@ parallelize_loops (bool oacc_kernels_p)
if (!loop->in_oacc_kernels_region)
continue;
- /* TODO: Allow nested loops. */
- if (loop->inner)
- continue;
-
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"Trying loop %d with header bb %d in oacc kernels region\n",
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <stdlib.h>
+
+#define N 1000
+
+int
+main (void)
+{
+ int x[N][N];
+
+#pragma acc kernels copyout (x)
+ {
+ for (int ii = 0; ii < N; ii++)
+ for (int jj = 0; jj < N; jj++)
+ x[ii][jj] = ii + jj + 3;
+ }
+
+ for (int i = 0; i < N; i++)
+ for (int j = 0; j < N; j++)
+ if (x[i][j] != i + j + 3)
+ abort ();
+
+ return 0;
+}
--
1.9.1