diff mbox

[gomp4,committed] Revert "Use marked_independent in oacc kernels region"

Message ID 563CE705.5040901@mentor.com
State New
Headers show

Commit Message

Tom de Vries Nov. 6, 2015, 5:44 p.m. UTC
Hi,

this patch reverts the independent clause support in the oacc kernels 
region.

The independent clause support is broken, in a subtle way. We currently 
set the marked_independent field in struct loop for loops with the 
independent clause in a kernels region. So that property holds for all 
the loads and stores present at source level. But, at omp-lowering, we 
introduce new loads and stores. Those new loads and stores are supposed 
to be eliminated from the loop by the kernels pass group. But in 
general, we can't guarantee that this happens. So, at parloops, we 
cannot assume based on marked_independent that in fact all loads and 
stores in the loop body are independent.

Committed to gomp-4_0-branch.

Thanks,
- Tom
diff mbox

Patch

Revert "Use marked_independent in oacc kernels region"

2015-10-20  Tom de Vries  <tom@codesourcery.com>

	Revert:
	2015-07-14  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (parallelize_loops): Use marked_independent flag in
	oacc kernels region.

	* c-c++-common/goacc/kernels-independent.c: New test.

	* testsuite/libgomp.oacc-c-c++-common/kernels-independent.c: New test.
---
 .../c-c++-common/goacc/kernels-independent.c       | 41 --------------------
 gcc/tree-parloops.c                                | 21 ++--------
 .../kernels-independent.c                          | 45 ----------------------
 3 files changed, 3 insertions(+), 104 deletions(-)
 delete mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-independent.c
 delete mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-independent.c b/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
deleted file mode 100644
index 1f36323..0000000
--- a/gcc/testsuite/c-c++-common/goacc/kernels-independent.c
+++ /dev/null
@@ -1,41 +0,0 @@ 
-/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
-/* { dg-additional-options "-fdump-tree-optimized" } */
-
-#include <stdlib.h>
-
-#define N (1024 * 512)
-#define COUNTERTYPE unsigned int
-
-void
-foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
-{
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    a[i] = i * 2;
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    b[i] = i * 4;
-
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
-  {
-    #pragma acc loop independent
-    for (COUNTERTYPE ii = 0; ii < N; ii++)
-      c[ii] = a[ii] + b[ii];
-  }
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    if (c[i] != a[i] + b[i])
-      abort ();
-}
-
-/* Check that only one loop is analyzed, and that it can be parallelized.  */
-/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized, marked independent" 1 "parloops_oacc_kernels" } } */
-/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
-
-/* Check that the loop has been split off into a function.  */
-/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 1 "parloops_oacc_kernels" } } */
-
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 05827d1..b4039ad 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -3258,24 +3258,9 @@  parallelize_loops (bool oacc_kernels_p)
       if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
 	continue;
 
-      if (!flag_loop_parallelize_all)
-	{
-	  bool independent = (oacc_kernels_p
-			      && loop->marked_independent);
-
-	  if (independent)
-	    {
-	      if (dump_file
-		  && (dump_flags & TDF_DETAILS))
-		fprintf (dump_file,
-			 "  SUCCESS: may be parallelized, marked independent\n");
-	    }
-	  else
-	    independent = loop_parallel_p (loop, &parloop_obstack);
-
-	  if (!independent)
-	    continue;
-	}
+      if (!flag_loop_parallelize_all
+	  && !loop_parallel_p (loop, &parloop_obstack))
+	continue;
 
       if (oacc_kernels_p
 	&& !oacc_entry_exit_ok (loop, &reduction_list))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
deleted file mode 100644
index d169a5f..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-independent.c
+++ /dev/null
@@ -1,45 +0,0 @@ 
-/* { dg-do run } */
-/* { dg-additional-options "-ftree-parallelize-loops=32" } */
-
-#include <stdlib.h>
-
-#define N (1024 * 512)
-#define COUNTERTYPE unsigned int
-
-void __attribute__((noinline,noclone))
-foo (unsigned int *a,  unsigned int *b,  unsigned int *c)
-{
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    a[i] = i * 2;
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    b[i] = i * 4;
-
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
-  {
-    #pragma acc loop independent
-    for (COUNTERTYPE ii = 0; ii < N; ii++)
-      c[ii] = a[ii] + b[ii];
-  }
-
-  for (COUNTERTYPE i = 0; i < N; i++)
-    if (c[i] != a[i] + b[i])
-      abort ();
-}
-
-int
-main (void)
-{
-  unsigned int *__restrict a;
-  unsigned int *__restrict b;
-  unsigned int *__restrict c;
-
-  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
-  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
-  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
-
-  foo (a, b, c);
-
-  return 0;
-}
-- 
1.9.1