diff mbox

Handle global loop counters in c/c++ oacc kernels (was: openacc kernels directive -- initial support)

Message ID 87k2x5z0m3.fsf@kepler.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge April 21, 2015, 8:29 p.m. UTC
Hi!

On Sat, 15 Nov 2014 13:14:52 +0100, Tom de Vries <Tom_deVries@mentor.com> wrote:
> I'm submitting a patch series with initial support for the oacc kernels directive.

Committed to gomp-4_0-branch in r222287:

commit abaf92b2db3c0799edac63cfb846af2dbde47423
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue Apr 21 20:27:40 2015 +0000

    Handle global loop counters in c/c++ oacc kernels
    
    	gcc/
    	* passes.def: Add pass_fre after pass_ch_oacc_kernels.
    
    	gcc/testsuite/
    	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test.
    	* c-c++-common/goacc/kernels-one-counter-var.c: New test.
    	* g++.dg/ipa/devirt-37.C: Update for new pass_fre.
    	* g++.dg/ipa/devirt-40.C: Likewise.
    	* g++.dg/tree-ssa/pr61034.C: Likewise.
    	* gcc.dg/ipa/ipa-pta-13.c: Likewise.
    	* gcc.dg/ipa/ipa-pta-3.c: Likewise.
    	* gcc.dg/ipa/ipa-pta-4.c: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222287 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |    2 +
 gcc/passes.def                                     |    1 +
 gcc/testsuite/ChangeLog.gomp                       |    9 ++++
 .../goacc/kernels-counter-vars-function-scope.c    |   55 ++++++++++++++++++++
 .../c-c++-common/goacc/kernels-one-counter-var.c   |   54 +++++++++++++++++++
 gcc/testsuite/g++.dg/ipa/devirt-37.C               |   12 ++---
 gcc/testsuite/g++.dg/ipa/devirt-40.C               |    6 +--
 gcc/testsuite/g++.dg/tree-ssa/pr61034.C            |   10 ++--
 gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c              |    6 +--
 gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c               |    6 +--
 gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c               |    6 +--
 11 files changed, 144 insertions(+), 23 deletions(-)



Grüße,
 Thomas
diff mbox

Patch

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index f14c3718..b1933ba 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,7 @@ 
 2015-04-21  Tom de Vries  <tom@codesourcery.com>
 
+	* passes.def: Add pass_fre after pass_ch_oacc_kernels.
+
 	* passes.def: Add pass_scev_cprop to pass_oacc_kernels.
 	* tree-ssa-loop.c (pass_scev_cprop::clone): New function.
 
diff --git gcc/passes.def gcc/passes.def
index 3e85808..04cbba0 100644
--- gcc/passes.def
+++ gcc/passes.def
@@ -91,6 +91,7 @@  along with GCC; see the file COPYING3.  If not see
 	  NEXT_PASS (pass_oacc_kernels);
 	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
 	      NEXT_PASS (pass_ch_oacc_kernels);
+	      NEXT_PASS (pass_fre);
 	      NEXT_PASS (pass_tree_loop_init);
 	      NEXT_PASS (pass_lim);
 	      NEXT_PASS (pass_copy_prop);
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index eed22e2..ed80f5b 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,6 +1,15 @@ 
 2015-04-21  Tom de Vries  <tom@codesourcery.com>
 	    Thomas Schwinge  <thomas@codesourcery.com>
 
+	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test.
+	* c-c++-common/goacc/kernels-one-counter-var.c: New test.
+	* g++.dg/ipa/devirt-37.C: Update for new pass_fre.
+	* g++.dg/ipa/devirt-40.C: Likewise.
+	* g++.dg/tree-ssa/pr61034.C: Likewise.
+	* gcc.dg/ipa/ipa-pta-13.c: Likewise.
+	* gcc.dg/ipa/ipa-pta-3.c: Likewise.
+	* gcc.dg/ipa/ipa-pta-4.c: Likewise.
+
 	* gcc.dg/pr41488.c: Update for new pass_scev_cprop.
 	* gcc.dg/tree-ssa/loop-17.c: Likewise.
 	* gcc.dg/tree-ssa/loop-39.c: Likewise.
diff --git gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
new file mode 100644
index 0000000..06cdb29
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -0,0 +1,55 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+  COUNTERTYPE i;
+  COUNTERTYPE ii;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+  for (i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+    for (ii = 0; ii < N; ii++)
+      c[ii] = a[ii] + b[ii];
+  }
+
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
new file mode 100644
index 0000000..2699437
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
@@ -0,0 +1,54 @@ 
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+  unsigned int *__restrict a;
+  unsigned int *__restrict b;
+  unsigned int *__restrict c;
+  COUNTERTYPE i;
+
+  a = (unsigned int *)malloc (N * sizeof (unsigned int));
+  b = (unsigned int *)malloc (N * sizeof (unsigned int));
+  c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+  for (i = 0; i < N; i++)
+    a[i] = i * 2;
+
+  for (i = 0; i < N; i++)
+    b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  {
+    for (i = 0; i < N; i++)
+      c[i] = a[i] + b[i];
+  }
+
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] + b[i])
+      abort ();
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git gcc/testsuite/g++.dg/ipa/devirt-37.C gcc/testsuite/g++.dg/ipa/devirt-37.C
index 7e1acdc..eb2c7f2 100644
--- gcc/testsuite/g++.dg/ipa/devirt-37.C
+++ gcc/testsuite/g++.dg/ipa/devirt-37.C
@@ -1,4 +1,4 @@ 
-/* { dg-options "-fpermissive -O2 -fno-indirect-inlining -fno-devirtualize-speculatively -fdump-tree-fre2-details -fno-early-inlining"  } */
+/* { dg-options "-fpermissive -O2 -fno-indirect-inlining -fno-devirtualize-speculatively -fdump-tree-fre3-details -fno-early-inlining"  } */
 #include <stdlib.h>
 struct A {virtual void test() {abort ();}};
 struct B:A
@@ -30,8 +30,8 @@  t()
 /* After inlining the call within constructor needs to be checked to not go into a basetype.
    We should see the vtbl store and we should notice extcall as possibly clobbering the
    type but ignore it because b is in static storage.  */
-/* { dg-final { scan-tree-dump "No dynamic type change found."  "fre2"  } } */
-/* { dg-final { scan-tree-dump "Checking vtbl store:"  "fre2"  } } */
-/* { dg-final { scan-tree-dump "Function call may change dynamic type:extcall"  "fre2"  } } */
-/* { dg-final { scan-tree-dump "converting indirect call to function virtual void"  "fre2"  } } */
-/* { dg-final { cleanup-tree-dump "fre2" } } */
+/* { dg-final { scan-tree-dump "No dynamic type change found."  "fre3"  } } */
+/* { dg-final { scan-tree-dump "Checking vtbl store:"  "fre3"  } } */
+/* { dg-final { scan-tree-dump "Function call may change dynamic type:extcall"  "fre3"  } } */
+/* { dg-final { scan-tree-dump "converting indirect call to function virtual void"  "fre3"  } } */
+/* { dg-final { cleanup-tree-dump "fre3" } } */
diff --git gcc/testsuite/g++.dg/ipa/devirt-40.C gcc/testsuite/g++.dg/ipa/devirt-40.C
index 79cb129..7e4ae7c 100644
--- gcc/testsuite/g++.dg/ipa/devirt-40.C
+++ gcc/testsuite/g++.dg/ipa/devirt-40.C
@@ -1,4 +1,4 @@ 
-/* { dg-options "-O2 -fdump-tree-fre2-details"  } */
+/* { dg-options "-O2 -fdump-tree-fre3-details"  } */
 typedef enum
 {
 } UErrorCode;
@@ -19,5 +19,5 @@  A::m_fn1 (UnicodeString &, int &p2, UErrorCode &) const
   UnicodeString a[2];
 }
 
-/* { dg-final { scan-tree-dump-not "\\n  OBJ_TYPE_REF" "fre2"  } } */
-/* { dg-final { cleanup-tree-dump "fre2" } } */
+/* { dg-final { scan-tree-dump-not "\\n  OBJ_TYPE_REF" "fre3"  } } */
+/* { dg-final { cleanup-tree-dump "fre3" } } */
diff --git gcc/testsuite/g++.dg/tree-ssa/pr61034.C gcc/testsuite/g++.dg/tree-ssa/pr61034.C
index 9ec3995..78417a1 100644
--- gcc/testsuite/g++.dg/tree-ssa/pr61034.C
+++ gcc/testsuite/g++.dg/tree-ssa/pr61034.C
@@ -1,5 +1,5 @@ 
 // { dg-do compile }
-// { dg-options "-O3 -fdump-tree-fre2" }
+// { dg-options "-O3 -fdump-tree-fre3" }
 
 #define assume(x) if(!(x))__builtin_unreachable()
 
@@ -41,7 +41,7 @@  bool f(I a, I b, I c, I d) {
 // a bunch of conditional free()s and unreachable()s.
 // This works only if everything is inlined into 'f'.
 
-// { dg-final { scan-tree-dump-times ";; Function" 1 "fre2" } }
-// { dg-final { scan-tree-dump-times "free" 19 "fre2" } }
-// { dg-final { scan-tree-dump-times "unreachable" 11 "fre2" } }
-// { dg-final { cleanup-tree-dump "fre2" } }
+// { dg-final { scan-tree-dump-times ";; Function" 1 "fre3" } }
+// { dg-final { scan-tree-dump-times "free" 19 "fre3" } }
+// { dg-final { scan-tree-dump-times "unreachable" 11 "fre3" } }
+// { dg-final { cleanup-tree-dump "fre3" } }
diff --git gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c
index f7f95f4..8d73900 100644
--- gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c
+++ gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c
@@ -1,5 +1,5 @@ 
 /* { dg-do link } */
-/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre2 -fno-ipa-icf" } */
+/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre3 -fno-ipa-icf" } */
 
 static int x, y;
 
@@ -54,9 +54,9 @@  int main()
   local_address_taken (&y);
   /* As we are computing flow- and context-insensitive we may not
      CSE the load of x here.  */
-  /* { dg-final { scan-tree-dump " = x;" "fre2" } } */
+  /* { dg-final { scan-tree-dump " = x;" "fre3" } } */
   return x;
 }
 
 /* { dg-final { cleanup-ipa-dump "pta" } } */
-/* { dg-final { cleanup-tree-dump "fre2" } } */
+/* { dg-final { cleanup-tree-dump "fre3" } } */
diff --git gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c
index 4790080..2398a21 100644
--- gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c
+++ gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre2-details" } */
+/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre3-details" } */
 
 static int __attribute__((noinline,noclone))
 foo (int *p, int *q)
@@ -23,6 +23,6 @@  int main()
 
 /* { dg-final { scan-ipa-dump "foo.arg0 = &a" "pta" } } */
 /* { dg-final { scan-ipa-dump "foo.arg1 = &b" "pta" } } */
-/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre2" } } */
-/* { dg-final { cleanup-tree-dump "fre2" } } */
+/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre3" } } */
+/* { dg-final { cleanup-tree-dump "fre3" } } */
 /* { dg-final { cleanup-ipa-dump "pta" } } */
diff --git gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c
index bf6fa28..b72489f 100644
--- gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c
+++ gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre2-details" } */
+/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre3-details" } */
 
 int a, b;
 
@@ -28,6 +28,6 @@  int main()
 
 /* { dg-final { scan-ipa-dump "foo.arg0 = &a" "pta" } } */
 /* { dg-final { scan-ipa-dump "foo.arg1 = &b" "pta" } } */
-/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre2" } } */
-/* { dg-final { cleanup-tree-dump "fre2" } } */
+/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre3" } } */
+/* { dg-final { cleanup-tree-dump "fre3" } } */
 /* { dg-final { cleanup-ipa-dump "pta" } } */