Move kernels pass group before pass_fre
2015-10-13 Tom de Vries <tom@codesourcery.com>
* tree-ssa-dom.c (pass_dominator_oacc_kernels::clone): New function.
* passes.def: Move pass group pass_oacc_kernels before pass_fre. Add
pass_dominator_oacc_kernels twice to the pass_oacc_kernels pass group.
* c-c++-common/goacc/kernels-acc-on-device-2.c: New test.
* c-c++-common/goacc/kernels-counter-var-redundant-load.c: Update.
---
gcc/passes.def | 4 ++-
.../c-c++-common/goacc/kernels-acc-on-device-2.c | 37 ++++++++++++++++++++++
.../goacc/kernels-counter-var-redundant-load.c | 10 +++---
gcc/tree-ssa-dom.c | 1 +
4 files changed, 47 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-acc-on-device-2.c
@@ -86,12 +86,13 @@ along with GCC; see the file COPYING3. If not see
/* pass_build_ealias is a dummy pass that ensures that we
execute TODO_rebuild_alias at this point. */
NEXT_PASS (pass_build_ealias);
- NEXT_PASS (pass_fre);
/* Pass group that runs when there are oacc kernels in the
function. */
NEXT_PASS (pass_oacc_kernels);
PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+ NEXT_PASS (pass_dominator_oacc_kernels);
NEXT_PASS (pass_ch_oacc_kernels);
+ NEXT_PASS (pass_dominator_oacc_kernels);
NEXT_PASS (pass_tree_loop_init);
NEXT_PASS (pass_lim);
NEXT_PASS (pass_copy_prop);
@@ -105,6 +106,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_expand_omp_ssa);
NEXT_PASS (pass_tree_loop_done);
POP_INSERT_PASSES ()
+ NEXT_PASS (pass_fre);
NEXT_PASS (pass_merge_phi);
NEXT_PASS (pass_dse);
NEXT_PASS (pass_cd_dce);
new file mode 100644
@@ -0,0 +1,37 @@
+/* { dg-additional-options "-O2" } */
+
+#include "openacc.h"
+
+#define N 32
+
+void
+foo (float *a, float *b)
+{
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
+ {
+ int ii;
+ int on_host = acc_on_device (acc_device_X);
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (on_host)
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N])
+ {
+ int ii;
+ int on_host = acc_on_device (acc_device_X);
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (on_host)
+ b[ii] = a[ii] + 2;
+ else
+ b[ii] = a[ii];
+ }
+ }
+}
@@ -1,5 +1,5 @@
/* { dg-additional-options "-O2" } */
-/* { dg-additional-options "-fdump-tree-dom_oacc_kernels" } */
+/* { dg-additional-options "-fdump-tree-dom_oacc_kernels3" } */
#include <stdlib.h>
@@ -28,7 +28,9 @@ foo (unsigned int *c)
_15 = .omp_data_i_10->c;
c.1_16 = *_15;
- Check that there's only one load from anonymous ssa-name (which we assume to
- be the one to read c), and that there's no such load for ii. */
+ Check that there are two loads from anonymous ssa-names, which we assume to
+ be:
+ - the one to read c
+ - the one to read ii after the kernels region. */
-/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 1 "dom_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 2 "dom_oacc_kernels3" } } */
@@ -788,6 +788,7 @@ public:
{}
/* opt_pass methods: */
+ opt_pass * clone () { return new pass_dominator_oacc_kernels (m_ctxt); }
virtual bool gate (function *) { return true; }
private:
--
1.9.1