[PATCH 3/7] Add pass_ch_oacc_kernels to pass_oacc_kernels
2014-11-25 Tom de Vries <tom@codesourcery.com>
* omp-low.c (loop_in_oacc_kernels_region_p): New function.
* omp-low.h (loop_in_oacc_kernels_region_p): Declare.
* passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels.
* tree-pass.h (make_pass_ch_oacc_kernels): Declare.
* tree-ssa-loop-ch.c: Include omp-low.h.
(pass_ch_execute): Declare.
(pass_ch::execute): Factor out ...
(pass_ch_execute): ... this new function. If handling oacc kernels,
skip loops that are not in oacc kernels region.
(pass_data_ch_oacc_kernels): New pass_data.
(class pass_ch_oacc_kernels): New pass.
(pass_ch_oacc_kernels::execute, make_pass_ch_oacc_kernels): New
functions.
---
gcc/omp-low.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++
gcc/omp-low.h | 2 ++
gcc/passes.def | 1 +
gcc/tree-pass.h | 1 +
gcc/tree-ssa-loop-ch.c | 59 +++++++++++++++++++++++++++++++++--
5 files changed, 144 insertions(+), 2 deletions(-)
@@ -13912,4 +13912,87 @@ gimple_stmt_omp_data_i_init_p (gimple stmt)
SSA_OP_DEF);
}
+/* Return true if LOOP is inside an OpenACC kernels region, that is, if the
+   header of LOOP belongs to a region whose entry block ends in a
+   GIMPLE_OACC_KERNELS statement.
+
+   If REGION_ENTRY is non-NULL, *REGION_ENTRY is set to the entry block of
+   the region containing LOOP, or to NULL if there is none.  Likewise, if
+   REGION_EXIT is non-NULL, *REGION_EXIT is set to the block chosen as the
+   end of that region (a block ending in GIMPLE_OMP_RETURN), or to NULL.
+
+   A GIMPLE_OACC_KERNELS block that dominates no GIMPLE_OMP_RETURN block is
+   skipped, since no region end can be determined for it.  */
+
+bool
+loop_in_oacc_kernels_region_p (struct loop *loop, basic_block *region_entry,
+                               basic_block *region_exit)
+{
+  bitmap excludes_bitmap = BITMAP_GGC_ALLOC ();
+  bitmap region_bitmap = BITMAP_GGC_ALLOC ();
+  bitmap_clear (region_bitmap);
+
+  if (region_entry != NULL)
+    *region_entry = NULL;
+  if (region_exit != NULL)
+    *region_exit = NULL;
+
+  basic_block bb;
+  gimple last;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      /* Blocks already assigned to a previously visited region cannot
+         start a new one.  */
+      if (bitmap_bit_p (region_bitmap, bb->index))
+        continue;
+
+      last = last_stmt (bb);
+      if (!last)
+        continue;
+
+      if (gimple_code (last) != GIMPLE_OACC_KERNELS)
+        continue;
+
+      /* BB is a region entry.  The entry block itself is not part of the
+         region.  */
+      bitmap_clear (excludes_bitmap);
+      bitmap_set_bit (excludes_bitmap, bb->index);
+
+      vec<basic_block> dominated
+        = get_all_dominated_blocks (CDI_DOMINATORS, bb);
+
+      unsigned di;
+      basic_block dom;
+
+      /* Among the dominated blocks ending in GIMPLE_OMP_RETURN, pick one
+         that is not dominated by another such block seen so far.  */
+      basic_block end_region = NULL;
+      FOR_EACH_VEC_ELT (dominated, di, dom)
+        {
+          if (dom == bb)
+            continue;
+
+          last = last_stmt (dom);
+          if (!last)
+            continue;
+
+          if (gimple_code (last) != GIMPLE_OMP_RETURN)
+            continue;
+
+          if (end_region == NULL
+              || dominated_by_p (CDI_DOMINATORS, end_region, dom))
+            end_region = dom;
+        }
+
+      /* Without a region end there is nothing to walk the dominator tree
+         from; do not pass a NULL block to get_all_dominated_blocks.  */
+      if (end_region == NULL)
+        {
+          dominated.release ();
+          continue;
+        }
+
+      /* Everything strictly dominated by the region end lies after the
+         region.  */
+      vec<basic_block> excludes
+        = get_all_dominated_blocks (CDI_DOMINATORS, end_region);
+
+      unsigned di2;
+      basic_block exclude;
+
+      FOR_EACH_VEC_ELT (excludes, di2, exclude)
+        if (exclude != end_region)
+          bitmap_set_bit (excludes_bitmap, exclude->index);
+      excludes.release ();
+
+      FOR_EACH_VEC_ELT (dominated, di, dom)
+        if (!bitmap_bit_p (excludes_bitmap, dom->index))
+          bitmap_set_bit (region_bitmap, dom->index);
+      dominated.release ();
+
+      if (bitmap_bit_p (region_bitmap, loop->header->index))
+        {
+          if (region_entry != NULL)
+            *region_entry = bb;
+          if (region_exit != NULL)
+            *region_exit = end_region;
+          return true;
+        }
+    }
+
+  return false;
+}
+
#include "gt-omp-low.h"
@@ -29,6 +29,8 @@ extern tree omp_reduction_init (tree, tree);
extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
extern void omp_finish_file (void);
extern bool gimple_stmt_omp_data_i_init_p (gimple);
+extern bool loop_in_oacc_kernels_region_p (struct loop *, basic_block *,
+ basic_block *);
extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars;
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
function. */
NEXT_PASS (pass_oacc_kernels);
PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+ NEXT_PASS (pass_ch_oacc_kernels);
NEXT_PASS (pass_expand_omp_ssa);
POP_INSERT_PASSES ()
NEXT_PASS (pass_merge_phi);
@@ -378,6 +378,7 @@ extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_ch_oacc_kernels (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_phi_only_cprop (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_build_ssa (gcc::context *ctxt);
@@ -48,12 +48,15 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "flags.h"
#include "tree-ssa-threadedge.h"
+#include "omp-low.h"
/* Duplicates headers of loops if they are small enough, so that the statements
in the loop body are always executed when the loop is entered. This
increases effectiveness of code motion optimizations, and reduces the need
for loop preconditioning. */
+static unsigned int pass_ch_execute (function *, bool);
+
/* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
instructions should be duplicated, limit is decreased by the actual
amount. */
@@ -172,6 +175,14 @@ public:
unsigned int
pass_ch::execute (function *fun)
{
+ return pass_ch_execute (fun, false);
+}
+
+} // anon namespace
+
+static unsigned int
+pass_ch_execute (function *fun, bool oacc_kernels_p)
+{
struct loop *loop;
basic_block header;
edge exit, entry;
@@ -205,6 +216,10 @@ pass_ch::execute (function *fun)
if (do_while_loop_p (loop))
continue;
+ if (oacc_kernels_p
+ && !loop_in_oacc_kernels_region_p (loop, NULL, NULL))
+ continue;
+
/* Iterate the header copying up to limit; this takes care of the cases
like while (a && b) {...}, where we want to have both of the conditions
copied. TODO -- handle while (a || b) - like cases, by not requiring
@@ -295,10 +310,50 @@ pass_ch::execute (function *fun)
return 0;
}
-} // anon namespace
-
gimple_opt_pass *
make_pass_ch (gcc::context *ctxt)
{
return new pass_ch (ctxt);
}
+
+namespace {
+
+/* Pass descriptor for the loop header copying variant that passes.def
+   schedules within the pass_oacc_kernels pass group.  */
+
+const pass_data pass_data_ch_oacc_kernels =
+{
+  GIMPLE_PASS, /* type */
+  "ch_oacc_kernels", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_TREE_CH, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_cleanup_cfg, /* todo_flags_finish */
+};
+
+/* Loop header copying that only handles loops located in an oacc kernels
+   region; the actual work is done by pass_ch_execute.  */
+
+class pass_ch_oacc_kernels : public gimple_opt_pass
+{
+public:
+  pass_ch_oacc_kernels (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_ch_oacc_kernels, ctxt)
+  {}
+
+  /* opt_pass methods: */
+
+  /* The pass itself is never gated off.  */
+  virtual bool gate (function *) { return true; }
+
+  /* Run the shared driver with OACC_KERNELS_P set, so that loops outside
+     an oacc kernels region are skipped.  */
+  virtual unsigned int execute (function *fun)
+  {
+    return pass_ch_execute (fun, true);
+  }
+
+}; // class pass_ch_oacc_kernels
+
+} // anon namespace
+
+/* Create a new instance of the pass_ch_oacc_kernels pass for CTXT.  */
+
+gimple_opt_pass *
+make_pass_ch_oacc_kernels (gcc::context *ctxt)
+{
+  gimple_opt_pass *pass = new pass_ch_oacc_kernels (ctxt);
+  return pass;
+}
--
1.9.1