2015-10-26 Nathan Sidwell <nathan@codesourcery.com>
* doc/tm.texi: Rebuilt.
* internal-fn.c (expand_GOACC_REDUCTION_SETUP,
expand_GOACC_REDUCTION_INIT, expand_GOACC_REDUCTION_FINI,
expand_GOACC_REDUCTION_TEARDOWN): Replace with ...
(expand_GOACC_REDUCTION): ... this.
* internal-fn.def (GOACC_REDUCTION_SETUP,
GOACC_REDUCTION_INIT, GOACC_REDUCTION_FINI,
GOACC_REDUCTION_TEARDOWN): Replace with ...
(GOACC_REDUCTION): ... this.
* internal-fn.h (enum ifn_goacc_reduction_kind): New.
* omp-low.c (lower_rec_input_clauses): Adjust OpenACC comment.
(lower_oacc_reductions): Remove RID & LID, calculate
offset. Adjust for IFN_GOACC_REDUCTION change.
(default_goacc_reduction): Don't return bool. Adjust for argument
shift.
(oacc_loop_xform_head_tail, execute_oacc_device_lower): Adjust for
IFN_GOACC_REDUCTION change.
* target.def (goacc_reduction): Adjust hook.
* targhooks.h (default_goacc_reduction): Return void.
* config/nvptx/nvptx.c (worker_red_hwm): Rename to ...
(worker_red_size): ... here.
(var_red_t, struct loop_red, loop_reds): Delete.
(nvptx_reorg_reductions): Delete.
(nvptx_reorg): Don't reorg reductions.
(nvptx_file_end): Adjust worker reduction size name.
(nvptx_expand_worker_addr): Reimplement.
(nvptx_init_builtins): Adjust WORKER_ADDR prototype.
(nvptx_get_worker_red_addr): Reimplement.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): Don't
return bool. Adjust for argument shift & worker offset
processing.
(nvptx_goacc_reduction): Adjust.
===================================================================
@@ -119,40 +119,13 @@ static unsigned worker_bcast_align;
static GTY(()) rtx worker_bcast_sym;
/* Size of buffer needed for worker reductions. This has to be
- disjoing from the worker broadcast array, as both may be live
+ distinct from the worker broadcast array, as both may be live
concurrently. */
-static unsigned worker_red_hwm;
+static unsigned worker_red_size;
static unsigned worker_red_align;
#define worker_red_name "__worker_red"
static GTY(()) rtx worker_red_sym;
-/* To process worker-level reductions we need a buffer in CTA local
- (.shared) memory. As the number of loops per function and number
- of reductions per loop are likely to be small numbers, we use
- simple unsorted vectors to hold the mappings. */
-
-/* Mapping from a reduction to an offset within the worker reduction
- array. */
-typedef std::pair<unsigned, unsigned> var_red_t;
-
-/* Mapping from loops within a function to lists of reductions on that
- loop. */
-struct loop_red
-{
- unsigned id; /* Loop ID. */
- unsigned hwm; /* Allocated worker buffer for this loop. */
- auto_vec<var_red_t> vars; /* Reduction variables of the loop. */
-
- loop_red (unsigned id_)
- :id (id_), hwm (0)
- {
- }
-};
-
-/* It would be nice to put this intp machine_function, but auto_vec
- pulls in too much other stuff. */
-static auto_vec<loop_red> loop_reds;
-
/* Allocate a new, cleared machine_function structure. */
static struct machine_function *
@@ -3785,21 +3758,7 @@ nvptx_neuter_pars (parallel *par, unsign
nvptx_neuter_pars (par->next, modes, outer);
}
-static void
-nvptx_reorg_reductions (void)
-{
- unsigned ix;
-
- for (ix = loop_reds.length (); ix--;)
- {
- if (loop_reds[ix].hwm > worker_red_hwm)
- worker_red_hwm = loop_reds[ix].hwm;
- loop_reds.pop ();
- }
-}
-
/* PTX-specific reorganization
- - Scan and release reduction buffers
- Split blocks at fork and join instructions
- Compute live registers
- Mark now-unused registers, so function begin doesn't declare
@@ -3812,8 +3771,6 @@ nvptx_reorg_reductions (void)
static void
nvptx_reorg (void)
{
- nvptx_reorg_reductions ();
-
/* We are freeing block_for_insn in the toplev to keep compatibility
with old MDEP_REORGS that are not CFG based. Recompute it now. */
compute_bb_for_insn ();
@@ -4023,17 +3980,17 @@ nvptx_file_end (void)
worker_bcast_name, worker_bcast_hwm);
}
- if (worker_red_hwm)
+ if (worker_red_size)
{
/* Define the reduction buffer. */
- worker_red_hwm = (worker_red_hwm + worker_red_align - 1)
+ worker_red_size = (worker_red_size + worker_red_align - 1)
& ~(worker_red_align - 1);
fprintf (asm_out_file, "// BEGIN VAR DEF: %s\n", worker_red_name);
fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
worker_red_align,
- worker_red_name, worker_red_hwm);
+ worker_red_name, worker_red_size);
}
}
@@ -4074,44 +4031,21 @@ nvptx_expand_worker_addr (tree exp, rtx
if (ignore)
return target;
- unsigned lid = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 2));
- unsigned rid = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 3));
- unsigned ix;
-
- for (ix = 0; ix != loop_reds.length (); ix++)
- if (loop_reds[ix].id == lid)
- goto found_lid;
- /* Allocate a new loop. */
- loop_reds.safe_push (loop_red (lid));
- found_lid:
- loop_red &loop = loop_reds[ix];
- for (ix = 0; ix != loop.vars.length (); ix++)
- if (loop.vars[ix].first == rid)
- goto found_rid;
+ unsigned align = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 2));
+ if (align > worker_red_align)
+ worker_red_align = align;
+
+ unsigned offset = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 0));
+ unsigned size = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 1));
+ if (size + offset > worker_red_size)
+ worker_red_size = size + offset;
- /* Allocate a new var. */
- {
- unsigned size = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 0));
- unsigned align = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 1));
- unsigned off = loop.hwm;
-
- if (align > worker_red_align)
- worker_red_align = align;
- off = (off + align - 1) & ~(align -1);
- loop.hwm = off + size;
- loop.vars.safe_push (var_red_t (rid, off));
- }
- found_rid:
-
- /* Return offset into worker reduction array. */
- unsigned offset = loop.vars[ix].second;
-
emit_insn (gen_rtx_SET (target, worker_red_sym));
if (offset)
emit_insn (gen_rtx_SET (target,
gen_rtx_PLUS (Pmode, target, GEN_INT (offset))));
-
+
emit_insn (gen_rtx_SET (target,
gen_rtx_UNSPEC (Pmode, gen_rtvec (1, target),
UNSPEC_FROM_SHARED)));
@@ -4167,6 +4101,7 @@ enum nvptx_builtins
static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
/* Return the NVPTX builtin for CODE. */
+
static tree
nvptx_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
@@ -4177,6 +4112,7 @@ nvptx_builtin_decl (unsigned code, bool
}
/* Set up all builtin functions for this target. */
+
static void
nvptx_init_builtins (void)
{
@@ -4185,6 +4121,7 @@ nvptx_init_builtins (void)
add_builtin_function ("__builtin_nvptx_" NAME, \
build_function_type_list T, \
NVPTX_BUILTIN_ ## ID, BUILT_IN_MD, NULL, NULL))
+#define ST sizetype
#define UINT unsigned_type_node
#define LLUINT long_long_unsigned_type_node
#define PTRVOID ptr_type_node
@@ -4192,11 +4129,12 @@ nvptx_init_builtins (void)
DEF (SHUFFLE, "shuffle", (UINT, UINT, UINT, UINT, NULL_TREE));
DEF (SHUFFLELL, "shufflell", (LLUINT, LLUINT, UINT, UINT, NULL_TREE));
DEF (WORKER_ADDR, "worker_addr",
- (PTRVOID, UINT, UINT, UINT, UINT, NULL_TREE));
+ (PTRVOID, ST, UINT, UINT, NULL_TREE));
DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE));
DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, NULL_TREE));
#undef DEF
+#undef ST
#undef UINT
#undef LLUINT
#undef PTRVOID
@@ -4209,10 +4147,8 @@ nvptx_init_builtins (void)
IGNORE is nonzero if the value is to be ignored. */
static rtx
-nvptx_expand_builtin (tree exp, rtx target,
- rtx subtarget ATTRIBUTE_UNUSED,
- machine_mode mode,
- int ignore)
+nvptx_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ machine_mode mode, int ignore)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
switch (DECL_FUNCTION_CODE (fndecl))
@@ -4232,7 +4168,7 @@ nvptx_expand_builtin (tree exp, rtx targ
}
}
-/* Define vector size for known hardware. */
+/* Define dimension sizes for known hardware. */
#define PTX_VECTOR_LENGTH 32
#define PTX_WORKER_LENGTH 32
@@ -4311,16 +4247,16 @@ nvptx_goacc_fork_join (gcall *call, cons
}
static tree
-nvptx_get_worker_red_addr (tree type, tree rid, tree lid)
+nvptx_get_worker_red_addr (tree type, tree offset)
{
machine_mode mode = TYPE_MODE (type);
tree fndecl = nvptx_builtin_decl (NVPTX_BUILTIN_WORKER_ADDR, true);
tree size = build_int_cst (unsigned_type_node, GET_MODE_SIZE (mode));
tree align = build_int_cst (unsigned_type_node,
GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
- tree call = build_call_expr (fndecl, 4, size, align, lid, rid);
+ tree call = build_call_expr (fndecl, 3, offset, size, align);
- return fold_build1 (NOP_EXPR, build_pointer_type (type), call);
+ return fold_convert (build_pointer_type (type), call);
}
/* Emit a SHFL.DOWN using index SHFL of VAR into DEST_VAR. This function
@@ -4454,24 +4390,21 @@ nvptx_lockless_update (location_t loc, g
/* NVPTX implementation of GOACC_REDUCTION_SETUP. */
-static bool
+static void
nvptx_goacc_reduction_setup (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
- tree var = gimple_call_arg (call, 1);
- int level = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
- tree lid = gimple_call_arg (call, 4);
- tree rid = gimple_call_arg (call, 5);
+ tree var = gimple_call_arg (call, 2);
+ int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
gimple_seq seq = NULL;
- tree r = NULL_TREE;
push_gimplify_context (true);
if (level != GOMP_DIM_GANG)
{
/* Copy the receiver object. */
- tree ref_to_res = gimple_call_arg (call, 0);
+ tree ref_to_res = gimple_call_arg (call, 1);
if (!integer_zerop (ref_to_res))
var = build_simple_mem_ref (ref_to_res);
@@ -4480,40 +4413,36 @@ nvptx_goacc_reduction_setup (gcall *call
if (level == GOMP_DIM_WORKER)
{
/* Store incoming value to worker reduction buffer. */
- tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), rid, lid);
+ tree offset = gimple_call_arg (call, 5);
+ tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
tree ref = build_simple_mem_ref (ptr);
TREE_THIS_VOLATILE (ref) = 1;
gimplify_assign (ref, var, &seq);
- r = var;
}
- else
- r = var;
if (lhs)
- gimplify_assign (lhs, r, &seq);
+ gimplify_assign (lhs, var, &seq);
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
-
- return false;
}
/* NVPTX implementation of GOACC_REDUCTION_INIT. */
-static bool
+static void
nvptx_goacc_reduction_init (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
- tree var = gimple_call_arg (call, 1);
- int level = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
- tree init = omp_reduction_init_op
- (gimple_location (call),
- (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 3)),
- TREE_TYPE (var));
+ tree var = gimple_call_arg (call, 2);
+ int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
+ enum tree_code rcode
+ = (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 4));
+ tree init = omp_reduction_init_op (gimple_location (call), rcode,
+ TREE_TYPE (var));
gimple_seq seq = NULL;
push_gimplify_context (true);
@@ -4522,7 +4451,7 @@ nvptx_goacc_reduction_init (gcall *call)
{
/* Initialize vector-non-zeroes to INIT_VAL (OP). */
tree tid = make_ssa_name (integer_type_node);
- tree dim_vector = gimple_call_arg (call, 2);
+ tree dim_vector = gimple_call_arg (call, 3);
gimple *tid_call = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1,
dim_vector);
gimple *cond_stmt = gimple_build_cond (NE_EXPR, tid, integer_zero_node,
@@ -4567,42 +4496,33 @@ nvptx_goacc_reduction_init (gcall *call)
if (level == GOMP_DIM_GANG)
{
/* If there's no receiver object, propagate the incoming VAR. */
- tree ref_to_res = gimple_call_arg (call, 0);
+ tree ref_to_res = gimple_call_arg (call, 1);
if (integer_zerop (ref_to_res))
init = var;
}
-
+
gimplify_assign (lhs, init, &seq);
}
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
-
- return false;
}
/* NVPTX implementation of GOACC_REDUCTION_FINI. */
-static bool
+static void
nvptx_goacc_reduction_fini (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
- tree ref_to_res = gimple_call_arg (call, 0);
- tree var = gimple_call_arg (call, 1);
- int level = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+ tree ref_to_res = gimple_call_arg (call, 1);
+ tree var = gimple_call_arg (call, 2);
+ int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
enum tree_code op
- = (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 3));
- tree lid = gimple_call_arg (call, 4);
- tree rid = gimple_call_arg (call, 5);
+ = (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 4));
gimple_seq seq = NULL;
tree r = NULL_TREE;;
- if (op == TRUTH_ANDIF_EXPR)
- op = BIT_AND_EXPR;
- else if (op == TRUTH_ORIF_EXPR)
- op = BIT_IOR_EXPR;
-
push_gimplify_context (true);
if (level == GOMP_DIM_VECTOR)
@@ -4629,7 +4549,8 @@ nvptx_goacc_reduction_fini (gcall *call)
if (level == GOMP_DIM_WORKER)
{
/* Get reduction buffer address. */
- tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), rid, lid);
+ tree offset = gimple_call_arg (call, 5);
+ tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
@@ -4655,75 +4576,73 @@ nvptx_goacc_reduction_fini (gcall *call)
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
-
- return false;
}
/* NVPTX implementation of GOACC_REDUCTION_TEARDOWN. */
-static bool
+static void
nvptx_goacc_reduction_teardown (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
- tree var = gimple_call_arg (call, 1);
- int level = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
- tree lid = gimple_call_arg (call, 4);
- tree rid = gimple_call_arg (call, 5);
+ tree var = gimple_call_arg (call, 2);
+ int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
gimple_seq seq = NULL;
- tree r = NULL_TREE;
push_gimplify_context (true);
if (level == GOMP_DIM_WORKER)
{
/* Read the worker reduction buffer. */
- tree call = nvptx_get_worker_red_addr(TREE_TYPE (var), rid, lid);
+ tree offset = gimple_call_arg (call, 5);
+ tree call = nvptx_get_worker_red_addr(TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
- r = build_simple_mem_ref (ptr);
- TREE_THIS_VOLATILE (r) = 1;
+ var = build_simple_mem_ref (ptr);
+ TREE_THIS_VOLATILE (var) = 1;
}
- else
- r = var;
if (level != GOMP_DIM_GANG)
{
/* Write to the receiver object. */
- tree ref_to_res = gimple_call_arg (call, 0);
+ tree ref_to_res = gimple_call_arg (call, 1);
if (!integer_zerop (ref_to_res))
- gimplify_assign (build_simple_mem_ref (ref_to_res), r, &seq);
+ gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
}
if (lhs)
- gimplify_assign (lhs, r, &seq);
+ gimplify_assign (lhs, var, &seq);
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
-
- return false;
}
-/* Default goacc.reduction early expander. */
+/* NVPTX reduction expander. */
-bool
+void
nvptx_goacc_reduction (gcall *call)
{
- switch (gimple_call_internal_fn (call))
+ unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
+
+ switch (code)
{
case IFN_GOACC_REDUCTION_SETUP:
- return nvptx_goacc_reduction_setup (call);
+ nvptx_goacc_reduction_setup (call);
+ break;
case IFN_GOACC_REDUCTION_INIT:
- return nvptx_goacc_reduction_init (call);
+ nvptx_goacc_reduction_init (call);
+ break;
case IFN_GOACC_REDUCTION_FINI:
- return nvptx_goacc_reduction_fini (call);
+ nvptx_goacc_reduction_fini (call);
+ break;
case IFN_GOACC_REDUCTION_TEARDOWN:
- return nvptx_goacc_reduction_teardown (call);
+ nvptx_goacc_reduction_teardown (call);
+ break;
default:
gcc_unreachable ();
===================================================================
@@ -5778,17 +5778,13 @@ pass. It should return true, if the fun
default hook returns true, if there are no RTL expanders for them.
@end deftypefn
-@deftypefn {Target Hook} bool TARGET_GOACC_REDUCTION (gcall *@var{call})
+@deftypefn {Target Hook} void TARGET_GOACC_REDUCTION (gcall *@var{call})
This hook is used by the oacc_transform pass to expand calls to the
-internal functions @var{GOACC_REDUCTION_SETUP},
-@var{GOACC_REDUCTION_INIT},
-@var{GOACC_REDUCTION_FINI} and
-@var{GOACC_REDUCTION_TEARDOWN} into a sequence of gimple instructions.
-@var{call} is gimple statement containing the call to the function. This
-hook removes statement @var{call} after the expanded sequence has been
-inserted. This hook is also responsible for allocating any storage for
-reductions when necessary. It returns @var{true} if the expanded
-sequence introduces any calls to OpenACC-specific internal functions.
+@var{GOACC_REDUCTION} internal function, into a sequence of gimple
+instructions. @var{call} is gimple statement containing the call to
+the function. This hook removes statement @var{call} after the
+expanded sequence has been inserted. This hook is also responsible
+for allocating any storage for reductions when necessary.
@end deftypefn
@node Anchored Addresses
===================================================================
@@ -2053,28 +2053,10 @@ expand_GOACC_DIM_POS (gcall *stmt)
gcc_unreachable ();
}
-/* All the GOACC_REDUCTION variants get expanded in oacc_device_lower. */
-
-static void
-expand_GOACC_REDUCTION_SETUP (gcall *stmt ATTRIBUTE_UNUSED)
-{
- gcc_unreachable ();
-}
-
-static void
-expand_GOACC_REDUCTION_INIT (gcall *stmt ATTRIBUTE_UNUSED)
-{
- gcc_unreachable ();
-}
-
-static void
-expand_GOACC_REDUCTION_FINI (gcall *stmt ATTRIBUTE_UNUSED)
-{
- gcc_unreachable ();
-}
+/* This is expanded by oacc_device_lower pass. */
static void
-expand_GOACC_REDUCTION_TEARDOWN (gcall *stmt ATTRIBUTE_UNUSED)
+expand_GOACC_LOOP (gcall *stmt ATTRIBUTE_UNUSED)
{
gcc_unreachable ();
}
@@ -2082,7 +2064,7 @@ expand_GOACC_REDUCTION_TEARDOWN (gcall *
/* This is expanded by oacc_device_lower pass. */
static void
-expand_GOACC_LOOP (gcall *stmt ATTRIBUTE_UNUSED)
+expand_GOACC_REDUCTION (gcall *stmt ATTRIBUTE_UNUSED)
{
gcc_unreachable ();
}
===================================================================
@@ -82,23 +82,8 @@ DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW, NU
DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".")
-/* REDUCTION_SETUP, REDUCTION_INIT, REDUCTION_FINI and REDUCTION_TEARDOWN
- together define a generic interface to support gang, worker and vector
- reductions. All of the functions take the following form
-
- V = goacc_reduction_foo (REF_TO_RES, LOCAL_VAR, LEVEL, OP, LID, RID)
-
- where REF_TO_RES is a reference to the original reduction variable for
- that particular reduction, LOCAL_VAR is the intermediate reduction
- variable. LEVEL corresponds to the GOMP_DIM of the reduction, OP is a
- tree code of the reduction operation. LID is a unique identifier of the
- loop within a TU and RID is a unique id for a reduction within a loop.
- V is the resulting intermediate reduction variable returned by the
- function. In general, V should equal LOCAL_VAR. */
-DEF_INTERNAL_FN (GOACC_REDUCTION_SETUP, ECF_NOTHROW, NULL)
-DEF_INTERNAL_FN (GOACC_REDUCTION_INIT, ECF_NOTHROW, NULL)
-DEF_INTERNAL_FN (GOACC_REDUCTION_FINI, ECF_NOTHROW, NULL)
-DEF_INTERNAL_FN (GOACC_REDUCTION_TEARDOWN, ECF_NOTHROW, NULL)
-
/* OpenACC looping abstraction. See internal-fn.h for usage. */
DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL)
+
+/* OpenACC reduction abstraction. See internal-fn.h for usage. */
+DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NOTHROW | ECF_LEAF, NULL)
===================================================================
@@ -66,6 +66,28 @@ enum ifn_goacc_loop_kind {
IFN_GOACC_LOOP_BOUND /* Limit of iteration value. */
};
+/* The GOACC_REDUCTION function defines a generic interface to support
+ gang, worker and vector reductions. All calls are of the following
+ form:
+
+ V = REDUCTION (CODE, REF_TO_RES, LOCAL_VAR, LEVEL, OP, OFFSET)
+
+ REF_TO_RES is a reference to the original reduction variable, may be NULL
+ LOCAL_VAR is the intermediate reduction variable
+ LEVEL corresponds to the GOMP_DIM of the reduction
+ OP is the tree code of the reduction operation
+ OFFSET may be used as an offset into a reduction array for the
+ reductions occurring at this level.
+ In general the return value is LOCAL_VAR, which creates a data
+ dependency between calls operating on the same reduction. */
+
+enum ifn_goacc_reduction_kind {
+ IFN_GOACC_REDUCTION_SETUP,
+ IFN_GOACC_REDUCTION_INIT,
+ IFN_GOACC_REDUCTION_FINI,
+ IFN_GOACC_REDUCTION_TEARDOWN
+};
+
/* Initialize internal function tables. */
extern void init_internal_fns ();
===================================================================
@@ -4940,8 +4940,8 @@ lower_rec_input_clauses (tree clauses, g
break;
case OMP_CLAUSE_REDUCTION:
- /* OpenACC reductions are initialized using the internal
- functions GOACC_REDUCTION_SETUP and GOACC_REDUCTION_INIT. */
+ /* OpenACC reductions are initialized using the
+ GOACC_REDUCTION internal function. */
if (is_gimple_omp_oacc (ctx->stmt))
break;
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
@@ -5401,14 +5401,13 @@ lower_oacc_reductions (location_t loc, t
gcall *fork, gcall *join, gimple_seq *fork_seq,
gimple_seq *join_seq, omp_context *ctx)
{
- static unsigned oacc_lid = 0;
-
gimple_seq before_fork = NULL;
gimple_seq after_fork = NULL;
gimple_seq before_join = NULL;
gimple_seq after_join = NULL;
- unsigned count = 0;
- tree lid = build_int_cst (unsigned_type_node, oacc_lid++);
+ tree init_code = NULL_TREE, fini_code = NULL_TREE,
+ setup_code = NULL_TREE, teardown_code = NULL_TREE;
+ unsigned offset = 0;
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
@@ -5473,30 +5472,59 @@ lower_oacc_reductions (location_t loc, t
else if (is_reference (orig))
ref_to_res = build_simple_mem_ref (ref_to_res);
- unsigned rcode = OMP_CLAUSE_REDUCTION_CODE (c);
+ enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
if (rcode == MINUS_EXPR)
rcode = PLUS_EXPR;
+ else if (rcode == TRUTH_ANDIF_EXPR)
+ rcode = BIT_AND_EXPR;
+ else if (rcode == TRUTH_ORIF_EXPR)
+ rcode = BIT_IOR_EXPR;
tree op = build_int_cst (unsigned_type_node, rcode);
- tree rid = build_int_cst (unsigned_type_node, count);
- tree setup = build_call_expr_internal_loc
- (loc, IFN_GOACC_REDUCTION_SETUP, TREE_TYPE (var), 6,
- unshare_expr (ref_to_res), var, level, op, lid, rid);
- tree init = build_call_expr_internal_loc
- (loc, IFN_GOACC_REDUCTION_INIT, TREE_TYPE (var), 6,
- unshare_expr (ref_to_res), var, level, op, lid, rid);
- tree fini = build_call_expr_internal_loc
- (loc, IFN_GOACC_REDUCTION_FINI, TREE_TYPE (var), 6,
- unshare_expr (ref_to_res), var, level, op, lid, rid);
- tree teardown = build_call_expr_internal_loc
- (loc, IFN_GOACC_REDUCTION_TEARDOWN, TREE_TYPE (var), 6,
- ref_to_res, var, level, op, lid, rid);
-
- gimplify_assign (var, setup, &before_fork);
- gimplify_assign (var, init, &after_fork);
- gimplify_assign (var, fini, &before_join);
- gimplify_assign (var, teardown, &after_join);
- count++;
+ /* Determine position in reduction buffer, which may be used
+ by target. */
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
+ unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
+ offset = (offset + align - 1) & ~(align - 1);
+ tree off = build_int_cst (sizetype, offset);
+ offset += GET_MODE_SIZE (mode);
+
+ if (!init_code)
+ {
+ init_code = build_int_cst (integer_type_node,
+ IFN_GOACC_REDUCTION_INIT);
+ fini_code = build_int_cst (integer_type_node,
+ IFN_GOACC_REDUCTION_FINI);
+ setup_code = build_int_cst (integer_type_node,
+ IFN_GOACC_REDUCTION_SETUP);
+ teardown_code = build_int_cst (integer_type_node,
+ IFN_GOACC_REDUCTION_TEARDOWN);
+ }
+
+ tree setup_call
+ = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
+ TREE_TYPE (var), 6, setup_code,
+ unshare_expr (ref_to_res),
+ var, level, op, off);
+ tree init_call
+ = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
+ TREE_TYPE (var), 6, init_code,
+ unshare_expr (ref_to_res),
+ var, level, op, off);
+ tree fini_call
+ = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
+ TREE_TYPE (var), 6, fini_code,
+ unshare_expr (ref_to_res),
+ var, level, op, off);
+ tree teardown_call
+ = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
+ TREE_TYPE (var), 6, teardown_code,
+ ref_to_res, var, level, op, off);
+
+ gimplify_assign (var, setup_call, &before_fork);
+ gimplify_assign (var, init_call, &after_fork);
+ gimplify_assign (var, fini_call, &before_join);
+ gimplify_assign (var, teardown_call, &after_join);
}
/* Now stitch things together. */
@@ -19464,11 +19492,8 @@ oacc_loop_xform_head_tail (gcall *from,
else if (c == code && stmt != from)
break;
}
- else if (gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_SETUP
- || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_INIT
- || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_FINI
- || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_TEARDOWN)
- *gimple_call_arg_ptr (stmt, 2) = replacement;
+ else if (gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION)
+ *gimple_call_arg_ptr (stmt, 3) = replacement;
gsi_next (&gsi);
while (gsi_end_p (gsi))
@@ -19788,13 +19813,13 @@ default_goacc_fork_join (gcall *ARG_UNUS
If LHS is not NULL
emit 'LHS = VAR' */
-bool
+void
default_goacc_reduction (gcall *call)
{
+ unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
- tree var = gimple_call_arg (call, 1);
- unsigned code = gimple_call_internal_fn (call);
+ tree var = gimple_call_arg (call, 2);
gimple_seq seq = NULL;
if (code == IFN_GOACC_REDUCTION_SETUP
@@ -19802,7 +19827,7 @@ default_goacc_reduction (gcall *call)
{
/* Setup and Teardown need to copy from/to the receiver object,
if there is one. */
- tree ref_to_res = gimple_call_arg (call, 0);
+ tree ref_to_res = gimple_call_arg (call, 1);
if (!integer_zerop (ref_to_res))
{
@@ -19824,8 +19849,6 @@ default_goacc_reduction (gcall *call)
gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
gsi_replace_with_seq (&gsi, seq, true);
-
- return false;
}
/* Main entry point for oacc transformations which run on the device
@@ -19902,16 +19925,13 @@ execute_oacc_device_lower ()
rescan = true;
break;
- case IFN_GOACC_REDUCTION_SETUP:
- case IFN_GOACC_REDUCTION_INIT:
- case IFN_GOACC_REDUCTION_FINI:
- case IFN_GOACC_REDUCTION_TEARDOWN:
+ case IFN_GOACC_REDUCTION:
/* Mark the function for SSA renaming. */
mark_virtual_operands_for_renaming (cfun);
/* If the level is -1, this ended up being an unused
axis. Handle as a default. */
- if (integer_minus_onep (gimple_call_arg (call, 2)))
+ if (integer_minus_onep (gimple_call_arg (call, 3)))
default_goacc_reduction (call);
else
targetm.goacc.reduction (call);
===================================================================
@@ -1677,16 +1677,12 @@ default_goacc_fork_join)
DEFHOOK
(reduction,
"This hook is used by the oacc_transform pass to expand calls to the\n\
-internal functions @var{GOACC_REDUCTION_SETUP},\n\
-@var{GOACC_REDUCTION_INIT},\n\
-@var{GOACC_REDUCTION_FINI} and\n\
-@var{GOACC_REDUCTION_TEARDOWN} into a sequence of gimple instructions.\n\
-@var{call} is gimple statement containing the call to the function. This\n\
-hook removes statement @var{call} after the expanded sequence has been\n\
-inserted. This hook is also responsible for allocating any storage for\n\
-reductions when necessary. It returns @var{true} if the expanded\n\
-sequence introduces any calls to OpenACC-specific internal functions.",
-bool, (gcall *call),
+@var{GOACC_REDUCTION} internal function, into a sequence of gimple\n\
+instructions. @var{call} is gimple statement containing the call to\n\
+the function. This hook removes statement @var{call} after the\n\
+expanded sequence has been inserted. This hook is also responsible\n\
+for allocating any storage for reductions when necessary.",
+void, (gcall *call),
default_goacc_reduction)
HOOK_VECTOR_END (goacc)
===================================================================
@@ -109,7 +109,7 @@ extern void default_finish_cost (void *,
extern void default_destroy_cost_data (void *);
/* OpenACC hooks. */
-extern bool default_goacc_reduction (gcall *);
+extern void default_goacc_reduction (gcall *);
extern bool default_goacc_validate_dims (tree, int [], int);
extern unsigned default_goacc_dim_limit (unsigned);
extern bool default_goacc_fork_join (gcall *, const int [], bool);