@@ -39,7 +39,7 @@ extern rtx gcn_gen_undef (machine_mode);
extern bool gcn_global_address_p (rtx);
extern tree gcn_goacc_adjust_propagation_record (tree record_type, bool sender,
const char *name);
-extern void gcn_goacc_adjust_gangprivate_decl (tree var);
+extern void gcn_goacc_adjust_private_decl (tree var, int level);
extern void gcn_goacc_reduction (gcall *call);
extern bool gcn_hard_regno_rename_ok (unsigned int from_reg,
unsigned int to_reg);
@@ -697,8 +697,11 @@ gcn_goacc_adjust_propagation_record (tree record_type, bool sender,
}
void
-gcn_goacc_adjust_gangprivate_decl (tree var)
+gcn_goacc_adjust_private_decl (tree var, int level)
{
+ if (level != GOMP_DIM_GANG)
+ return;
+
tree type = TREE_TYPE (var);
tree lds_type = build_qualified_type (type,
TYPE_QUALS_NO_ADDR_SPACE (type)
@@ -6067,8 +6067,8 @@ print_operand (FILE *file, rtx x, int code)
#undef TARGET_GOACC_ADJUST_PROPAGATION_RECORD
#define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
gcn_goacc_adjust_propagation_record
-#undef TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
-#define TARGET_GOACC_ADJUST_GANGPRIVATE_DECL gcn_goacc_adjust_gangprivate_decl
+#undef TARGET_GOACC_ADJUST_PRIVATE_DECL
+#define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl
#undef TARGET_GOACC_FORK_JOIN
#define TARGET_GOACC_FORK_JOIN gcn_fork_join
#undef TARGET_GOACC_REDUCTION
@@ -74,6 +74,8 @@
#include "cfgloop.h"
#include "fold-const.h"
#include "intl.h"
+#include "tree-hash-traits.h"
+#include "tree-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -166,6 +168,12 @@ static unsigned vector_red_align;
static unsigned vector_red_partition;
static GTY(()) rtx vector_red_sym;
+/* Shared memory block for gang-private variables. */
+static unsigned gangprivate_shared_size;
+static unsigned gangprivate_shared_align;
+static GTY(()) rtx gangprivate_shared_sym;
+static hash_map<tree_decl_hash, unsigned int> gangprivate_shared_hmap;
+
/* Global lock variable, needed for 128bit worker & gang reductions. */
static GTY(()) tree global_lock_var;
@@ -247,6 +255,10 @@ nvptx_option_override (void)
vector_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
vector_red_partition = 0;
+ gangprivate_shared_sym = gen_rtx_SYMBOL_REF (Pmode, "__gangprivate_shared");
+ SET_SYMBOL_DATA_AREA (gangprivate_shared_sym, DATA_AREA_SHARED);
+ gangprivate_shared_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
+
diagnose_openacc_conflict (TARGET_GOMP, "-mgomp");
diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack");
diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt");
@@ -5231,6 +5243,10 @@ nvptx_file_end (void)
write_shared_buffer (asm_out_file, vector_red_sym,
vector_red_align, vector_red_size);
+ if (gangprivate_shared_size)
+ write_shared_buffer (asm_out_file, gangprivate_shared_sym,
+ gangprivate_shared_align, gangprivate_shared_size);
+
if (need_softstack_decl)
{
write_var_marker (asm_out_file, false, true, "__nvptx_stacks");
@@ -6450,6 +6466,60 @@ nvptx_can_change_mode_class (machine_mode, machine_mode, reg_class_t)
return false;
}
+/* Implement TARGET_GOACC_ADJUST_PRIVATE_DECL. Set "oacc gangprivate"
+ attribute for gang-private variable declarations. */
+
+void
+nvptx_goacc_adjust_private_decl (tree decl, int level)
+{
+ if (level != GOMP_DIM_GANG)
+ return;
+
+ if (!lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (decl)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Setting 'oacc gangprivate' attribute for decl:");
+ print_generic_decl (dump_file, decl, TDF_SLIM);
+ fputc ('\n', dump_file);
+ }
+ tree id = get_identifier ("oacc gangprivate");
+ DECL_ATTRIBUTES (decl) = tree_cons (id, NULL, DECL_ATTRIBUTES (decl));
+ }
+}
+
+/* Implement TARGET_GOACC_EXPAND_ACCEL_VAR. Place "oacc gangprivate"
+ variables in shared memory. */
+
+static rtx
+nvptx_goacc_expand_accel_var (tree var)
+{
+ if (VAR_P (var)
+ && lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (var)))
+ {
+ unsigned int offset, *poffset;
+ poffset = gangprivate_shared_hmap.get (var);
+ if (poffset)
+ offset = *poffset;
+ else
+ {
+ unsigned HOST_WIDE_INT align = DECL_ALIGN (var);
+ gangprivate_shared_size
+ = (gangprivate_shared_size + align - 1) & ~(align - 1);
+ if (gangprivate_shared_align < align)
+ gangprivate_shared_align = align;
+
+ offset = gangprivate_shared_size;
+ bool existed = gangprivate_shared_hmap.put (var, offset);
+ gcc_assert (!existed);
+ gangprivate_shared_size += tree_to_uhwi (DECL_SIZE_UNIT (var));
+ }
+ rtx addr = plus_constant (Pmode, gangprivate_shared_sym, offset);
+ return gen_rtx_MEM (TYPE_MODE (TREE_TYPE (var)), addr);
+ }
+ return NULL_RTX;
+}
+
static GTY(()) tree nvptx_previous_fndecl;
static void
@@ -6458,6 +6528,7 @@ nvptx_set_current_function (tree fndecl)
if (!fndecl || fndecl == nvptx_previous_fndecl)
return;
+ gangprivate_shared_hmap.empty ();
nvptx_previous_fndecl = fndecl;
vector_red_partition = 0;
oacc_bcast_partition = 0;
@@ -6602,6 +6673,12 @@ nvptx_set_current_function (tree fndecl)
#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+#undef TARGET_GOACC_ADJUST_PRIVATE_DECL
+#define TARGET_GOACC_ADJUST_PRIVATE_DECL nvptx_goacc_adjust_private_decl
+
+#undef TARGET_GOACC_EXPAND_ACCEL_VAR
+#define TARGET_GOACC_EXPAND_ACCEL_VAR nvptx_goacc_expand_accel_var
+
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function
@@ -6155,6 +6155,19 @@ like @code{cond_add@var{m}}. The default implementation returns a zero
constant of type @var{type}.
@end deftypefn
+@deftypefn {Target Hook} rtx TARGET_GOACC_EXPAND_ACCEL_VAR (tree @var{var})
+This hook, if defined, is used by accelerator target back-ends to expand
+specially handled kinds of VAR_DECL expressions. A particular use is to
+place variables with specific attributes inside special accelarator
+memories. A return value of NULL indicates that the target does not
+handle this VAR_DECL, and normal RTL expanding is resumed.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_GOACC_ADJUST_PRIVATE_DECL (tree @var{var}, @var{int})
+Tweak variable declaration for a private variable at the specified
+parallelism level.
+@end deftypefn
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
@@ -4213,6 +4213,10 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_PREFERRED_ELSE_VALUE
+@hook TARGET_GOACC_EXPAND_ACCEL_VAR
+
+@hook TARGET_GOACC_ADJUST_PRIVATE_DECL
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
@@ -10044,8 +10044,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
exp = SSA_NAME_VAR (ssa_name);
goto expand_decl_rtl;
- case PARM_DECL:
case VAR_DECL:
+ /* Allow accel compiler to handle specific cases of variables,
+ specifically those tagged with the "oacc gangprivate" attribute,
+ which may be intended to be placed in special memory in GPUs. */
+ if (flag_openacc && targetm.goacc.expand_accel_var)
+ {
+ temp = targetm.goacc.expand_accel_var (exp);
+ if (temp)
+ return temp;
+ }
+ /* ... fall through ... */
+
+ case PARM_DECL:
/* If a static var's type was incomplete when the decl was written,
but the type is complete now, lay out the decl now. */
if (DECL_SIZE (exp) == 0
@@ -2618,6 +2618,8 @@ expand_UNIQUE (internal_fn, gcall *stmt)
else
gcc_unreachable ();
break;
+ case IFN_UNIQUE_OACC_PRIVATE:
+ break;
}
if (pattern)
@@ -36,7 +36,8 @@ along with GCC; see the file COPYING3. If not see
#define IFN_UNIQUE_CODES \
DEF(UNSPEC), \
DEF(OACC_FORK), DEF(OACC_JOIN), \
- DEF(OACC_HEAD_MARK), DEF(OACC_TAIL_MARK)
+ DEF(OACC_HEAD_MARK), DEF(OACC_TAIL_MARK), \
+ DEF(OACC_PRIVATE)
enum ifn_unique_kind {
#define DEF(X) IFN_UNIQUE_##X
@@ -163,6 +163,9 @@ struct omp_context
/* True if there is bind clause on the construct (i.e. a loop construct). */
bool loop_p;
+
+ /* Addressable variable decls in this context. */
+ vec<tree> *oacc_addressable_var_decls;
};
static splay_tree all_contexts;
@@ -943,6 +946,8 @@ new_omp_context (gimple *stmt, omp_context *outer_ctx)
ctx->cb.decl_map = new hash_map<tree, tree>;
+ ctx->oacc_addressable_var_decls = new vec<tree> ();
+
return ctx;
}
@@ -1024,6 +1029,7 @@ delete_omp_context (splay_tree_value value)
}
delete ctx->lastprivate_conditional_map;
+ delete ctx->oacc_addressable_var_decls;
XDELETE (ctx);
}
@@ -6667,8 +6673,9 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p,
static void
lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
- gcall *fork, gcall *join, gimple_seq *fork_seq,
- gimple_seq *join_seq, omp_context *ctx)
+ gcall *fork, gcall *private_marker, gcall *join,
+ gimple_seq *fork_seq, gimple_seq *join_seq,
+ omp_context *ctx)
{
gimple_seq before_fork = NULL;
gimple_seq after_fork = NULL;
@@ -6866,6 +6873,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
/* Now stitch things together. */
gimple_seq_add_seq (fork_seq, before_fork);
+ if (private_marker)
+ gimple_seq_add_stmt (fork_seq, private_marker);
if (fork)
gimple_seq_add_stmt (fork_seq, fork);
gimple_seq_add_seq (fork_seq, after_fork);
@@ -7581,7 +7590,7 @@ lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
HEAD and TAIL. */
static void
-lower_oacc_head_tail (location_t loc, tree clauses,
+lower_oacc_head_tail (location_t loc, tree clauses, gcall *private_marker,
gimple_seq *head, gimple_seq *tail, omp_context *ctx)
{
bool inner = false;
@@ -7589,6 +7598,14 @@ lower_oacc_head_tail (location_t loc, tree clauses,
gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
+
+ if (private_marker)
+ {
+ gimple_set_location (private_marker, loc);
+ gimple_call_set_lhs (private_marker, ddvar);
+ gimple_call_set_arg (private_marker, 1, ddvar);
+ }
+
tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
@@ -7619,7 +7636,8 @@ lower_oacc_head_tail (location_t loc, tree clauses,
&join_seq);
lower_oacc_reductions (loc, clauses, place, inner,
- fork, join, &fork_seq, &join_seq, ctx);
+ fork, (count == 1) ? private_marker : NULL,
+ join, &fork_seq, &join_seq, ctx);
/* Append this level to head. */
gimple_seq_add_seq (head, fork_seq);
@@ -9584,6 +9602,32 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
}
}
+/* Record vars listed in private clauses in CLAUSES in CTX. This information
+ is used to mark up variables that should be made private per-gang. */
+
+static void
+oacc_record_private_var_clauses (omp_context *ctx, tree clauses)
+{
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE)
+ {
+ tree decl = OMP_CLAUSE_DECL (c);
+ if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+ ctx->oacc_addressable_var_decls->safe_push (decl);
+ }
+}
+
+/* Record addressable vars declared in BINDVARS in CTX. This information is
+ used to mark up variables that should be made private per-gang. */
+
+static void
+oacc_record_vars_in_bind (omp_context *ctx, tree bindvars)
+{
+ for (tree v = bindvars; v; v = DECL_CHAIN (v))
+ if (VAR_P (v) && TREE_ADDRESSABLE (v))
+ ctx->oacc_addressable_var_decls->safe_push (v);
+}
+
/* Callback for walk_gimple_seq. Find #pragma omp scan statement. */
static tree
@@ -10414,6 +10458,57 @@ lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt,
*dlist = new_dlist;
}
+/* Build an internal UNIQUE function with type IFN_UNIQUE_OACC_PRIVATE listing
+ the addresses of variables that should be made private at the surrounding
+ parallelism level. Such functions appear in the gimple code stream in two
+ forms, e.g. for a partitioned loop:
+
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6, 1, 68);
+ .data_dep.6 = .UNIQUE (OACC_PRIVATE, .data_dep.6, -1, &w);
+ .data_dep.6 = .UNIQUE (OACC_FORK, .data_dep.6, -1);
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6);
+
+ or alternatively, OACC_PRIVATE can appear at the top level of a parallel,
+ not as part of a HEAD_MARK sequence:
+
+ .UNIQUE (OACC_PRIVATE, 0, 0, &w);
+
+ For such stand-alone appearances, the 3rd argument is always 0, denoting
+ gang partitioning. */
+
+static gcall *
+make_oacc_private_marker (omp_context *ctx)
+{
+ int i;
+ tree decl;
+
+ if (ctx->oacc_addressable_var_decls->length () == 0)
+ return NULL;
+
+ auto_vec<tree, 5> args;
+
+ args.quick_push (build_int_cst (integer_type_node, IFN_UNIQUE_OACC_PRIVATE));
+ args.quick_push (integer_zero_node);
+ args.quick_push (integer_minus_one_node);
+
+ FOR_EACH_VEC_ELT (*ctx->oacc_addressable_var_decls, i, decl)
+ {
+ for (omp_context *thisctx = ctx; thisctx; thisctx = thisctx->outer)
+ {
+ tree inner_decl = maybe_lookup_decl (decl, thisctx);
+ if (inner_decl)
+ {
+ decl = inner_decl;
+ break;
+ }
+ }
+ tree addr = build_fold_addr_expr (decl);
+ args.safe_push (addr);
+ }
+
+ return gimple_build_call_internal_vec (IFN_UNIQUE, args);
+}
+
/* Lower code for an OMP loop directive. */
static void
@@ -10430,6 +10525,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
push_gimplify_context ();
+ oacc_record_private_var_clauses (ctx, gimple_omp_for_clauses (stmt));
+
lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
block = make_node (BLOCK);
@@ -10448,6 +10545,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gbind *inner_bind
= as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
tree vars = gimple_bind_vars (inner_bind);
+ if (is_gimple_omp_oacc (ctx->stmt))
+ oacc_record_vars_in_bind (ctx, vars);
gimple_bind_append_vars (new_stmt, vars);
/* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
keep them on the inner_bind and it's block. */
@@ -10547,6 +10646,11 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
lower_omp (gimple_omp_body_ptr (stmt), ctx);
+ gcall *private_marker = NULL;
+ if (is_gimple_omp_oacc (ctx->stmt)
+ && !gimple_seq_empty_p (omp_for_body))
+ private_marker = make_oacc_private_marker (ctx);
+
/* Lower the header expressions. At this point, we can assume that
the header is of the form:
@@ -10583,7 +10687,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (is_gimple_omp_oacc (ctx->stmt)
&& !ctx_in_oacc_kernels_region (ctx))
lower_oacc_head_tail (gimple_location (stmt),
- gimple_omp_for_clauses (stmt),
+ gimple_omp_for_clauses (stmt), private_marker,
&oacc_head, &oacc_tail, ctx);
/* Add OpenACC partitioning and reduction markers just before the loop. */
@@ -12525,8 +12629,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
them as a dummy GANG loop. */
tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
+ gcall *private_marker = make_oacc_private_marker (ctx);
+
+ if (private_marker)
+ gimple_call_set_arg (private_marker, 2, level);
+
lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
- false, NULL, NULL, &fork_seq, &join_seq, ctx);
+ false, NULL, private_marker, NULL, &fork_seq,
+ &join_seq, ctx);
}
gimple_seq_add_seq (&new_body, fork_seq);
@@ -12782,6 +12892,9 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
ctx);
break;
case GIMPLE_BIND:
+ if (ctx && is_gimple_omp_oacc (ctx->stmt))
+ oacc_record_vars_in_bind (ctx,
+ gimple_bind_vars (as_a <gbind *> (stmt)));
lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
maybe_remove_omp_member_access_dummy_vars (as_a <gbind *> (stmt));
break;
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
#include "stringpool.h"
#include "attribs.h"
#include "cfgloop.h"
+#include "convert.h"
/* Describe the OpenACC looping structure of a function. The entire
function is held in a 'NULL' loop. */
@@ -1082,7 +1083,9 @@ oacc_loop_xform_head_tail (gcall *from, int level)
= ((enum ifn_unique_kind)
TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
- if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
+ if (k == IFN_UNIQUE_OACC_FORK
+ || k == IFN_UNIQUE_OACC_JOIN
+ || k == IFN_UNIQUE_OACC_PRIVATE)
*gimple_call_arg_ptr (stmt, 2) = replacement;
else if (k == kind && stmt != from)
break;
@@ -1684,6 +1687,38 @@ execute_oacc_device_lower ()
case IFN_UNIQUE_OACC_TAIL_MARK:
remove = true;
break;
+
+ case IFN_UNIQUE_OACC_PRIVATE:
+ {
+ HOST_WIDE_INT level
+ = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+ if (level == -1)
+ break;
+ for (unsigned i = 3;
+ i < gimple_call_num_args (call);
+ i++)
+ {
+ tree arg = gimple_call_arg (call, i);
+ gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
+ tree decl = TREE_OPERAND (arg, 0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ static char const *const axes[] =
+ /* Must be kept in sync with GOMP_DIM
+ enumeration. */
+ { "gang", "worker", "vector" };
+ fprintf (dump_file, "Decl UID %u has %s "
+ "partitioning:", DECL_UID (decl),
+ axes[level]);
+ print_generic_decl (dump_file, decl, TDF_SLIM);
+ fputc ('\n', dump_file);
+ }
+ if (targetm.goacc.adjust_private_decl)
+ targetm.goacc.adjust_private_decl (decl, level);
+ }
+ remove = true;
+ }
+ break;
}
break;
}
@@ -1734,6 +1734,23 @@ for allocating any storage for reductions when necessary.",
void, (gcall *call),
default_goacc_reduction)
+DEFHOOK
+(expand_accel_var,
+"This hook, if defined, is used by accelerator target back-ends to expand\n\
+specially handled kinds of VAR_DECL expressions. A particular use is to\n\
+place variables with specific attributes inside special accelarator\n\
+memories. A return value of NULL indicates that the target does not\n\
+handle this VAR_DECL, and normal RTL expanding is resumed.",
+rtx, (tree var),
+NULL)
+
+DEFHOOK
+(adjust_private_decl,
+"Tweak variable declaration for a private variable at the specified\n\
+parallelism level.",
+void, (tree var, int),
+NULL)
+
HOOK_VECTOR_END (goacc)
/* Functions relating to vectorization. */
new file mode 100644
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+int main (void)
+{
+ int ret;
+
+ #pragma acc parallel num_gangs(1) num_workers(32) copyout(ret)
+ {
+ int w = 0;
+
+ #pragma acc loop worker
+ for (int i = 0; i < 32; i++)
+ {
+ #pragma acc atomic update
+ w++;
+ }
+
+ ret = (w == 32);
+ }
+ assert (ret);
+
+ #pragma acc parallel num_gangs(1) vector_length(32) copyout(ret)
+ {
+ int v = 0;
+
+ #pragma acc loop vector
+ for (int i = 0; i < 32; i++)
+ {
+ #pragma acc atomic update
+ v++;
+ }
+
+ ret = (v == 32);
+ }
+ assert (ret);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <openacc.h>
+#include <alloca.h>
+#include <string.h>
+#include <gomp-constants.h>
+#include <stdlib.h>
+
+#if 0
+#define DEBUG(DIM, IDX, VAL) \
+ fprintf (stderr, "%sdist[%d] = %d\n", (DIM), (IDX), (VAL))
+#else
+#define DEBUG(DIM, IDX, VAL)
+#endif
+
+#define N (32*32*32)
+
+int
+check (const char *dim, int *dist, int dimsize)
+{
+ int ix;
+ int exit = 0;
+
+ for (ix = 0; ix < dimsize; ix++)
+ {
+ DEBUG(dim, ix, dist[ix]);
+ if (dist[ix] < (N) / (dimsize + 0.5)
+ || dist[ix] > (N) / (dimsize - 0.5))
+ {
+ fprintf (stderr, "did not distribute to %ss (%d not between %d "
+ "and %d)\n", dim, dist[ix], (int) ((N) / (dimsize + 0.5)),
+ (int) ((N) / (dimsize - 0.5)));
+ exit |= 1;
+ }
+ }
+
+ return exit;
+}
+
+int main ()
+{
+ int ary[N];
+ int ix;
+ int exit = 0;
+ int gangsize = 0, workersize = 0, vectorsize = 0;
+ int *gangdist, *workerdist, *vectordist;
+
+ for (ix = 0; ix < N;ix++)
+ ary[ix] = -1;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(ary) copyout(gangsize, workersize, vectorsize)
+ {
+#pragma acc loop gang worker vector
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int g, w, v;
+
+ g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
+
+ ary[ix] = (g << 16) | (w << 8) | v;
+ }
+
+ gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
+ }
+
+ gangdist = (int *) alloca (gangsize * sizeof (int));
+ workerdist = (int *) alloca (workersize * sizeof (int));
+ vectordist = (int *) alloca (vectorsize * sizeof (int));
+ memset (gangdist, 0, gangsize * sizeof (int));
+ memset (workerdist, 0, workersize * sizeof (int));
+ memset (vectordist, 0, vectorsize * sizeof (int));
+
+ /* Test that work is shared approximately equally amongst each active
+ gang/worker/vector. */
+ for (ix = 0; ix < N; ix++)
+ {
+ int g = (ary[ix] >> 16) & 255;
+ int w = (ary[ix] >> 8) & 255;
+ int v = ary[ix] & 255;
+
+ gangdist[g]++;
+ workerdist[w]++;
+ vectordist[v]++;
+ }
+
+ exit = check ("gang", gangdist, gangsize);
+ exit |= check ("worker", workerdist, workersize);
+ exit |= check ("vector", vectordist, vectorsize);
+
+ return exit;
+}
new file mode 100644
@@ -0,0 +1,25 @@
+! Test for "oacc gangprivate" attribute on gang-private variables
+
+! { dg-do run }
+! { dg-additional-options "-fdump-tree-oaccdevlow-details" }
+! { dg-final { scan-tree-dump-times "Decl UID \[0-9\]+ has gang partitioning: integer\\(kind=4\\) w;" 1 "oaccdevlow" } } */
+
+program main
+ integer :: w, arr(0:31)
+
+ !$acc parallel num_gangs(32) num_workers(32) copyout(arr)
+ !$acc loop gang private(w)
+ do j = 0, 31
+ w = 0
+ !$acc loop seq
+ do i = 0, 31
+ !$acc atomic update
+ w = w + 1
+ !$acc end atomic
+ end do
+ arr(j) = w
+ end do
+ !$acc end parallel
+
+ if (any (arr .ne. 32)) stop 1
+end program main
new file mode 100644
@@ -0,0 +1,25 @@
+! Test for worker-private variables
+
+! { dg-do run }
+! { dg-additional-options "-fdump-tree-oaccdevlow-details" }
+! { dg-final { scan-tree-dump-times "Decl UID \[0-9\]+ has worker partitioning: integer\\(kind=4\\) w;" 1 "oaccdevlow" } } */
+
+program main
+ integer :: w, arr(0:31)
+
+ !$acc parallel num_gangs(32) num_workers(32) copyout(arr)
+ !$acc loop gang worker private(w)
+ do j = 0, 31
+ w = 0
+ !$acc loop seq
+ do i = 0, 31
+ !$acc atomic update
+ w = w + 1
+ !$acc end atomic
+ end do
+ arr(j) = w
+ end do
+ !$acc end parallel
+
+ if (any (arr .ne. 32)) stop 1
+end program main