2015-06-20 Nathan Sidwell <nathan@codesourcery.com>
gcc/
* omp-builtins.def (BUILT_IN_GOACC_NTID, BUILTIN_NCTAID): Replace
with ...
(BUILT_IN_GOACC_NID): ... this.
(BUILT_IN_GOACC_TID, BUILTIN_CTAID): Replace with ...
(BUILT_IN_GOACC_ID): ... this.
* builtins.c: Include omp-low.h.
(expand_oacc_buoltin): Replace with ...
(expand_oacc_id): ... this.
(expand_builtin, is_simple_builtin): Adjust.oo
* omp-low.h (enum oacc_loop_levels): New.
* omp-low.c (MASK_GANG, MASK_WORKER, MASK_VECTOR): Replace with ...
(OACC_LOOP_MASK): ... this.
(scan_omp_for, scan_omp_target): Adjust.
(expand_oacc_get_num_threads): Adjust and use a loop.
(expand_oacc_get_thread_num): Likewise.
(oacc_loop_needs_thread_barrier_p, find_omp_for_region_gwv,
find_omp_taarget_region_data, required_predication_mask,
generate_vector_broadcast, generate_oacc_broadcast): Adjust.
(make_predication_test): Adjust and use a loop.
(predicate_bb, oacc_broadcast, oacc_init_count_vars): Adjust.
* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID, UNSPEC_NCTAID,
UNSPEC_CTAID): Replace with ...
(UNSPEC_NID, UNSPEC_ID): ... these.
(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid,
*oacc_nctaid_insn, oacc_nctaid, *oacc_ctaid_insn,
oacc_ctaid): Replace with ...
(oacc_nid, oacc_id): ... these.
* config/nvptx/nvptx.c (nvptx_print_operand [CASE 'd']): Remove.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Replace
GOACC_ctaid builtin with GOACC_id.
===================================================================
@@ -35,38 +35,38 @@ main ()
#pragma acc parallel loop gang (static:*) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_ctaid (0);
+ a[i] = __builtin_GOACC_id (0);
test_nonstatic (a, 10);
#pragma acc parallel loop gang (static:1) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_ctaid (0);
+ a[i] = __builtin_GOACC_id (0);
test_static (a, 10, 1);
#pragma acc parallel loop gang (static:2) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_ctaid (0);
+ a[i] = __builtin_GOACC_id (0);
test_static (a, 10, 2);
#pragma acc parallel loop gang (static:5) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_ctaid (0);
+ a[i] = __builtin_GOACC_id (0);
test_static (a, 10, 5);
#pragma acc parallel loop gang (static:20) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_ctaid (0);
+ a[i] = __builtin_GOACC_id (0);
test_static (a, 10, 20);
/* Non-static gang. */
#pragma acc parallel loop gang num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_ctaid (0);
+ a[i] = __builtin_GOACC_id (0);
test_nonstatic (a, 10);
===================================================================
@@ -61,13 +61,9 @@ DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
BT_FN_VOID_INT_INT_VAR,
ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NTID, "GOACC_ntid",
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ID, "GOACC_id",
BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_TID, "GOACC_tid",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NCTAID, "GOACC_nctaid",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_CTAID, "GOACC_ctaid",
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NID, "GOACC_nid",
BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GANGLOCAL_PTR, "GOACC_get_ganglocal_ptr",
BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
===================================================================
@@ -49,10 +49,8 @@
UNSPEC_ALLOCA
- UNSPEC_NTID
- UNSPEC_TID
- UNSPEC_NCTAID
- UNSPEC_CTAID
+ UNSPEC_NID
+ UNSPEC_ID
UNSPEC_SHARED_DATA
])
@@ -1263,65 +1261,32 @@
DONE;
})
-(define_insn "*oacc_ntid_insn"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_NTID))]
- ""
- "%.\\tmov.u32 %0, %%ntid%d1;")
-
-(define_expand "oacc_ntid"
- [(set (match_operand:SI 0 "nvptx_register_operand" "")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NTID))]
- ""
-{
- if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
- FAIL;
-})
-
-(define_insn "*oacc_tid_insn"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_TID))]
- ""
- "%.\\tmov.u32 %0, %%tid%d1;")
-
-(define_expand "oacc_tid"
+(define_insn "oacc_nid"
[(set (match_operand:SI 0 "nvptx_register_operand" "")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_TID))]
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NID))]
""
{
- if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
- FAIL;
+ static const char *const asms[] =
+{ /* Must match oacc_loop_levels ordering. */
+ "%.\\tmov.u32 %0, %%nctaid.x;",/* gang */
+ "%.\\tmov.u32 %0, %%ntid.y;", /* worker */
+ "%.\\tmov.u32 %0, %%ntid.x;", /* vector */
+};
+ return asms[INTVAL (operands[1])];
})
-;; Number of CUDA grids (CPA = Cooperative Thread Arrays)
-(define_insn "*oacc_nctaid_insn"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_NCTAID))]
- ""
- "%.\\tmov.u32 %0, %%nctaid%d1;")
-
-(define_expand "oacc_nctaid"
- [(set (match_operand:SI 0 "nvptx_register_operand" "")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NCTAID))]
- ""
-{
- if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
- FAIL;
-})
-
-(define_insn "*oacc_ctaid_insn"
- [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_CTAID))]
- ""
- "%.\\tmov.u32 %0, %%ctaid%d1;")
-
-(define_expand "oacc_ctaid"
+(define_insn "oacc_id"
[(set (match_operand:SI 0 "nvptx_register_operand" "")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_CTAID))]
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_ID))]
""
{
- if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
- FAIL;
+ static const char *const asms[] =
+{ /* Must match oacc_loop_levels ordering. */
+ "%.\\tmov.u32 %0, %%ctaid.x;",/* gang */
+ "%.\\tmov.u32 %0, %%tid.y;", /* worker */
+ "%.\\tmov.u32 %0, %%tid.x;", /* vector */
+};
+ return asms[INTVAL (operands[1])];
})
(define_insn "oacc_thread_broadcastsi"
===================================================================
@@ -1673,7 +1673,6 @@ condition_unidirectional_p (rtx cond)
A -- print an address space identifier for a MEM
c -- print an opcode suffix for a comparison operator, including a type code
- d -- print a CONST_INT as a vector dimension (x, y, or z)
f -- print a full reg even for something that must always be split
t -- print a type opcode suffix, promoting QImode to 32 bits
T -- print a type size in bits
@@ -1718,18 +1717,6 @@ nvptx_print_operand (FILE *file, rtx x,
}
break;
- case 'd':
- gcc_assert (x_code == CONST_INT);
- if (INTVAL (x) == 0)
- fputs (".x", file);
- else if (INTVAL (x) == 1)
- fputs (".y", file);
- else if (INTVAL (x) == 2)
- fputs (".z", file);
- else
- gcc_unreachable ();
- break;
-
case 't':
op_mode = nvptx_underlying_object_mode (x);
fprintf (file, "%s", nvptx_ptx_type_from_mode (op_mode, true));
===================================================================
@@ -172,9 +172,7 @@ struct omp_region
/* Levels of parallelism as defined by OpenACC. Increasing numbers
correspond to deeper loop nesting levels. */
-#define MASK_GANG 1
-#define MASK_WORKER 2
-#define MASK_VECTOR 4
+#define OACC_LOOP_MASK(X) (1 << (X))
/* Context structure. Used to store information about each parallel
directive in the code. */
@@ -2967,17 +2965,17 @@ scan_omp_for (gomp_for *stmt, omp_contex
int val;
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_GANG)
{
- val = MASK_GANG;
+ val = OACC_LOOP_MASK (OACC_gang);
gwv_clause = true;
}
else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WORKER)
{
- val = MASK_WORKER;
+ val = OACC_LOOP_MASK (OACC_worker);
gwv_clause = true;
}
else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR)
{
- val = MASK_VECTOR;
+ val = OACC_LOOP_MASK (OACC_vector);
gwv_clause = true;
}
else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SEQ)
@@ -3122,11 +3120,11 @@ scan_omp_target (gomp_target *stmt, omp_
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS)
- ctx->gwv_this |= MASK_GANG;
+ ctx->gwv_this |= OACC_LOOP_MASK (OACC_gang);
else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS)
- ctx->gwv_this |= MASK_WORKER;
+ ctx->gwv_this |= OACC_LOOP_MASK (OACC_worker);
else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
- ctx->gwv_this |= MASK_VECTOR;
+ ctx->gwv_this |= OACC_LOOP_MASK (OACC_vector);
}
}
@@ -4992,53 +4990,25 @@ is_atomic_compatible_reduction (tree var
static tree
expand_oacc_get_num_threads (gimple_seq *seq, int gwv_bits)
{
- tree res = NULL_TREE;
- tree u0 = fold_convert (unsigned_type_node, integer_zero_node);
- tree u1 = fold_convert (unsigned_type_node, integer_one_node);
-
- if (gwv_bits & MASK_GANG)
- {
- tree decl = builtin_decl_explicit (BUILT_IN_GOACC_NCTAID);
- tree gang_count = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (decl, 1, u0);
- gimple_call_set_lhs (call, gang_count);
- gimple_seq_add_stmt (seq, call);
- res = gang_count;
- }
-
- if (gwv_bits & MASK_WORKER)
- {
- tree decl = builtin_decl_explicit (BUILT_IN_GOACC_NTID);
- tree worker_count = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (decl, 1, u1);
- gimple_call_set_lhs (call, worker_count);
- gimple_seq_add_stmt (seq, call);
- if (res != NULL_TREE)
- res = fold_build2 (MULT_EXPR, unsigned_type_node, res, worker_count);
- else
- res = worker_count;
- }
-
- if (gwv_bits & MASK_VECTOR)
- {
- tree decl = builtin_decl_explicit (BUILT_IN_GOACC_NTID);
- tree vector_length = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (decl, 1, u0);
- gimple_call_set_lhs (call, vector_length);
- gimple_seq_add_stmt (seq, call);
- if (res != NULL_TREE)
- res = fold_build2 (MULT_EXPR, unsigned_type_node, res, vector_length);
- else
- res = vector_length;
- }
+ tree res = build_int_cst (unsigned_type_node, 1);
+ tree decl = builtin_decl_explicit (BUILT_IN_GOACC_NID);
+ unsigned ix;
- if (res == NULL_TREE)
- res = u1;
+ for (ix = 0; (1 << ix) <= gwv_bits; ix++)
+ if ((1 << ix) & gwv_bits)
+ {
+ tree arg = build_int_cst (unsigned_type_node, ix);
+ tree count = create_tmp_var (unsigned_type_node);
+ gimple call = gimple_build_call (decl, 1, arg);
+
+ gimple_call_set_lhs (call, count);
+ gimple_seq_add_stmt (seq, call);
+ res = fold_build2 (MULT_EXPR, unsigned_type_node, res, count);
+ }
return res;
}
-
/* Find the current thread number to use within a region partitioned by
GWV_BITS. Setup code required for the calculation is added to SEQ. See
note for expand_oacc_get_num_threads above re: builtin usage. */
@@ -5047,90 +5017,43 @@ static tree
expand_oacc_get_thread_num (gimple_seq *seq, int gwv_bits)
{
tree res = NULL_TREE;
- tree u0 = fold_convert (unsigned_type_node, integer_zero_node);
- tree u1 = fold_convert (unsigned_type_node, integer_one_node);
- tree vector_count = NULL_TREE;
- tree tid_decl = builtin_decl_explicit (BUILT_IN_GOACC_TID);
- tree ntid_decl = builtin_decl_explicit (BUILT_IN_GOACC_NTID);
-
- if (gwv_bits & MASK_VECTOR)
- {
- tree vector_id = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (tid_decl, 1, u0);
- gimple_call_set_lhs (call, vector_id);
- gimple_seq_add_stmt (seq, call);
- res = vector_id;
- }
-
- if (gwv_bits & MASK_WORKER)
- {
- tree worker_id = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (tid_decl, 1, u1);
- gimple_call_set_lhs (call, worker_id);
- gimple_seq_add_stmt (seq, call);
- if (res != NULL_TREE)
- {
- vector_count = create_tmp_var (unsigned_type_node);
- call = gimple_build_call (ntid_decl, 1, u0);
- gimple_call_set_lhs (call, vector_count);
- gimple_seq_add_stmt (seq, call);
- res = fold_build2 (PLUS_EXPR, unsigned_type_node,
- fold_build2 (MULT_EXPR, unsigned_type_node,
- vector_count, worker_id), res);
- }
- else
- res = worker_id;
- }
-
- if (gwv_bits & MASK_GANG)
- {
- tree worker_count;
- tree ctaid_decl = builtin_decl_explicit (BUILT_IN_GOACC_CTAID);
- tree gang_id = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (ctaid_decl, 1, u0);
- gimple_call_set_lhs (call, gang_id);
- gimple_seq_add_stmt (seq, call);
+ tree id_decl = builtin_decl_explicit (BUILT_IN_GOACC_ID);
+ tree nid_decl = builtin_decl_explicit (BUILT_IN_GOACC_NID);
+ unsigned ix;
- if (gwv_bits & MASK_WORKER)
- {
- worker_count = create_tmp_var (unsigned_type_node);
- call = gimple_build_call (ntid_decl, 1, u1);
- gimple_call_set_lhs (call, worker_count);
- gimple_seq_add_stmt (seq, call);
- }
- else
- worker_count = u1;
+ /* Start at gang level, and examine relevant dimension indices. */
+ for (ix = 0; (1 << ix) <= gwv_bits; ix++)
+ if ((1 << ix) & gwv_bits)
+ {
+ tree arg = build_int_cst (unsigned_type_node, ix);
- if (gwv_bits & MASK_VECTOR)
- {
- if (vector_count == NULL_TREE)
- {
- vector_count = create_tmp_var (unsigned_type_node);
- call = gimple_build_call (ntid_decl, 1, u0);
- gimple_call_set_lhs (call, vector_count);
- gimple_seq_add_stmt (seq, call);
- }
- }
- else
- vector_count = u1;
+ if (res)
+ {
+ /* We had an outer index, so scale that by the size of
+ this dimension. */
+ tree n = create_tmp_var (unsigned_type_node);
+ gimple call = gimple_build_call (nid_decl, 1, arg);
+
+ gimple_call_set_lhs (call, n);
+ gimple_seq_add_stmt (seq, call);
+ res = fold_build2 (MULT_EXPR, unsigned_type_node, res, n);
+ }
- if (gwv_bits & (MASK_WORKER | MASK_VECTOR))
- {
- gcc_assert (res != NULL_TREE);
- res = fold_build2 (PLUS_EXPR, unsigned_type_node,
- fold_build2 (MULT_EXPR, unsigned_type_node,
- fold_build2 (MULT_EXPR, unsigned_type_node,
- worker_count, vector_count),
- gang_id),
- res);
- }
- else
- res = gang_id;
- }
+ /* Determine index in this dimension. */
+ tree id = create_tmp_var (unsigned_type_node);
+ gimple call = gimple_build_call (id_decl, 1, arg);
+
+ gimple_call_set_lhs (call, id);
+ gimple_seq_add_stmt (seq, call);
+ if (res)
+ res = fold_build2 (PLUS_EXPR, unsigned_type_node, res, id);
+ else
+ res = id;
+ }
if (res == NULL_TREE)
- res = u0;
-
+ res = build_int_cst (unsigned_type_node, 0);
+
return res;
}
@@ -7278,10 +7201,10 @@ expand_omp_for_generic (struct omp_regio
static bool
oacc_loop_needs_threadbarrier_p (int gwv_bits)
{
- return (gwv_bits & (MASK_GANG | MASK_WORKER)) == MASK_WORKER;
+ return !(gwv_bits & OACC_LOOP_MASK (OACC_gang))
+ && (gwv_bits & OACC_LOOP_MASK (OACC_worker));
}
-
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with static schedule and no specified chunk size. Given
parameters:
@@ -10416,11 +10339,11 @@ find_omp_for_region_gwv (gimple stmt)
tree clauses = gimple_omp_for_clauses (stmt);
if (find_omp_clause (clauses, OMP_CLAUSE_GANG))
- tmp |= MASK_GANG;
+ tmp |= OACC_LOOP_MASK (OACC_gang);
if (find_omp_clause (clauses, OMP_CLAUSE_WORKER))
- tmp |= MASK_WORKER;
+ tmp |= OACC_LOOP_MASK (OACC_worker);
if (find_omp_clause (clauses, OMP_CLAUSE_VECTOR))
- tmp |= MASK_VECTOR;
+ tmp |= OACC_LOOP_MASK (OACC_vector);
return tmp;
}
@@ -10437,11 +10360,11 @@ find_omp_target_region_data (struct omp_
tree clauses = gimple_omp_target_clauses (stmt);
if (find_omp_clause (clauses, OMP_CLAUSE_NUM_GANGS))
- region->gwv_this |= MASK_GANG;
+ region->gwv_this |= OACC_LOOP_MASK (OACC_gang);
if (find_omp_clause (clauses, OMP_CLAUSE_NUM_WORKERS))
- region->gwv_this |= MASK_WORKER;
+ region->gwv_this |= OACC_LOOP_MASK (OACC_worker);
if (find_omp_clause (clauses, OMP_CLAUSE_VECTOR_LENGTH))
- region->gwv_this |= MASK_VECTOR;
+ region->gwv_this |= OACC_LOOP_MASK (OACC_vector);
region->broadcast_array = gimple_omp_target_broadcast_array (stmt);
}
@@ -10621,14 +10544,14 @@ required_predication_mask (omp_region *r
return 0;
int mask = 0;
- if ((outer_target->gwv_this & MASK_WORKER) != 0
+ if ((outer_target->gwv_this & OACC_LOOP_MASK (OACC_worker)) != 0
&& (region->type == GIMPLE_OMP_TARGET
- || (outer_masks & MASK_WORKER) == 0))
- mask |= MASK_WORKER;
- if ((outer_target->gwv_this & MASK_VECTOR) != 0
+ || (outer_masks & OACC_LOOP_MASK (OACC_worker)) == 0))
+ mask |= OACC_LOOP_MASK (OACC_worker);
+ if ((outer_target->gwv_this & OACC_LOOP_MASK (OACC_vector)) != 0
&& (region->type == GIMPLE_OMP_TARGET
- || (outer_masks & MASK_VECTOR) == 0))
- mask |= MASK_VECTOR;
+ || (outer_masks & OACC_LOOP_MASK (OACC_vector)) == 0))
+ mask |= OACC_LOOP_MASK (OACC_vector);
return mask;
}
@@ -10698,7 +10621,7 @@ generate_vector_broadcast (tree dest_var
/* Generate a broadcast across OpenACC threads in REGION so that VAR
is broadcast to DEST_VAR. MASK specifies the parallelism level and
- thereby the broadcast method. If it is equal to MASK_VECTOR, we
+ thereby the broadcast method. If it is only vector, we
can use a warp broadcast, otherwise we fall back to memory
store/load. */
@@ -10706,7 +10629,7 @@ static gimple
generate_oacc_broadcast (omp_region *region, tree dest_var, tree var,
gimple_stmt_iterator &where, int mask)
{
- if (mask == MASK_VECTOR)
+ if (mask == OACC_LOOP_MASK (OACC_vector))
return generate_vector_broadcast (dest_var, var, where);
omp_region *parent = enclosing_target_region (region);
@@ -10735,7 +10658,7 @@ generate_oacc_broadcast (omp_region *reg
/* Build a test for OpenACC predication. TRUE_EDGE is the edge that should be
taken if the block should be executed. SKIP_DEST_BB is the destination to
jump to otherwise. MASK specifies the type of predication, it can contain
- the bits MASK_VECTOR and/or MASK_WORKER. */
+ the bits for VECTOR and/or WORKER. */
static void
make_predication_test (edge true_edge, basic_block skip_dest_bb, int mask)
@@ -10743,32 +10666,31 @@ make_predication_test (edge true_edge, b
basic_block cond_bb = true_edge->src;
gimple_stmt_iterator tmp_gsi = gsi_last_bb (cond_bb);
- tree decl = builtin_decl_explicit (BUILT_IN_GOACC_TID);
-
- tree vvar = NULL_TREE, wvar = NULL_TREE;
+ tree decl = builtin_decl_explicit (BUILT_IN_GOACC_ID);
tree comp_var = NULL_TREE;
- if (mask & MASK_VECTOR)
- {
- gimple call = gimple_build_call (decl, 1, integer_zero_node);
- vvar = create_tmp_var (unsigned_type_node);
- comp_var = vvar;
- gimple_call_set_lhs (call, vvar);
- gsi_insert_after (&tmp_gsi, call, GSI_NEW_STMT);
- }
- if (mask & MASK_WORKER)
- {
- gimple call = gimple_build_call (decl, 1, integer_one_node);
- wvar = create_tmp_var (unsigned_type_node);
- comp_var = wvar;
- gimple_call_set_lhs (call, wvar);
- gsi_insert_after (&tmp_gsi, call, GSI_NEW_STMT);
- }
- if (wvar && vvar)
- {
- comp_var = create_tmp_var (unsigned_type_node);
- gassign *ior = gimple_build_assign (comp_var, BIT_IOR_EXPR, wvar, vvar);
- gsi_insert_after (&tmp_gsi, ior, GSI_NEW_STMT);
- }
+ unsigned ix;
+
+ for (ix = OACC_worker; ix <= OACC_vector; ix++)
+ if (mask & (1 << ix))
+ {
+ gimple call = gimple_build_call
+ (decl, 1, build_int_cst (unsigned_type_node, ix));
+ tree var = create_tmp_var (unsigned_type_node);
+
+ gimple_call_set_lhs (call, var);
+ gsi_insert_after (&tmp_gsi, call, GSI_NEW_STMT);
+ if (comp_var)
+ {
+ tree new_comp = create_tmp_var (unsigned_type_node);
+ gassign *ior = gimple_build_assign (new_comp,
+ BIT_IOR_EXPR, comp_var, var);
+ gsi_insert_after (&tmp_gsi, ior, GSI_NEW_STMT);
+ comp_var = new_comp;
+ }
+ else
+ comp_var = var;
+ }
+
tree cond = build2 (EQ_EXPR, boolean_type_node, comp_var,
fold_convert (unsigned_type_node, integer_zero_node));
gimple cond_stmt = gimple_build_cond_empty (cond);
@@ -10789,7 +10711,7 @@ make_predication_test (edge true_edge, b
/* Apply OpenACC predication to basic block BB which is in
region PARENT. MASK has a bitmask of levels that need to be
- applied; MASK_VECTOR and/or MASK_WORKER may be set. */
+ applied; VECTOR and/or WORKER may be set. */
static void
predicate_bb (basic_block bb, struct omp_region *parent, int mask)
@@ -10798,8 +10720,8 @@ predicate_bb (basic_block bb, struct omp
around them if not in the controlling worker. Don't insert
unnecessary (and incorrect) predication. */
if (parent->type == GIMPLE_OMP_FOR
- && (parent->gwv_this & MASK_VECTOR))
- mask &= ~MASK_WORKER;
+ && (parent->gwv_this & OACC_LOOP_MASK (OACC_vector)))
+ mask &= ~OACC_LOOP_MASK (OACC_worker);
if (mask == 0 || parent->type == GIMPLE_OMP_ATOMIC_LOAD)
return;
@@ -10873,15 +10795,16 @@ predicate_bb (basic_block bb, struct omp
skip_dest_bb = single_succ (inner->exit);
gcc_assert (inner->entry == bb);
if (code != GIMPLE_OMP_FOR
- || ((inner->gwv_this & (MASK_VECTOR | MASK_WORKER)) == MASK_VECTOR
- && (mask & MASK_WORKER) != 0))
+ || ((inner->gwv_this & OACC_LOOP_MASK (OACC_vector))
+ && !(inner->gwv_this & OACC_LOOP_MASK (OACC_worker))
+ && (mask & OACC_LOOP_MASK (OACC_worker))))
{
gimple_stmt_iterator head_gsi = gsi_start_bb (bb);
gsi_prev (&head_gsi);
edge e0 = split_block (bb, gsi_stmt (head_gsi));
int mask2 = mask;
if (code == GIMPLE_OMP_FOR)
- mask2 &= ~MASK_VECTOR;
+ mask2 &= ~OACC_LOOP_MASK (OACC_vector);
if (!split_stmt || code != GIMPLE_OMP_FOR)
{
/* The simple case: nothing here except the for,
@@ -11199,7 +11122,7 @@ oacc_broadcast (basic_block entry_bb, ba
use.erase (it);
}
- if (mask == MASK_VECTOR)
+ if (mask == OACC_LOOP_MASK (OACC_vector))
{
/* Broadcast all decls in USE right before the last instruction in
entry_bb. */
@@ -11213,7 +11136,7 @@ oacc_broadcast (basic_block entry_bb, ba
gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING);
}
- else if (mask & MASK_WORKER)
+ else if (mask & OACC_LOOP_MASK (OACC_worker))
{
if (use.empty ())
return entry_bb;
@@ -13104,25 +13027,31 @@ lower_omp_taskreg (gimple_stmt_iterator
static void
oacc_init_count_vars (omp_context *ctx, tree clauses ATTRIBUTE_UNUSED)
{
- tree gettid = builtin_decl_explicit (BUILT_IN_GOACC_TID);
- tree getntid = builtin_decl_explicit (BUILT_IN_GOACC_NTID);
+ tree getid = builtin_decl_explicit (BUILT_IN_GOACC_ID);
+ tree getnid = builtin_decl_explicit (BUILT_IN_GOACC_NID);
tree worker_var, worker_count;
- tree u1 = fold_convert (unsigned_type_node, integer_one_node);
- tree u0 = fold_convert (unsigned_type_node, integer_zero_node);
- if (ctx->gwv_this & MASK_WORKER)
+
+ if (ctx->gwv_this & OACC_LOOP_MASK (OACC_worker))
{
+ tree arg = build_int_cst (unsigned_type_node, OACC_worker);
+
worker_var = create_tmp_var (unsigned_type_node, ".worker");
worker_count = create_tmp_var (unsigned_type_node, ".workercount");
- gimple call1 = gimple_build_call (gettid, 1, u1);
+
+ gimple call1 = gimple_build_call (getid, 1, arg);
gimple_call_set_lhs (call1, worker_var);
gimple_seq_add_stmt (&ctx->ganglocal_init, call1);
- gimple call2 = gimple_build_call (getntid, 1, u1);
+
+ gimple call2 = gimple_build_call (getnid, 1, arg);
gimple_call_set_lhs (call2, worker_count);
gimple_seq_add_stmt (&ctx->ganglocal_init, call2);
}
else
- worker_var = u0, worker_count = u1;
-
+ {
+ worker_var = build_int_cst (unsigned_type_node, 0);
+ worker_count = build_int_cst (unsigned_type_node, 1);
+ }
+
ctx->worker_var = worker_var;
ctx->worker_count = worker_count;
}
===================================================================
@@ -20,6 +20,14 @@ along with GCC; see the file COPYING3.
#ifndef GCC_OMP_LOW_H
#define GCC_OMP_LOW_H
+enum oacc_loop_levels
+ {
+ OACC_gang,
+ OACC_worker,
+ OACC_vector,
+ OACC_HWM
+ };
+
struct omp_region;
extern tree find_omp_clause (tree, enum omp_clause_code);
===================================================================
@@ -85,7 +85,7 @@ along with GCC; see the file COPYING3.
#include "tree-chkp.h"
#include "rtl-chkp.h"
#include "gomp-constants.h"
-
+#include "omp-low.h"
static tree do_mpc_arg1 (tree, tree, int (*)(mpc_ptr, mpc_srcptr, mpc_rnd_t));
@@ -5962,44 +5962,42 @@ expand_oacc_threadbarrier (void)
/* Expand a thread-id/thread-count builtin for OpenACC. */
+
static rtx
-expand_oacc_builtin (enum built_in_function fcode, tree exp, rtx target)
+expand_oacc_id (enum built_in_function fcode, tree exp, rtx target)
{
tree arg0 = CALL_EXPR_ARG (exp, 0);
rtx result = const0_rtx;
rtx arg;
- gcc_assert (TREE_CODE (arg0) == INTEGER_CST);
arg = expand_normal (arg0);
+ if (GET_CODE (arg) != CONST_INT
+ || (unsigned HOST_WIDE_INT)INTVAL (arg) >= OACC_HWM)
+ {
+ error ("argument to %D must be constant in range 0 to %d",
+ get_callee_fndecl (exp), OACC_HWM - 1);
+ return result;
+ }
enum insn_code icode = CODE_FOR_nothing;
switch (fcode)
{
- case BUILT_IN_GOACC_NTID:
-#ifdef HAVE_oacc_ntid
- icode = CODE_FOR_oacc_ntid;
-#endif
- result = const1_rtx;
- break;
- case BUILT_IN_GOACC_TID:
-#ifdef HAVE_oacc_tid
- icode = CODE_FOR_oacc_tid;
-#endif
- break;
- case BUILT_IN_GOACC_NCTAID:
-#ifdef HAVE_oacc_nctaid
- icode = CODE_FOR_oacc_nctaid;
+ case BUILT_IN_GOACC_NID:
+#ifdef HAVE_oacc_nid
+ icode = CODE_FOR_oacc_nid;
#endif
result = const1_rtx;
break;
- case BUILT_IN_GOACC_CTAID:
-#ifdef HAVE_oacc_ctaid
- icode = CODE_FOR_oacc_ctaid;
+ case BUILT_IN_GOACC_ID:
+#ifdef HAVE_oacc_id
+ icode = CODE_FOR_oacc_id;
#endif
break;
default:
+ gcc_unreachable ();
break;
}
+
if (icode != CODE_FOR_nothing)
{
machine_mode mode = insn_data[icode].operand[0].mode;
@@ -7218,11 +7216,9 @@ expand_builtin (tree exp, rtx target, rt
return target;
break;
- case BUILT_IN_GOACC_NTID:
- case BUILT_IN_GOACC_TID:
- case BUILT_IN_GOACC_NCTAID:
- case BUILT_IN_GOACC_CTAID:
- return expand_oacc_builtin (fcode, exp, target);
+ case BUILT_IN_GOACC_ID:
+ case BUILT_IN_GOACC_NID:
+ return expand_oacc_id (fcode, exp, target);
case BUILT_IN_GOACC_GET_GANGLOCAL_PTR:
target = expand_oacc_ganglocal_ptr (target);
@@ -12590,9 +12586,8 @@ is_simple_builtin (tree decl)
case BUILT_IN_EH_FILTER:
case BUILT_IN_EH_POINTER:
case BUILT_IN_EH_COPY_VALUES:
- /* Just a special register access. */
- case BUILT_IN_GOACC_NTID:
- case BUILT_IN_GOACC_TID:
+ /* Just a special register read. */
+ case BUILT_IN_GOACC_NID:
return true;
default: