@@ -17377,13 +17377,21 @@ c_parser_omp_clause_reduction (c_parser *parser, enum omp_clause_code kind,
code = MAX_EXPR;
break;
}
+ if (!is_omp)
+ goto name_error;
reduc_id = c_parser_peek_token (parser)->value;
break;
}
default:
- c_parser_error (parser,
- "expected %<+%>, %<*%>, %<-%>, %<&%>, "
- "%<^%>, %<|%>, %<&&%>, %<||%> or identifier");
+ name_error:
+ if (is_omp)
+ c_parser_error (parser,
+ "expected %<+%>, %<*%>, %<-%>, %<&%>, "
+ "%<^%>, %<|%>, %<&&%>, %<||%> or identifier");
+ else
+ c_parser_error (parser,
+ "expected %<+%>, %<*%>, %<-%>, %<&%>, "
+ "%<^%>, %<|%>, %<&&%>, %<||%>, %<min%> or %<max%>");
c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, 0);
return list;
}
@@ -17396,6 +17404,11 @@ c_parser_omp_clause_reduction (c_parser *parser, enum omp_clause_code kind,
nl = c_parser_omp_variable_list (parser, clause_loc, kind, list);
for (c = nl; c != list; c = OMP_CLAUSE_CHAIN (c))
{
+ OMP_CLAUSE_REDUCTION_CODE (c) = code;
+ /* OpenACC does not require anything below. */
+ if (!is_omp)
+ continue;
+
tree d = OMP_CLAUSE_DECL (c), type;
if (TREE_CODE (d) != OMP_ARRAY_SECTION)
type = TREE_TYPE (d);
@@ -17419,7 +17432,6 @@ c_parser_omp_clause_reduction (c_parser *parser, enum omp_clause_code kind,
}
while (TREE_CODE (type) == ARRAY_TYPE)
type = TREE_TYPE (type);
- OMP_CLAUSE_REDUCTION_CODE (c) = code;
if (task)
OMP_CLAUSE_REDUCTION_TASK (c) = 1;
else if (inscan)
@@ -14952,6 +14952,68 @@ c_oacc_check_attachments (tree c)
return false;
}
+static bool
+c_oacc_reduction_defined_type_p (enum tree_code reduction_code, tree t)
+{
+ if (TREE_CODE (t) == INTEGER_TYPE)
+ return true;
+
+ if (FLOAT_TYPE_P (t) || TREE_CODE (t) == COMPLEX_TYPE)
+ switch (reduction_code)
+ {
+ case PLUS_EXPR:
+ case MULT_EXPR:
+ case MINUS_EXPR:
+ case TRUTH_ANDIF_EXPR:
+ case TRUTH_ORIF_EXPR:
+ return true;
+ case MIN_EXPR:
+ case MAX_EXPR:
+ return TREE_CODE (t) != COMPLEX_TYPE;
+ case BIT_AND_EXPR:
+ case BIT_XOR_EXPR:
+ case BIT_IOR_EXPR:
+ return false;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (TREE_CODE (t) == ARRAY_TYPE)
+ return c_oacc_reduction_defined_type_p (reduction_code, TREE_TYPE (t));
+
+ if (TREE_CODE (t) == RECORD_TYPE)
+ {
+ for (tree fld = TYPE_FIELDS (t); fld; fld = TREE_CHAIN (fld))
+ if (TREE_CODE (fld) == FIELD_DECL
+ && !c_oacc_reduction_defined_type_p (reduction_code,
+ TREE_TYPE (fld)))
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+static const char *
+c_oacc_reduction_code_name (enum tree_code reduction_code)
+{
+ switch (reduction_code)
+ {
+ case PLUS_EXPR: return "+";
+ case MULT_EXPR: return "*";
+ case MINUS_EXPR: return "-";
+ case TRUTH_ANDIF_EXPR: return "&&";
+ case TRUTH_ORIF_EXPR: return "||";
+ case MIN_EXPR: return "min";
+ case MAX_EXPR: return "max";
+ case BIT_AND_EXPR: return "&";
+ case BIT_XOR_EXPR: return "^";
+ case BIT_IOR_EXPR: return "|";
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* For all elements of CLAUSES, validate them against their constraints.
Remove any elements from the list that are invalid. */
@@ -15144,9 +15206,22 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
break;
}
}
- if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) == NULL_TREE
- && (FLOAT_TYPE_P (type)
- || TREE_CODE (type) == COMPLEX_TYPE))
+ if (ort == C_ORT_ACC)
+ {
+ enum tree_code r_code = OMP_CLAUSE_REDUCTION_CODE (c);
+ if (!c_oacc_reduction_defined_type_p (r_code, TREE_TYPE (t)))
+ {
+ const char *r_name = c_oacc_reduction_code_name (r_code);
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "%qE has invalid type for %<reduction(%s)%>",
+ t, r_name);
+ remove = true;
+ break;
+ }
+ }
+ else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) == NULL_TREE
+ && (FLOAT_TYPE_P (type)
+ || TREE_CODE (type) == COMPLEX_TYPE))
{
enum tree_code r_code = OMP_CLAUSE_REDUCTION_CODE (c);
const char *r_name = NULL;
@@ -296,6 +296,109 @@ gcn_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
tree type = TREE_TYPE (var);
tree size = TYPE_SIZE (type);
+ if (!VAR_P (ptr))
+ {
+ tree t = make_ssa_name (TREE_TYPE (ptr));
+ gimple_seq seq = NULL;
+ gimplify_assign (t, ptr, &seq);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ ptr = t;
+ }
+
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ gimple *g;
+ gimple_seq seq = NULL;
+ tree array_type = TREE_TYPE (var);
+ tree array_elem_type = TREE_TYPE (array_type);
+ tree max_index = TYPE_MAX_VALUE (TYPE_DOMAIN (array_type));
+
+ tree init_index = make_ssa_name (TREE_TYPE (max_index));
+ tree loop_index = make_ssa_name (TREE_TYPE (max_index));
+ tree update_index = make_ssa_name (TREE_TYPE (max_index));
+
+ g = gimple_build_assign (init_index,
+ build_int_cst (TREE_TYPE (init_index), 0));
+ gimple_seq_add_stmt (&seq, g);
+ gimple *init_end = gimple_seq_last (seq);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+ basic_block init_bb = gsi_bb (*gsi);
+ edge init_edge = split_block (init_bb, init_end);
+ basic_block loop_bb = init_edge->dest;
+ /* Reset the iterator. */
+ *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+ seq = NULL;
+ g = gimple_build_assign (update_index, PLUS_EXPR, loop_index,
+ build_int_cst (TREE_TYPE (loop_index), 1));
+ gimple_seq_add_stmt (&seq, g);
+
+ g = gimple_build_cond (LE_EXPR, update_index, max_index, NULL, NULL);
+ gimple_seq_add_stmt (&seq, g);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+ edge post_edge = split_block (loop_bb, g);
+ basic_block post_bb = post_edge->dest;
+ loop_bb = post_edge->src;
+ /* Reset the iterator. */
+ *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+ /* Place where we insert reduction code below. */
+ gimple_stmt_iterator reduction_code_gsi = gsi_start_bb (loop_bb);
+
+ post_edge->flags ^= EDGE_FALSE_VALUE | EDGE_FALLTHRU;
+ post_edge->probability = profile_probability::even ();
+ edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
+ loop_edge->probability = profile_probability::even ();
+ set_immediate_dominator (CDI_DOMINATORS, loop_bb, init_bb);
+ set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
+ class loop *new_loop = alloc_loop ();
+ new_loop->header = loop_bb;
+ new_loop->latch = loop_bb;
+ add_loop (new_loop, loop_bb->loop_father);
+
+ gphi *phi = create_phi_node (loop_index, loop_bb);
+ add_phi_arg (phi, init_index, init_edge, loc);
+ add_phi_arg (phi, update_index, loop_edge, loc);
+
+ tree var_aref = build4 (ARRAY_REF, array_elem_type,
+ var, loop_index, NULL_TREE, NULL_TREE);
+
+ tree red_array = build_simple_mem_ref (ptr);
+ tree red_array_type = TREE_TYPE (red_array);
+ tree red_array_elem_type
+ = build_qualified_type (TREE_TYPE (red_array_type),
+ TYPE_QUALS (red_array_type));
+ tree ptr_aref = build4 (ARRAY_REF, red_array_elem_type,
+ red_array, loop_index,
+ NULL_TREE, NULL_TREE);
+
+ gcn_reduction_update (loc, &reduction_code_gsi,
+ build_fold_addr_expr (ptr_aref),
+ var_aref, op);
+ return build_simple_mem_ref (ptr);
+ }
+ else if (TREE_CODE (type) == RECORD_TYPE)
+ {
+ for (tree fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
+ if (TREE_CODE (fld) == FIELD_DECL)
+ {
+ tree var_fld_ref = build3 (COMPONENT_REF, TREE_TYPE (fld),
+ var, fld, NULL);
+ tree ptr_ref = build_simple_mem_ref (ptr);
+ tree ptr_fld_type
+ = build_qualified_type (TREE_TYPE (fld),
+ TYPE_QUALS (TREE_TYPE (ptr_ref)));
+ tree ptr_fld_ref = build3 (COMPONENT_REF, ptr_fld_type,
+ ptr_ref, fld, NULL);
+ gcn_reduction_update (loc, gsi,
+ build_fold_addr_expr (ptr_fld_ref),
+ var_fld_ref, op);
+ }
+ return build_simple_mem_ref (ptr);
+ }
+
if (size == TYPE_SIZE (unsigned_type_node)
|| size == TYPE_SIZE (long_long_unsigned_type_node))
return gcn_lockless_update (loc, gsi, ptr, var, op);
@@ -359,11 +462,14 @@ gcn_goacc_reduction_setup (gcall *call)
gimplify_assign (decl, var, &seq);
}
- if (lhs)
+ if (lhs
+ && TREE_CODE (TREE_TYPE (var)) != ARRAY_TYPE
+ && TREE_CODE (TREE_TYPE (var)) != RECORD_TYPE)
gimplify_assign (lhs, var, &seq);
pop_gimplify_context (NULL);
- gsi_replace_with_seq (&gsi, seq, true);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* Expand IFN_GOACC_REDUCTION_INIT. */
@@ -395,7 +501,8 @@ gcn_goacc_reduction_init (gcall *call)
gimplify_assign (lhs, init, &seq);
pop_gimplify_context (NULL);
- gsi_replace_with_seq (&gsi, seq, true);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* Expand IFN_GOACC_REDUCTION_FINI. */
@@ -439,11 +546,13 @@ gcn_goacc_reduction_fini (gcall *call)
r = gcn_reduction_update (gimple_location (call), &gsi, accum, var, op);
}
- if (lhs)
+ if (lhs
+ && TREE_CODE (TREE_TYPE (r)) != ARRAY_TYPE
+ && TREE_CODE (TREE_TYPE (r)) != RECORD_TYPE)
gimplify_assign (lhs, r, &seq);
pop_gimplify_context (NULL);
-
- gsi_replace_with_seq (&gsi, seq, true);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* Expand IFN_GOACC_REDUCTION_TEARDOWN. */
@@ -483,8 +592,8 @@ gcn_goacc_reduction_teardown (gcall *call)
gimplify_assign (lhs, unshare_expr (var), &seq);
pop_gimplify_context (NULL);
-
- gsi_replace_with_seq (&gsi, seq, true);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* Implement TARGET_GOACC_REDUCTION.
@@ -2029,19 +2029,15 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
break;
case E_V2SImode:
{
- rtx src0 = gen_rtx_SUBREG (SImode, src, 0);
- rtx src1 = gen_rtx_SUBREG (SImode, src, 4);
- rtx dst0 = gen_rtx_SUBREG (SImode, dst, 0);
- rtx dst1 = gen_rtx_SUBREG (SImode, dst, 4);
rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (SImode);
start_sequence ();
- emit_insn (gen_movsi (tmp0, src0));
- emit_insn (gen_movsi (tmp1, src1));
+ emit_insn (gen_vec_extractv2sisi (tmp0, src, GEN_INT (0)));
+ emit_insn (gen_vec_extractv2sisi (tmp1, src, GEN_INT (1)));
emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
- emit_insn (gen_movsi (dst0, tmp0));
- emit_insn (gen_movsi (dst1, tmp1));
+ emit_insn (gen_vec_setv2si (dst, tmp0, GEN_INT (0)));
+ emit_insn (gen_vec_setv2si (dst, tmp1, GEN_INT (1)));
res = get_insns ();
end_sequence ();
}
@@ -6711,11 +6707,9 @@ nvptx_get_shared_red_addr (tree type, tree offset, bool vector)
enum nvptx_builtins addr_dim = NVPTX_BUILTIN_WORKER_ADDR;
if (vector)
addr_dim = NVPTX_BUILTIN_VECTOR_ADDR;
- machine_mode mode = TYPE_MODE (type);
tree fndecl = nvptx_builtin_decl (addr_dim, true);
- tree size = build_int_cst (unsigned_type_node, GET_MODE_SIZE (mode));
- tree align = build_int_cst (unsigned_type_node,
- GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
+ tree size = TYPE_SIZE_UNIT (type);
+ tree align = build_int_cst (unsigned_type_node, TYPE_ALIGN_UNIT (type));
tree call = build_call_expr (fndecl, 3, offset, size, align);
return fold_convert (build_pointer_type (type), call);
@@ -7032,6 +7026,109 @@ nvptx_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
tree type = TREE_TYPE (var);
tree size = TYPE_SIZE (type);
+ if (!VAR_P (ptr))
+ {
+ tree t = make_ssa_name (TREE_TYPE (ptr));
+ gimple_seq seq = NULL;
+ gimplify_assign (t, ptr, &seq);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ ptr = t;
+ }
+
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ gimple *g;
+ gimple_seq seq = NULL;
+ tree array_type = TREE_TYPE (var);
+ tree array_elem_type = TREE_TYPE (array_type);
+ tree max_index = TYPE_MAX_VALUE (TYPE_DOMAIN (array_type));
+
+ tree init_index = make_ssa_name (TREE_TYPE (max_index));
+ tree loop_index = make_ssa_name (TREE_TYPE (max_index));
+ tree update_index = make_ssa_name (TREE_TYPE (max_index));
+
+ g = gimple_build_assign (init_index,
+ build_int_cst (TREE_TYPE (init_index), 0));
+ gimple_seq_add_stmt (&seq, g);
+ gimple *init_end = gimple_seq_last (seq);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+ basic_block init_bb = gsi_bb (*gsi);
+ edge init_edge = split_block (init_bb, init_end);
+ basic_block loop_bb = init_edge->dest;
+ /* Reset the iterator. */
+ *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+ seq = NULL;
+ g = gimple_build_assign (update_index, PLUS_EXPR, loop_index,
+ build_int_cst (TREE_TYPE (loop_index), 1));
+ gimple_seq_add_stmt (&seq, g);
+
+ g = gimple_build_cond (LE_EXPR, update_index, max_index, NULL, NULL);
+ gimple_seq_add_stmt (&seq, g);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+ edge post_edge = split_block (loop_bb, g);
+ basic_block post_bb = post_edge->dest;
+ loop_bb = post_edge->src;
+ /* Reset the iterator. */
+ *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+ /* Place where we insert reduction code below. */
+ gimple_stmt_iterator reduction_code_gsi = gsi_start_bb (loop_bb);
+
+ post_edge->flags ^= EDGE_FALSE_VALUE | EDGE_FALLTHRU;
+ post_edge->probability = profile_probability::even ();
+ edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
+ loop_edge->probability = profile_probability::even ();
+ set_immediate_dominator (CDI_DOMINATORS, loop_bb, init_bb);
+ set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
+ class loop *new_loop = alloc_loop ();
+ new_loop->header = loop_bb;
+ new_loop->latch = loop_bb;
+ add_loop (new_loop, loop_bb->loop_father);
+
+ gphi *phi = create_phi_node (loop_index, loop_bb);
+ add_phi_arg (phi, init_index, init_edge, loc);
+ add_phi_arg (phi, update_index, loop_edge, loc);
+
+ tree var_aref = build4 (ARRAY_REF, array_elem_type,
+ var, loop_index, NULL_TREE, NULL_TREE);
+
+ tree red_array = build_simple_mem_ref (ptr);
+ tree red_array_type = TREE_TYPE (red_array);
+ tree red_array_elem_type
+ = build_qualified_type (TREE_TYPE (red_array_type),
+ TYPE_QUALS (red_array_type));
+ tree ptr_aref = build4 (ARRAY_REF, red_array_elem_type,
+ red_array, loop_index,
+ NULL_TREE, NULL_TREE);
+
+ nvptx_reduction_update (loc, &reduction_code_gsi,
+ build_fold_addr_expr (ptr_aref),
+ var_aref, op, level);
+ return build_simple_mem_ref (ptr);
+ }
+ else if (TREE_CODE (type) == RECORD_TYPE)
+ {
+ for (tree fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
+ if (TREE_CODE (fld) == FIELD_DECL)
+ {
+ tree var_fld_ref = build3 (COMPONENT_REF, TREE_TYPE (fld),
+ var, fld, NULL);
+ tree ptr_ref = build_simple_mem_ref (ptr);
+ tree ptr_fld_type
+ = build_qualified_type (TREE_TYPE (fld),
+ TYPE_QUALS (TREE_TYPE (ptr_ref)));
+ tree ptr_fld_ref = build3 (COMPONENT_REF, ptr_fld_type,
+ ptr_ref, fld, NULL);
+ nvptx_reduction_update (loc, gsi,
+ build_fold_addr_expr (ptr_fld_ref),
+ var_fld_ref, op, level);
+ }
+ return build_simple_mem_ref (ptr);
+ }
+
if (size == TYPE_SIZE (unsigned_type_node)
|| size == TYPE_SIZE (long_long_unsigned_type_node))
return nvptx_lockless_update (loc, gsi, ptr, var, op);
@@ -7062,7 +7159,10 @@ nvptx_goacc_reduction_setup (gcall *call, offload_attrs *oa)
}
if (level == GOMP_DIM_WORKER
- || (level == GOMP_DIM_VECTOR && oa->vector_length > PTX_WARP_SIZE))
+ || (level == GOMP_DIM_VECTOR
+ && (oa->vector_length > PTX_WARP_SIZE
+ || TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
+ || TREE_CODE (TREE_TYPE (var)) == RECORD_TYPE)))
{
/* Store incoming value to worker reduction buffer. */
tree offset = gimple_call_arg (call, 5);
@@ -7076,11 +7176,14 @@ nvptx_goacc_reduction_setup (gcall *call, offload_attrs *oa)
gimplify_assign (ref, var, &seq);
}
- if (lhs)
+ if (lhs
+ && TREE_CODE (TREE_TYPE (var)) != ARRAY_TYPE
+ && TREE_CODE (TREE_TYPE (var)) != RECORD_TYPE)
gimplify_assign (lhs, var, &seq);
pop_gimplify_context (NULL);
- gsi_replace_with_seq (&gsi, seq, true);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* NVPTX implementation of GOACC_REDUCTION_INIT. */
@@ -7100,7 +7203,9 @@ nvptx_goacc_reduction_init (gcall *call, offload_attrs *oa)
push_gimplify_context (true);
- if (level == GOMP_DIM_VECTOR && oa->vector_length == PTX_WARP_SIZE)
+ if (level == GOMP_DIM_VECTOR && oa->vector_length == PTX_WARP_SIZE
+ && TREE_CODE (TREE_TYPE (var)) != ARRAY_TYPE
+ && TREE_CODE (TREE_TYPE (var)) != RECORD_TYPE)
{
/* Initialize vector-non-zeroes to INIT_VAL (OP). */
tree tid = make_ssa_name (integer_type_node);
@@ -7165,7 +7270,8 @@ nvptx_goacc_reduction_init (gcall *call, offload_attrs *oa)
}
pop_gimplify_context (NULL);
- gsi_replace_with_seq (&gsi, seq, true);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* NVPTX implementation of GOACC_REDUCTION_FINI. */
@@ -7185,7 +7291,9 @@ nvptx_goacc_reduction_fini (gcall *call, offload_attrs *oa)
push_gimplify_context (true);
- if (level == GOMP_DIM_VECTOR && oa->vector_length == PTX_WARP_SIZE)
+ if (level == GOMP_DIM_VECTOR && oa->vector_length == PTX_WARP_SIZE
+ && TREE_CODE (TREE_TYPE (var)) != ARRAY_TYPE
+ && TREE_CODE (TREE_TYPE (var)) != RECORD_TYPE)
{
/* Emit binary shuffle tree. TODO. Emit this as an actual loop,
but that requires a method of emitting a unified jump at the
@@ -7232,11 +7340,14 @@ nvptx_goacc_reduction_fini (gcall *call, offload_attrs *oa)
}
}
- if (lhs)
+ if (lhs
+ && TREE_CODE (TREE_TYPE (r)) != ARRAY_TYPE
+ && TREE_CODE (TREE_TYPE (r)) != RECORD_TYPE)
gimplify_assign (lhs, r, &seq);
- pop_gimplify_context (NULL);
- gsi_replace_with_seq (&gsi, seq, true);
+ pop_gimplify_context (NULL);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* NVPTX implementation of GOACC_REDUCTION_TEARDOWN. */
@@ -7252,7 +7363,10 @@ nvptx_goacc_reduction_teardown (gcall *call, offload_attrs *oa)
push_gimplify_context (true);
if (level == GOMP_DIM_WORKER
- || (level == GOMP_DIM_VECTOR && oa->vector_length > PTX_WARP_SIZE))
+ || (level == GOMP_DIM_VECTOR
+ && (oa->vector_length > PTX_WARP_SIZE
+ || TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
+ || TREE_CODE (TREE_TYPE (var)) == RECORD_TYPE)))
{
/* Read the worker reduction buffer. */
tree offset = gimple_call_arg (call, 5);
@@ -7275,11 +7389,11 @@ nvptx_goacc_reduction_teardown (gcall *call, offload_attrs *oa)
}
if (lhs)
- gimplify_assign (lhs, var, &seq);
+ gimplify_assign (lhs, unshare_expr (var), &seq);
pop_gimplify_context (NULL);
-
- gsi_replace_with_seq (&gsi, seq, true);
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
}
/* NVPTX reduction expander. */
@@ -40323,6 +40323,12 @@ cp_parser_omp_clause_reduction (cp_parser *parser, enum omp_clause_code kind,
code = TRUTH_ANDIF_EXPR;
else if (id == ovl_op_identifier (false, TRUTH_ORIF_EXPR))
code = TRUTH_ORIF_EXPR;
+ if (code == ERROR_MARK && !is_omp)
+ {
+ cp_parser_error (parser, "expected %<+%>, %<*%>, %<-%>, %<&%>, "
+ "%<^%>, %<|%>, %<&&%>, %<||%>, %<min%> or %<max%>");
+ goto resync_fail;
+ }
id = omp_reduction_id (code, id, NULL_TREE);
tree scope = parser->scope;
if (scope)
@@ -40350,6 +40356,10 @@ cp_parser_omp_clause_reduction (cp_parser *parser, enum omp_clause_code kind,
for (c = nlist; c != list; c = OMP_CLAUSE_CHAIN (c))
{
OMP_CLAUSE_REDUCTION_CODE (c) = code;
+ /* OpenACC does not require anything below. */
+ if (!is_omp)
+ continue;
+
if (task)
OMP_CLAUSE_REDUCTION_TASK (c) = 1;
else if (inscan)
@@ -6508,6 +6508,69 @@ cp_check_omp_declare_reduction (tree udr)
return true;
}
+
+static bool
+cp_oacc_reduction_defined_type_p (enum tree_code reduction_code, tree t)
+{
+ if (TREE_CODE (t) == INTEGER_TYPE)
+ return true;
+
+ if (FLOAT_TYPE_P (t) || TREE_CODE (t) == COMPLEX_TYPE)
+ switch (reduction_code)
+ {
+ case PLUS_EXPR:
+ case MULT_EXPR:
+ case MINUS_EXPR:
+ case TRUTH_ANDIF_EXPR:
+ case TRUTH_ORIF_EXPR:
+ return true;
+ case MIN_EXPR:
+ case MAX_EXPR:
+ return TREE_CODE (t) != COMPLEX_TYPE;
+ case BIT_AND_EXPR:
+ case BIT_XOR_EXPR:
+ case BIT_IOR_EXPR:
+ return false;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (TREE_CODE (t) == ARRAY_TYPE)
+ return cp_oacc_reduction_defined_type_p (reduction_code, TREE_TYPE (t));
+
+ if (TREE_CODE (t) == RECORD_TYPE)
+ {
+ for (tree fld = TYPE_FIELDS (t); fld; fld = TREE_CHAIN (fld))
+ if (TREE_CODE (fld) == FIELD_DECL
+ && !cp_oacc_reduction_defined_type_p (reduction_code,
+ TREE_TYPE (fld)))
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+static const char *
+cp_oacc_reduction_code_name (enum tree_code reduction_code)
+{
+ switch (reduction_code)
+ {
+ case PLUS_EXPR: return "+";
+ case MULT_EXPR: return "*";
+ case MINUS_EXPR: return "-";
+ case TRUTH_ANDIF_EXPR: return "&&";
+ case TRUTH_ORIF_EXPR: return "||";
+ case MIN_EXPR: return "min";
+ case MAX_EXPR: return "max";
+ case BIT_AND_EXPR: return "&";
+ case BIT_XOR_EXPR: return "^";
+ case BIT_IOR_EXPR: return "|";
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Helper function of finish_omp_clauses. Clone STMT as if we were making
an inline call. But, remap
the OMP_DECL1 VAR_DECL (omp_out resp. omp_orig) to PLACEHOLDER
@@ -6552,7 +6615,8 @@ find_omp_placeholder_r (tree *tp, int *, void *data)
Return true if there is some error and the clause should be removed. */
static bool
-finish_omp_reduction_clause (tree c, bool *need_default_ctor, bool *need_dtor)
+finish_omp_reduction_clause (tree c, bool *need_default_ctor, bool *need_dtor,
+ enum c_omp_region_type ort)
{
tree t = OMP_CLAUSE_DECL (c);
bool predefined = false;
@@ -6653,6 +6717,20 @@ finish_omp_reduction_clause (tree c, bool *need_default_ctor, bool *need_dtor)
return false;
}
+ if (ort == C_ORT_ACC)
+ {
+ enum tree_code r_code = OMP_CLAUSE_REDUCTION_CODE (c);
+ if (!cp_oacc_reduction_defined_type_p (r_code, TREE_TYPE (t)))
+ {
+ const char *r_name = cp_oacc_reduction_code_name (r_code);
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "%qE has invalid type for %<reduction(%s)%>",
+ t, r_name);
+ return true;
+ }
+ return false;
+ }
+
tree id = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
type = TYPE_MAIN_VARIANT (type);
@@ -9366,7 +9444,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
&& !VAR_P (t) && TREE_CODE (t) != PARM_DECL)
break;
if (finish_omp_reduction_clause (c, &need_default_ctor,
- &need_dtor))
+ &need_dtor, ort))
remove = true;
else
t = OMP_CLAUSE_DECL (c);
@@ -12154,6 +12154,38 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
false);
goto do_add;
case OMP_CLAUSE_REDUCTION:
+ if (region_type & ORT_ACC)
+ {
+ decl = OMP_CLAUSE_DECL (c);
+ if (TREE_CODE (decl) == MEM_REF
+ && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
+ {
+ /* Peel away MEM_REF to get at base array VAR_DECL. */
+ tree addr = TREE_OPERAND (decl, 0);
+ if (TREE_CODE (addr) == POINTER_PLUS_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ else if (INDIRECT_REF_P (addr))
+ addr = TREE_OPERAND (addr, 0);
+ if (!TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (addr))))
+ {
+ sorry_at (OMP_CLAUSE_LOCATION (c),
+ "array in reduction must be of constant size");
+ remove = true;
+ break;
+ }
+ tree min = TYPE_MIN_VALUE (TYPE_DOMAIN (TREE_TYPE (decl)));
+ tree max = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (decl)));
+ if (!TREE_CONSTANT (min) || !TREE_CONSTANT (max))
+ {
+ sorry_at (OMP_CLAUSE_LOCATION (c),
+ "array section bounds in reduction must be constant");
+ remove = true;
+ break;
+ }
+ }
+ }
if (OMP_CLAUSE_REDUCTION_TASK (c))
{
if (region_type == ORT_WORKSHARE || code == OMP_SCOPE)
@@ -14455,6 +14487,17 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
if (ctx->region_type == ORT_ACC_PARALLEL
|| ctx->region_type == ORT_ACC_SERIAL)
{
+ if (TREE_CODE (decl) == MEM_REF
+ && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
+ {
+ tree addr = TREE_OPERAND (decl, 0);
+ if (TREE_CODE (addr) == POINTER_PLUS_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ if (TREE_CODE (addr) == ADDR_EXPR
+ && DECL_P (TREE_OPERAND (addr, 0)))
+ decl = TREE_OPERAND (addr, 0);
+ }
+
n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl);
if (n->value & (GOVD_PRIVATE | GOVD_FIRSTPRIVATE))
{
@@ -1712,10 +1712,14 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
}
gcc_assert (!splay_tree_lookup (ctx->field_map,
(splay_tree_key) decl));
+ tree ptr_type = ptr_type_node;
+ if (TREE_CODE (decl) == ARRAY_REF)
+ ptr_type
+ = build_pointer_type (TREE_TYPE (TREE_OPERAND (decl, 0)));
tree field
= build_decl (OMP_CLAUSE_LOCATION (c),
- FIELD_DECL, NULL_TREE, ptr_type_node);
- SET_DECL_ALIGN (field, TYPE_ALIGN (ptr_type_node));
+ FIELD_DECL, NULL_TREE, ptr_type);
+ SET_DECL_ALIGN (field, TYPE_ALIGN (ptr_type));
insert_field_into_struct (ctx->record_type, field);
splay_tree_insert (ctx->field_map, (splay_tree_key) decl,
(splay_tree_value) field);
@@ -4420,6 +4424,27 @@ maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
tree
omp_reduction_init_op (location_t loc, enum tree_code op, tree type)
{
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ vec<constructor_elt, va_gc> *v = NULL;
+ HOST_WIDE_INT min = tree_to_shwi (TYPE_MIN_VALUE (TYPE_DOMAIN (type)));
+ HOST_WIDE_INT max = tree_to_shwi (TYPE_MAX_VALUE (TYPE_DOMAIN (type)));
+ tree t = omp_reduction_init_op (loc, op, TREE_TYPE (type));
+ for (HOST_WIDE_INT i = min; i <= max; i++)
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), t);
+ return build_constructor (type, v);
+ }
+ else if (TREE_CODE (type) == RECORD_TYPE)
+ {
+ vec<constructor_elt, va_gc> *v = NULL;
+ for (tree fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
+ if (TREE_CODE (fld) == FIELD_DECL)
+ CONSTRUCTOR_APPEND_ELT (v, fld,
+ omp_reduction_init_op (loc, op,
+ TREE_TYPE (fld)));
+ return build_constructor (type, v);
+ }
+
switch (op)
{
case PLUS_EXPR:
@@ -5339,6 +5364,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
x = create_tmp_var_raw (type, name);
gimple_add_tmp_var (x);
TREE_ADDRESSABLE (x) = 1;
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+ OMP_CLAUSE_REDUCTION_PRIVATE_EXPR (c) = x;
x = build_fold_addr_expr_loc (clause_loc, x);
}
else
@@ -7368,6 +7395,71 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p,
gimple_seq_add_seq (stmt_list, post_stmt_list);
}
+
+static tree
+oacc_array_reduction_bias (location_t loc, tree reduction_clause,
+ omp_context *ctx, tree map_clause,
+ omp_context *outer)
+{
+ tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (reduction_clause), 1);
+ tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (reduction_clause), 0);
+ if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR)
+ {
+ tree b = TREE_OPERAND (orig_var, 1);
+ b = maybe_lookup_decl (b, ctx);
+ if (b == NULL)
+ {
+ b = TREE_OPERAND (orig_var, 1);
+ b = maybe_lookup_decl_in_outer_ctx (b, ctx);
+ }
+ if (integer_zerop (bias))
+ bias = b;
+ else
+ {
+ bias = fold_convert_loc (loc, TREE_TYPE (b), bias);
+ bias = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (b), b, bias);
+ }
+ orig_var = TREE_OPERAND (orig_var, 0);
+ }
+
+ if (TREE_CODE (orig_var) == INDIRECT_REF
+ || TREE_CODE (orig_var) == ADDR_EXPR)
+ orig_var = TREE_OPERAND (orig_var, 0);
+
+ tree map_decl = OMP_CLAUSE_DECL (map_clause);
+ tree next = OMP_CLAUSE_CHAIN (map_clause);
+
+ tree orig_bias = integer_zero_node;
+ if (TREE_CODE (map_decl) == ARRAY_REF)
+ {
+ if (next && OMP_CLAUSE_CODE (next) == OMP_CLAUSE_MAP
+ && OMP_CLAUSE_DECL (next) == orig_var
+ && OMP_CLAUSE_MAP_KIND (next) == GOMP_MAP_FIRSTPRIVATE_POINTER)
+ {
+ orig_bias = OMP_CLAUSE_SIZE (next);
+ if (DECL_P (orig_bias))
+ orig_bias = lookup_decl (orig_bias, outer);
+ orig_bias = fold_convert_loc (loc, pointer_sized_int_node,
+ orig_bias);
+ }
+ else
+ {
+ tree idx = fold_convert_loc (loc, pointer_sized_int_node,
+ TREE_OPERAND (map_decl, 1));
+ orig_bias = fold_build2_loc (loc, MULT_EXPR,
+ pointer_sized_int_node, idx,
+ TYPE_SIZE_UNIT (TREE_TYPE (map_decl)));
+ gcc_assert (TREE_CONSTANT (orig_bias));
+ }
+ }
+
+ bias = fold_convert_loc (loc, pointer_sized_int_node, bias);
+ tree adjusted_bias = fold_build2_loc (loc, MINUS_EXPR,
+ pointer_sized_int_node,
+ bias, orig_bias);
+ return adjusted_bias;
+}
+
/* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL
(which might be a placeholder). INNER is true if this is an inner
axis of a multi-axis loop. FORK and JOIN are (optional) fork and
@@ -7406,11 +7498,29 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
gcc_checking_assert (!is_oacc_kernels_decomposed_part (ctx));
tree orig = OMP_CLAUSE_DECL (c);
+ tree addr = NULL_TREE, map_clause = NULL_TREE;
+ if (TREE_CODE (orig) == MEM_REF)
+ {
+ /* Peel away MEM_REF to get at base array VAR_DECL. */
+ addr = TREE_OPERAND (orig, 0);
+ if (TREE_CODE (addr) == POINTER_PLUS_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ else if (INDIRECT_REF_P (addr))
+ addr = TREE_OPERAND (addr, 0);
+ orig = addr;
+ gcc_assert (!is_variable_sized (addr));
+ }
+
tree var = maybe_lookup_decl (orig, ctx);
tree ref_to_res = NULL_TREE;
tree incoming, outgoing, v1, v2, v3;
bool is_private = false;
+ if (OMP_CLAUSE_REDUCTION_PRIVATE_EXPR (c))
+ var = OMP_CLAUSE_REDUCTION_PRIVATE_EXPR (c);
+
enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
if (rcode == MINUS_EXPR)
rcode = PLUS_EXPR;
@@ -7458,11 +7568,62 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
outer = probe;
for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
- if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
- && orig == OMP_CLAUSE_DECL (cls))
+ if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION)
{
- incoming = outgoing = lookup_decl (orig, probe);
- goto has_outer_reduction;
+ tree outer_decl = OMP_CLAUSE_DECL (cls);
+ if (TREE_CODE (outer_decl) == MEM_REF
+ && TREE_CODE (TREE_TYPE (orig)) == ARRAY_TYPE)
+ {
+ tree addr = TREE_OPERAND (outer_decl, 0);
+ if (TREE_CODE (addr) == POINTER_PLUS_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ if (TREE_CODE (addr) == ADDR_EXPR)
+ addr = TREE_OPERAND (addr, 0);
+ else if (INDIRECT_REF_P (addr))
+ addr = TREE_OPERAND (addr, 0);
+ outer_decl = addr;
+ }
+ if (orig == outer_decl)
+ {
+ incoming = outgoing = lookup_decl (orig, probe);
+
+ if (TREE_CODE (TREE_TYPE (orig)) == ARRAY_TYPE)
+ {
+ tree m = gimple_omp_target_clauses (probe->stmt);
+ for (; m; m = OMP_CLAUSE_CHAIN (m))
+ if (OMP_CLAUSE_CODE (m) == OMP_CLAUSE_MAP)
+ {
+ tree md = OMP_CLAUSE_DECL (m);
+ if (outer_decl == md
+ || (TREE_CODE (md) == ARRAY_REF
+ && (TREE_OPERAND (md, 0)
+ == outer_decl)))
+ break;
+ }
+ tree adjusted_bias
+ = oacc_array_reduction_bias (loc, c, ctx, m,
+ outer);
+
+ tree addr = build_fold_addr_expr (incoming);
+ if (!TREE_CONSTANT (adjusted_bias))
+ {
+ tree x = create_tmp_var (TREE_TYPE (addr));
+ addr = fold_build2_loc
+ (loc, POINTER_PLUS_EXPR, TREE_TYPE (addr),
+ addr, adjusted_bias);
+ gimplify_assign (x, addr, &before_fork);
+ addr = x;
+ adjusted_bias = integer_zero_node;
+ }
+ tree ref = fold_build2_loc
+ (loc, MEM_REF,
+ TREE_TYPE (OMP_CLAUSE_DECL (c)),
+ addr, fold_convert_loc (loc, ptr_type_node,
+ adjusted_bias));
+ incoming = outgoing = ref;
+ }
+ goto has_outer_reduction;
+ }
}
else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
|| OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
@@ -7476,6 +7637,26 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
do_lookup:
/* This is the outermost construct with this reduction,
see if there's a mapping for it. */
+ if (TREE_CODE (TREE_TYPE (orig)) == ARRAY_TYPE
+ && gimple_code (outer->stmt) == GIMPLE_OMP_TARGET)
+ /* Recover original MEM_REF in OMP_CLAUSE_DECL from array
+ VAR_DECL discovered above. This is due to field lookup
+ key based on whole MEM_REF earlier during scanning. */
+ for (tree c = gimple_omp_target_clauses (outer->stmt); c;
+ c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
+ {
+ tree decl = OMP_CLAUSE_DECL (c);
+ if (orig == decl
+ || (TREE_CODE (decl) == ARRAY_REF
+ && TREE_OPERAND (decl, 0) == orig))
+ {
+ orig = decl;
+ map_clause = c;
+ break;
+ }
+ }
+
if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
&& maybe_lookup_field (orig, outer) && !is_private)
{
@@ -7486,6 +7667,35 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
tree type = TREE_TYPE (var);
if (POINTER_TYPE_P (type))
type = TREE_TYPE (type);
+ else if (TREE_CODE (type) == ARRAY_TYPE
+ && OMP_CLAUSE_REDUCTION_PRIVATE_EXPR (c))
+ {
+ gcc_assert
+ (POINTER_TYPE_P (TREE_TYPE (ref_to_res))
+ && (POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (ref_to_res)))
+ || (TREE_CODE (TREE_TYPE (TREE_TYPE (ref_to_res)))
+ == ARRAY_TYPE)));
+ type = TREE_TYPE (OMP_CLAUSE_REDUCTION_PRIVATE_EXPR (c));
+ tree ptr_type = build_pointer_type (type);
+ tree x = create_tmp_var (ptr_type);
+
+ tree adjusted_bias
+ = oacc_array_reduction_bias (loc, c, ctx, map_clause,
+ outer);
+ if (!integer_zerop (adjusted_bias))
+ {
+ tree y = fold_convert_loc (loc, ptr_type_node,
+ ref_to_res);
+ y = fold_build2_loc (loc, POINTER_PLUS_EXPR,
+ ptr_type_node,
+ y, adjusted_bias);
+ ref_to_res = y;
+ }
+ gimplify_assign (x, fold_convert_loc (loc, ptr_type,
+ ref_to_res),
+ &before_fork);
+ ref_to_res = x;
+ }
outgoing = var;
incoming = omp_reduction_init_op (loc, rcode, type);
@@ -7547,10 +7757,10 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
variable-sized type. */
fixed_size_mode mode
= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (var)));
- unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
+ unsigned align = TYPE_ALIGN_UNIT (TREE_TYPE (var));
offset = (offset + align - 1) & ~(align - 1);
tree off = build_int_cst (sizetype, offset);
- offset += GET_MODE_SIZE (mode);
+ offset += tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (var)));
if (!init_code)
{
@@ -991,7 +991,8 @@ worker_single_copy (basic_block from, basic_block to,
hash_set<tree> *worker_partitioned_uses,
tree record_type, record_field_map_t *record_field_map,
unsigned HOST_WIDE_INT placement,
- bool isolate_broadcasts, bool has_gang_private_write)
+ bool isolate_broadcasts, bool has_gang_private_write,
+ hash_set<tree> *array_reduction_base_vars)
{
/* If we only have virtual defs, we'll have no record type, but we still want
to emit single_copy_start and (particularly) single_copy_end to act as
@@ -1015,6 +1016,37 @@ worker_single_copy (basic_block from, basic_block to,
edge e = split_block (to, gsi_stmt (gsi));
basic_block barrier_block = e->dest;
+ gimple_seq local_asgns = NULL;
+
+ /* For accesses of variables used in array reductions, instead of
+ propagating the value for the main thread to all other worker threads
+ (which doesn't make sense as a reduction private var), move the defs
+ of such SSA_NAMEs to before the copy block and leave them alone (each
+ thread should access their own local copy). */
+ for (gimple_stmt_iterator i = gsi_after_labels (from); !gsi_end_p (i);)
+ {
+ gimple *stmt = gsi_stmt (i);
+ if (gimple_assign_single_p (stmt)
+ && def_escapes_block->contains (gimple_assign_lhs (stmt))
+ && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
+ {
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ if (TREE_CODE (rhs) == ADDR_EXPR)
+ {
+ rhs = TREE_OPERAND (rhs, 0);
+ if (local_var_based_p (rhs)
+ && array_reduction_base_vars->contains (lhs))
+ {
+ gsi_remove (&i, false);
+ gimple_seq_add_stmt (&local_asgns, stmt);
+ continue;
+ }
+ }
+ }
+ gsi_next (&i);
+ }
+
gimple_stmt_iterator start = gsi_after_labels (from);
tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START);
@@ -1029,6 +1061,9 @@ worker_single_copy (basic_block from, basic_block to,
gsi_insert_before (&start, call, GSI_NEW_STMT);
update_stmt (call);
+ if (local_asgns)
+ gsi_insert_seq_before (&start, local_asgns, GSI_SAME_STMT);
+
/* The shared-memory range for this block overflowed. Add a barrier before
the GOACC_single_copy_start call. */
if (isolate_broadcasts)
@@ -1128,6 +1163,22 @@ worker_single_copy (basic_block from, basic_block to,
if (gimple_nop_p (def_stmt))
continue;
+ /* For accesses of variables used in array reductions, skip creating
+ the barrier phi. Each thread runs same def_stmt to access
+ local variable, there is no main/worker divide here. */
+ if (gimple_assign_single_p (def_stmt))
+ {
+ tree lhs = gimple_assign_lhs (def_stmt);
+ tree rhs = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs) == ADDR_EXPR)
+ {
+ rhs = TREE_OPERAND (rhs, 0);
+ if (local_var_based_p (rhs)
+ && array_reduction_base_vars->contains (lhs))
+ continue;
+ }
+ }
+
/* The barrier phi takes one result from the actual work of the
block we're neutering, and the other result is constant zero of
the same type. */
@@ -1248,7 +1299,8 @@ neuter_worker_single (parallel_g *par, unsigned outer_mask,
hash_set<tree> *partitioned_var_uses,
record_field_map_t *record_field_map,
blk_offset_map_t *blk_offset_map,
- bitmap writes_gang_private)
+ bitmap writes_gang_private,
+ hash_set<tree> *array_reduction_base_vars)
{
unsigned mask = outer_mask | par->mask;
@@ -1398,7 +1450,8 @@ neuter_worker_single (parallel_g *par, unsigned outer_mask,
&worker_partitioned_uses, record_type,
record_field_map,
offset, !range_allocated,
- has_gang_private_write);
+ has_gang_private_write,
+ array_reduction_base_vars);
}
else
worker_single_simple (block, block, &def_escapes_block);
@@ -1436,11 +1489,13 @@ neuter_worker_single (parallel_g *par, unsigned outer_mask,
if (par->inner)
neuter_worker_single (par->inner, mask, worker_single, vector_single,
prop_set, partitioned_var_uses, record_field_map,
- blk_offset_map, writes_gang_private);
+ blk_offset_map, writes_gang_private,
+ array_reduction_base_vars);
if (par->next)
neuter_worker_single (par->next, outer_mask, worker_single, vector_single,
prop_set, partitioned_var_uses, record_field_map,
- blk_offset_map, writes_gang_private);
+ blk_offset_map, writes_gang_private,
+ array_reduction_base_vars);
}
static void
@@ -1587,7 +1642,8 @@ merge_ranges (splay_tree accum, splay_tree sp)
static void
oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo,
- unsigned HOST_WIDE_INT bounds_hi)
+ unsigned HOST_WIDE_INT bounds_hi,
+ hash_set<tree> *array_reduction_base_vars)
{
bb_stmt_map_t bb_stmt_map;
auto_bitmap worker_single, vector_single;
@@ -1792,7 +1848,8 @@ oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo,
neuter_worker_single (par, mask, worker_single, vector_single, &prop_set,
&partitioned_var_uses, &record_field_map,
- &blk_offset_map, writes_gang_private);
+ &blk_offset_map, writes_gang_private,
+ array_reduction_base_vars);
record_field_map.empty ();
@@ -1831,6 +1888,9 @@ execute_omp_oacc_neuter_broadcast ()
private_size[i] = 0;
}
+ /* Set of base variables referencing arrays used in array reductions. */
+ hash_set<tree> array_reduction_base_vars;
+
/* Calculate shared memory size required for reduction variables and
gang-private memory for this offloaded function. */
basic_block bb;
@@ -1869,6 +1929,15 @@ execute_omp_oacc_neuter_broadcast ()
+ tree_to_uhwi (TYPE_SIZE_UNIT (var_type)));
reduction_size[level]
= MAX (reduction_size[level], limit);
+
+ tree lhs = gimple_get_lhs (call);
+ if (TREE_CODE (lhs) == MEM_REF
+ && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME
+ && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
+ {
+ tree addr = TREE_OPERAND (lhs, 0);
+ array_reduction_base_vars.add (addr);
+ }
}
}
break;
@@ -1917,7 +1986,7 @@ execute_omp_oacc_neuter_broadcast ()
/* Perform worker partitioning unless we know 'num_workers(1)'. */
if (dims[GOMP_DIM_WORKER] != 1)
- oacc_do_neutering (bounds_lo, bounds_hi);
+ oacc_do_neutering (bounds_lo, bounds_hi, &array_reduction_base_vars);
return 0;
}
@@ -1819,7 +1819,7 @@ default_goacc_reduction (gcall *call)
/* Copy VAR to LHS, if there is an LHS. */
if (lhs)
- gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
+ gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, unshare_expr (var)));
gsi_replace_with_seq (&gsi, seq, true);
}
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+/* float array reductions. */
+
+#define n 1000
+
+int
+main(void)
+{
+ int i, j;
+ float result[n], array[n];
+ int lresult[n];
+
+ /* '+' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (+:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] += array[i];
+
+ /* '*' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (*:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] *= array[i];
+
+ /* 'max' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (max:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] = result[j] > array[i] ? result[j] : array[i];
+
+ /* 'min' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (min:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] = result[j] < array[i] ? result[j] : array[i];
+
+ /* '&&' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (&&:lresult)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] && (result[j] > array[i]);
+
+ /* '||' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (||:lresult)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] || (result[j] > array[i]);
+
+ return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 6 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
new file mode 100644
@@ -0,0 +1,60 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+/* double array reductions. */
+
+#define n 1000
+
+int
+main(void)
+{
+ int i, j;
+ double result[n], array[n];
+ int lresult[n];
+
+ /* '+' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (+:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] += array[i];
+
+ /* '*' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (*:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] *= array[i];
+
+ /* 'max' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (max:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] = result[j] > array[i] ? result[j] : array[i];
+
+ /* 'min' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (min:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] = result[j] < array[i] ? result[j] : array[i];
+
+ /* '&&' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (&&:lresult)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] && (result[j] > array[i]);
+
+ /* '||' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (||:lresult)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] || (result[j] > array[i]);
+
+ return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 6 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+/* complex array reductions. */
+
+#define n 1000
+
+int
+main(void)
+{
+ int i, j;
+ __complex__ double result[n], array[n];
+ int lresult[n];
+
+ /* '+' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (+:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] += array[i];
+
+ /* '*' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (*:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] *= array[i];
+
+ /* '&&' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (&&:lresult)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] && (__real__(result[j]) > __real__(array[i]));
+
+ /* '||' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (||:lresult[j])
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] || (__real__(result[j]) > __real__(array[i]));
+
+ return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+/* struct reductions. */
+
+typedef struct { int x, y; } int_pair;
+typedef struct { float m, n; } flt_pair;
+typedef struct
+{
+ int i;
+ double d;
+ float f;
+ int a[4];
+ int_pair ip;
+ flt_pair fp;
+} rectype;
+
+#define n 1000
+
+int
+main(void)
+{
+ int i;
+ rectype result, array[n];
+
+ /* '+' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (+:result)
+ for (i = 0; i < n; i++)
+ {
+ result.i += array[i].i;
+ result.f += array[i].f;
+ result.ip.x += array[i].ip.x;
+ result.ip.y += array[i].ip.y;
+ }
+
+ /* '*' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (*:result)
+ for (i = 0; i < n; i++)
+ {
+ result.i *= array[i].i;
+ result.f *= array[i].f;
+ result.ip.x *= array[i].ip.x;
+ result.ip.y *= array[i].ip.y;
+ }
+
+ return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
+
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-compile } */
+#include <stdlib.h>
+
+int foo (int n)
+{
+ int x[5][5];
+ int y[n];
+ int *z = (int *) malloc (5 * sizeof (int));
+
+ #pragma acc parallel
+ {
+ #pragma acc loop reduction(+:x)
+ for (int i = 0; i < 5; i++) ;
+ #pragma acc loop reduction(+:y) /* { dg-message "sorry, unimplemented: array in reduction must be of constant size" } */
+ for (int i = 0; i < 5; i++) ;
+
+ #pragma acc loop reduction(+:x[2:1][0:5])
+ for (int i = 0; i < 5; i++) ;
+ #pragma acc loop reduction(+:x[0:5][2:1]) /* { dg-error "array section is not contiguous in 'reduction' clause" } */
+ for (int i = 0; i < 5; i++) ;
+
+ #pragma acc loop reduction(+:y[0:5]) /* { dg-message "sorry, unimplemented: array in reduction must be of constant size" } */
+ for (int i = 0; i < 5; i++) ;
+
+ #pragma acc loop reduction(+:z[0:5])
+ for (int i = 0; i < 5; i++) ;
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,81 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+/* Integer array reductions. */
+
+#define n 1000
+
+int
+main(void)
+{
+ int i, j;
+ int result[n], array[n];
+ int lresult[n];
+
+ /* '+' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (+:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] += array[i];
+
+ /* '*' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (*:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] *= array[i];
+
+ /* 'max' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (max:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] = result[j] > array[i] ? result[j] : array[i];
+
+ /* 'min' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (min:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] = result[j] < array[i] ? result[j] : array[i];
+
+ /* '&' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (&:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] &= array[i];
+
+ /* '|' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (|:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] |= array[i];
+
+ /* '^' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (^:result)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ result[j] ^= array[i];
+
+ /* '&&' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (&&:lresult)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] && (result[j] > array[i]);
+
+ /* '||' reductions. */
+#pragma acc parallel
+#pragma acc loop gang worker vector reduction (||:lresult)
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ lresult[j] = lresult[j] || (result[j] > array[i]);
+
+ return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 9 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
@@ -244,7 +244,7 @@ unsigned const char omp_clause_num_ops[] =
1, /* OMP_CLAUSE_SHARED */
1, /* OMP_CLAUSE_FIRSTPRIVATE */
2, /* OMP_CLAUSE_LASTPRIVATE */
- 5, /* OMP_CLAUSE_REDUCTION */
+ 6, /* OMP_CLAUSE_REDUCTION */
5, /* OMP_CLAUSE_TASK_REDUCTION */
5, /* OMP_CLAUSE_IN_REDUCTION */
1, /* OMP_CLAUSE_COPYIN */
@@ -1913,6 +1913,10 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \
OMP_CLAUSE_IN_REDUCTION), 4)
+/* Used for carrying the private copy used for reductions, currently used for
+ OpenACC array reductions. */
+#define OMP_CLAUSE_REDUCTION_PRIVATE_EXPR(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_REDUCTION), 5)
/* True if a REDUCTION clause may reference the original list item (omp_orig)
in its OMP_CLAUSE_REDUCTION_{,GIMPLE_}INIT. */
new file mode 100644
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+
+/* Array reductions. */
+
+#include <stdlib.h>
+#include "reduction.h"
+
+#define ng 8
+#define nw 4
+#define vl 32
+
+#define N 10
+
+#define check_reduction_array_op_all(type, opr, init, b) \
+ check_reduction_xxx_xx_all(array, op, type, opr, init, b)
+#define check_reduction_arraysec_op_all(type, opr, init, b) \
+ check_reduction_xxx_xx_all(arraysec, op, type, opr, init, b)
+#define check_reduction_array_macro_all(type, opr, init, b) \
+ check_reduction_xxx_xx_all(array, macro, type, opr, init, b)
+#define check_reduction_arraysec_macro_all(type, opr, init, b) \
+ check_reduction_xxx_xx_all(arraysec, macro, type, opr, init, b)
+
+int
+main (void)
+{
+ const int n = 100;
+ int ints[n];
+ float flts[n];
+ double dbls[n];
+ int cmp_val = 5;
+
+ for (int i = 0; i < n; i++)
+ {
+ ints[i] = i + 1;
+ flts[i] = i + 1;
+ dbls[i] = i + 1;
+ }
+
+ check_reduction_array_op_all (int, +, 0, ints[i]);
+ check_reduction_array_op_all (int, *, 1, ints[i]);
+ check_reduction_array_op_all (int, &, -1, ints[i]);
+ check_reduction_array_op_all (int, |, 0, ints[i]);
+ check_reduction_array_op_all (int, ^, 0, ints[i]);
+ check_reduction_array_op_all (int, &&, 1, (cmp_val > ints[i]));
+ check_reduction_array_op_all (int, ||, 0, (cmp_val > ints[i]));
+ check_reduction_array_macro_all (int, min, n + 1, ints[i]);
+ check_reduction_array_macro_all (int, max, -1, ints[i]);
+
+ check_reduction_array_op_all (float, +, 0, flts[i]);
+ check_reduction_array_op_all (float, *, 1, flts[i]);
+ check_reduction_array_macro_all (float, min, n + 1, flts[i]);
+ check_reduction_array_macro_all (float, max, -1, flts[i]);
+
+ check_reduction_arraysec_op_all (int, +, 0, ints[i]);
+ check_reduction_arraysec_op_all (float, *, 1, flts[i]);
+ check_reduction_arraysec_macro_all (double, min, n + 1, dbls[i]);
+ check_reduction_arraysec_macro_all (double, max, -1, dbls[i]);
+
+ check_reduction_array_op_all (double, +, 0, dbls[i]);
+#if 0
+ /* Currently fails due to unclear issue, presumably unrelated to reduction
+ mechanics. Avoiding for now. */
+ check_reduction_array_op_all (double, *, 1.0, dbls[i]);
+#endif
+ check_reduction_array_macro_all (double, min, n + 1, dbls[i]);
+ check_reduction_array_macro_all (double, max, -1, dbls[i]);
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+
+/* More array reduction tests, different combinations of parallel/loop
+ construct, implied/explicit copy clauses, and subarrays. */
+
+#define ARRAY_BODY(ARRAY, MIN, LEN) \
+ for (int i = 0; i < 10; i++) \
+ for (int j = MIN; j < MIN + LEN; j++) \
+ ARRAY[j] += 1;
+
+int main (void)
+{
+ int o[6] = { 5, 1, 1, 5, 9, 9 };
+ int a[6];
+
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ a[i] = o[i];
+
+ #pragma acc parallel
+ #pragma acc loop reduction(+:a[1:2])
+ ARRAY_BODY (a, 1, 2)
+ ARRAY_BODY (o, 1, 2)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel copy(a[3:2])
+ #pragma acc loop reduction(+:a[3:2])
+ ARRAY_BODY (a, 3, 2)
+ ARRAY_BODY (o, 3, 2)
+ for (int i = 0; i < 6; i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel copy(a)
+ #pragma acc loop reduction(+:a[0:5])
+ ARRAY_BODY (a, 0, 5)
+ ARRAY_BODY (o, 0, 5)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel
+ #pragma acc loop reduction(+:a)
+ ARRAY_BODY (a, 4, 1)
+ ARRAY_BODY (o, 4, 1)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel copy(a)
+ #pragma acc loop reduction(+:a)
+ ARRAY_BODY (a, 3, 3)
+ ARRAY_BODY (o, 3, 3)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel loop reduction(+:a)
+ ARRAY_BODY (a, 1, 3)
+ ARRAY_BODY (o, 1, 3)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel loop reduction(+:a[2:3])
+ ARRAY_BODY (a, 2, 3)
+ ARRAY_BODY (o, 2, 3)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel reduction(+:a)
+ ARRAY_BODY (a, 3, 2)
+ ARRAY_BODY (o, 3, 2)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel reduction(+:a[1:2])
+ ARRAY_BODY (a, 1, 2)
+ ARRAY_BODY (o, 1, 2)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+
+/* Same as reduction-arrays-2.c test, but with non-constant subarray
+ base indexes. */
+
+#define ARRAY_BODY(ARRAY, MIN, LEN) \
+ for (int i = 0; i < 10; i++) \
+ for (int j = MIN; j < MIN + LEN; j++) \
+ ARRAY[j] += 1;
+
+int zero = 0;
+int one = 1;
+int two = 2;
+int three = 3;
+int four = 4;
+
+int main (void)
+{
+ int o[6] = { 5, 1, 1, 5, 9, 9 };
+ int a[6];
+
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ a[i] = o[i];
+
+ #pragma acc parallel
+ #pragma acc loop reduction(+:a[one:2])
+ ARRAY_BODY (a, one, 2)
+ ARRAY_BODY (o, one, 2)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel copy(a[three:2])
+ #pragma acc loop reduction(+:a[three:2])
+ ARRAY_BODY (a, three, 2)
+ ARRAY_BODY (o, three, 2)
+ for (int i = 0; i < 6; i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel copy(a)
+ #pragma acc loop reduction(+:a[zero:5])
+ ARRAY_BODY (a, zero, 5)
+ ARRAY_BODY (o, zero, 5)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel
+ #pragma acc loop reduction(+:a)
+ ARRAY_BODY (a, four, 1)
+ ARRAY_BODY (o, four, 1)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel copy(a)
+ #pragma acc loop reduction(+:a)
+ ARRAY_BODY (a, three, 3)
+ ARRAY_BODY (o, three, 3)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel loop reduction(+:a)
+ ARRAY_BODY (a, one, 3)
+ ARRAY_BODY (o, one, 3)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel loop reduction(+:a[two:3])
+ ARRAY_BODY (a, two, 3)
+ ARRAY_BODY (o, two, 3)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ #pragma acc parallel reduction(+:a[one:2])
+ ARRAY_BODY (a, one, 2)
+ ARRAY_BODY (o, one, 2)
+ for (int i = 0; i < sizeof (a) / sizeof (int); i++)
+ if (a[i] != o[i])
+ __builtin_abort ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,121 @@
+/* { dg-do run } */
+
+/* Struct reductions. */
+
+#include <stdlib.h>
+#include "reduction.h"
+
+#define ng 8
+#define nw 4
+#define vl 32
+
+#define N 10
+
+typedef struct { int x, y; } int_pair;
+typedef struct { float m, n; } flt_pair;
+typedef struct
+{
+ int i;
+ double d;
+ float f;
+ int a[N];
+ int_pair ip;
+ flt_pair fp;
+} rectype;
+
+static void
+init_struct (rectype *rec, int val)
+{
+ rec->i = val;
+ rec->d = (double) val;
+ rec->f = (float) val;
+ for (int i = 0; i < N; i++)
+ rec->a[i] = val;
+ rec->ip.x = val;
+ rec->ip.y = val;
+ rec->fp.m = (float) val;
+ rec->fp.n = (float) val;
+}
+
+static int
+struct_eq (rectype *a, rectype *b)
+{
+ if (a->i != b->i || a->d != b->d
+ || a->f != b->f
+ || a->ip.x != b->ip.x
+ || a->ip.y != b->ip.y
+ || a->fp.m != b->fp.m
+ || a->fp.n != b->fp.n)
+ return 0;
+
+ for (int i = 0; i < N; i++)
+ if (a->a[i] != b->a[i])
+ return 0;
+ return 1;
+}
+
+#define check_reduction_struct_xx(type, op, init, b, gwv_par, gwv_loop, apply) \
+ { \
+ type res, vres; \
+ init_struct (&res, init); \
+ DO_PRAGMA (acc parallel gwv_par copy(res)) \
+ DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
+ for (int i = 0; i < n; i++) \
+ { \
+ res.i = apply (op, res.i, b); \
+ res.d = apply (op, res.d, b); \
+ res.f = apply (op, res.f, b); \
+ for (int j = 0; j < N; j++) \
+ res.a[j] = apply (op, res.a[j], b); \
+ res.ip.x = apply (op, res.ip.x, b); \
+ res.ip.y = apply (op, res.ip.y, b); \
+ res.fp.m = apply (op, res.fp.m, b); \
+ res.fp.n = apply (op, res.fp.n, b); \
+ } \
+ \
+ init_struct (&vres, init); \
+ for (int i = 0; i < n; i++) \
+ { \
+ vres.i = apply (op, vres.i, b); \
+ vres.d = apply (op, vres.d, b); \
+ vres.f = apply (op, vres.f, b); \
+ for (int j = 0; j < N; j++) \
+ vres.a[j] = apply (op, vres.a[j], b); \
+ vres.ip.x = apply (op, vres.ip.x, b); \
+ vres.ip.y = apply (op, vres.ip.y, b); \
+ vres.fp.m = apply (op, vres.fp.m, b); \
+ vres.fp.n = apply (op, vres.fp.n, b); \
+ } \
+ \
+ if (!struct_eq (&res, &vres)) \
+ __builtin_abort (); \
+ }
+
+#define operator_apply(op, a, b) (a op b)
+#define check_reduction_struct_op(type, op, init, b, gwv_par, gwv_loop) \
+ check_reduction_struct_xx(type, op, init, b, gwv_par, gwv_loop, operator_apply)
+
+#define function_apply(op, a, b) (op (a, b))
+#define check_reduction_struct_macro(type, op, init, b, gwv_par, gwv_loop) \
+ check_reduction_struct_xx(type, op, init, b, gwv_par, gwv_loop, function_apply)
+
+#define check_reduction_struct_op_all(type, opr, init, b) \
+ check_reduction_xxx_xx_all (struct, op, type, opr, init, b)
+#define check_reduction_struct_macro_all(type, opr, init, b) \
+ check_reduction_xxx_xx_all (struct, macro, type, opr, init, b)
+
+int
+main (void)
+{
+ const int n = 10;
+ int ints[n];
+
+ for (int i = 0; i < n; i++)
+ ints[i] = i + 1;
+
+ check_reduction_struct_op_all (rectype, +, 0, ints[i]);
+ check_reduction_struct_op_all (rectype, *, 1, ints[i]);
+ check_reduction_struct_macro_all (rectype, min, n + 1, ints[i]);
+ check_reduction_struct_macro_all (rectype, max, -1, ints[i]);
+ return 0;
+}
@@ -37,6 +37,58 @@ DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
abort (); \
}
+#define check_reduction_array_xx(type, var, var_in_clause, op, init, b, \
+ gwv_par, gwv_loop, apply) \
+ { \
+ type var[N], var ## _check[N]; \
+ for (int i = 0; i < N; i++) \
+ var[i] = var ## _check[i] = (init); \
+ DO_PRAGMA (acc parallel gwv_par copy (var_in_clause)) \
+ DO_PRAGMA (acc loop gwv_loop reduction (op: var_in_clause)) \
+ for (int i = 0; i < n; i++) \
+ for (int j = 0; j < N; j++) \
+ var[j] = apply (op, var[j], (b)); \
+ \
+ for (int i = 0; i < n; i++) \
+ for (int j = 0; j < N; j++) \
+ var ## _check[j] = apply (op, var ## _check[j], (b)); \
+ \
+ for (int j = 0; j < N; j++) \
+ if (var[j] != var ## _check[j]) \
+ abort (); \
+ }
+
+#define operator_apply(op, a, b) (a op b)
+#define check_reduction_array_op(type, op, init, b, gwv_par, gwv_loop) \
+ check_reduction_array_xx (type, v, v, op, init, b, gwv_par, gwv_loop, \
+ operator_apply)
+#define check_reduction_arraysec_op(type, op, init, b, gwv_par, gwv_loop) \
+ check_reduction_array_xx (type, v, v[:N], op, init, b, gwv_par, gwv_loop, \
+ operator_apply)
+
+
+#define function_apply(op, a, b) (op (a, b))
+#define check_reduction_array_macro(type, op, init, b, gwv_par, gwv_loop)\
+ check_reduction_array_xx (type, v, v, op, init, b, gwv_par, gwv_loop, \
+ function_apply)
+#define check_reduction_arraysec_macro(type, op, init, b, gwv_par, gwv_loop)\
+ check_reduction_array_xx (type, v, v[:N], op, init, b, gwv_par, gwv_loop, \
+ function_apply)
+
+#define check_reduction_xxx_xx_all(tclass, form, type, op, init, b) \
+ check_reduction_ ## tclass ## _ ## form (type, op, init, b, num_gangs (ng), gang); \
+ check_reduction_ ## tclass ## _ ## form (type, op, init, b, num_workers (nw), worker); \
+ check_reduction_ ## tclass ## _ ## form (type, op, init, b, vector_length (vl), vector); \
+ check_reduction_ ## tclass ## _ ## form (type, op, init, b, \
+ num_gangs (ng) num_workers (nw), gang worker); \
+ check_reduction_ ## tclass ## _ ## form (type, op, init, b, \
+ num_gangs (ng) vector_length (vl), gang vector); \
+ check_reduction_ ## tclass ## _ ## form (type, op, init, b, \
+ num_workers (nw) vector_length (vl), worker vector); \
+ check_reduction_ ## tclass ## _ ## form (type, op, init, b, \
+ num_gangs (ng) num_workers (nw) vector_length (vl), \
+ gang worker vector);
+
#define max(a, b) (((a) > (b)) ? (a) : (b))
#define min(a, b) (((a) < (b)) ? (a) : (b))