@@ -2991,6 +2991,10 @@ gfc_omp_deep_map_kind_p (tree clause)
case GOMP_MAP_FIRSTPRIVATE_POINTER:
case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
case GOMP_MAP_ATTACH_DETACH:
+ case GOMP_MAP_TO_GRID:
+ case GOMP_MAP_FROM_GRID:
+ case GOMP_MAP_GRID_DIM:
+ case GOMP_MAP_GRID_STRIDE:
break;
default:
gcc_unreachable ();
@@ -4258,6 +4262,346 @@ get_symbol_rooted_namelist (hash_map<gfc_symbol *,
return NULL;
}
+/* We build an "un-Fortrannish" array-of-arrays here to pass our calculated
+ array bounds to the middle end for strided/rectangular OpenMP
+ "target update" operations. */
+
+static tree
+gfc_trans_omp_arrayshape_type (tree type, vec<tree> *dims)
+{
+ gcc_assert (dims->length () > 0);
+
+ for (int i = dims->length () - 1; i >= 0; i--)
+ {
+ tree dim = fold_convert (sizetype, (*dims)[i]);
+ /* We need the index of the last element, not the array size. */
+ dim = size_binop (MINUS_EXPR, dim, size_one_node);
+ tree idxtype = build_index_type (dim);
+ type = build_array_type (type, idxtype);
+ }
+
+ return type;
+}
+
+/* Emit code to find the greatest common divisor of two (gfc_array_index_type)
+ trees to BLOCK. This is Euclid's algorithm:
+
+ int
+ gcd (int a, int b)
+ {
+ int tmp;
+ while (b != 0)
+ {
+ tmp = b;
+ b = a % b;
+ a = tmp;
+ }
+ return a;
+ }
+*/
+
+static void
+gfc_omp_calculate_gcd (stmtblock_t *block, tree dst, tree a, tree b)
+{
+ tree tmp = gfc_create_var (gfc_array_index_type, "tmp");
+ tree avar = gfc_create_var (gfc_array_index_type, "a");
+ tree bvar = gfc_create_var (gfc_array_index_type, "b");
+
+ /* Avoid clobbering the inputs. */
+ gfc_add_modify (block, avar, a);
+ gfc_add_modify (block, bvar, b);
+
+ tree label_cond = gfc_build_label_decl (NULL_TREE);
+ tree label_loop = gfc_build_label_decl (NULL_TREE);
+ TREE_USED (label_cond) = 1;
+ TREE_USED (label_loop) = 1;
+
+ gfc_add_expr_to_block (block, build1_v (GOTO_EXPR, label_cond));
+ gfc_add_expr_to_block (block, build1_v (LABEL_EXPR, label_loop));
+
+ gfc_add_modify (block, tmp, bvar);
+ gfc_add_modify (block, bvar,
+ fold_build2_loc (input_location, TRUNC_MOD_EXPR,
+ gfc_array_index_type, avar, bvar));
+ gfc_add_modify (block, avar, tmp);
+
+ gfc_add_expr_to_block (block, build1_v (LABEL_EXPR, label_cond));
+
+ tmp = fold_build2_loc (input_location, NE_EXPR, boolean_type_node, bvar,
+ gfc_index_zero_node);
+ tmp = build3_v (COND_EXPR, tmp, build1_v (GOTO_EXPR, label_loop),
+ build_empty_stmt (input_location));
+ gfc_add_expr_to_block (block, tmp);
+
+ gfc_add_modify (block, dst, avar);
+}
+
+/* Convert a gfortran array descriptor -- specifically the per-dimension
+ strides -- into a form that can be easily translated to a noncontiguous
+ OpenMP "target update" operation. We emit a specialized version of a
+ function like this inline:
+
+ void
+ gfc_desc_to_omp_noncontig_array (int *dims, int *strides, int ndims,
+ int *fstrides, int *flo, int *fhi)
+ {
+ dims[ndims - 1] = (fhi[ndims - 1] - flo[ndims - 1] + 1);
+ strides[0] = fstrides[0];
+ if (ndims > 1)
+ strides[ndims - 1] = 1;
+ if (ndims == 2)
+ dims[0] = fstrides[1];
+ else if (ndims > 2)
+ {
+ int grains[ndims - 2];
+
+ int bigger_grain = fstrides[ndims - 1];
+ for (int i = ndims - 2; i > 0; i--)
+ {
+ grains[i - 1] = gcd (fstrides[i], bigger_grain);
+ bigger_grain = grains[i - 1];
+ }
+
+ int volume = 1;
+ for (int i = 0; i < ndims - 2; i++)
+ {
+ int g = grains[i];
+ dims[i] = g / volume;
+ strides[i + 1] = fstrides[i + 1] / g;
+ volume = volume * dims[i];
+ }
+ dims[ndims - 2] = fstrides[ndims - 1] / volume;
+ }
+ }
+
+ where "fstrides", "flo" and "fhi" represent the stride, low bound and upper
+ bound of each dimension in the Fortran array descriptor.
+
+ (Note that most of the complexity only applies to arrays with more than two
+ dimensions, and the final stanza won't be emitted at all for lower-ranked
+ arrays.)
+
+ The output of the algorithm is a set of dimensions dims[] = { D, C, B, A }
+ "as if" the array was declared like this (in C):
+
+ type arr[A][B][C][D];
+
+ i.e. with the innermost dimension first, and a set of strides (in terms of
+ the step size along each dimension, without previous dimensions multiplied
+ in).
+
+ As an example, if we have an array:
+
+ allocate (arr(18,19,20,21,22))
+
+ and an update operation:
+
+ !$omp target update to(arr(1:3:2,1:4:3,1:5:4,1:6:5,1:7:6))
+
+ the strides we see in the Fortran array descriptor will be:
+
+ 2 54 1368 34200 861840
+
+ as given by:
+
+ 2 = stride0
+ 54 = dim0 * stride1
+ 1368 = dim0 * dim1 * stride2
+ 34200 = dim0 * dim1 * dim2 * stride3
+ 861840 = dim0 * dim1 * dim2 * dim3 * stride4
+
+ where "dimN" are the extents of each dimension (18,19,20,21,22), and
+ "strideN" are the strides in terms of step length along each dimension
+ (2,3,4,5,6).
+
+ We'd like to figure out what the original dimN, strideN were from the
+ Fortran array descriptor, but that's in general impossible. Furthermore,
+ if we naively divide a stride by the preceding stride, the result isn't
+ necessarily an integer, as for e.g.:
+
+ 861840/34200 = 25.2
+
+ What we can do though is figure out the greatest common divisor of
+ each stride and the preceding one, from the largest down, and use those as
+ units of granularity, i.e. the size of the corresponding dimension we pass
+ to the middle-end/runtime. The stepwise stride is then the number of
+ times each "grain" fits into the Fortran array descriptor stride.
+
+ The output of the algorithm will be:
+
+ dims strides
+ 18 2
+ 76 3
+ 5 1
+ 126 5
+ 9 1
+
+ These numbers work fine for libgomp target.c:omp_target_memcpy_rect_worker.
+ Multiplying them through also gives the same numbers as the source Fortran
+ array strides, i.e. dim0*dim1*dim2*stride3 (18*76*5*5) = 34200. */
+
+static void
+gfc_desc_to_omp_noncontig_array (stmtblock_t *block, tree *ompdimsp,
+ tree *ompstridesp, tree desc, int ndims)
+{
+ tree lastdim = build_int_cst (gfc_array_index_type, ndims - 1);
+ tree dimrange = build_index_type (lastdim);
+ tree ndimarrtype = build_array_type (gfc_array_index_type, dimrange);
+ tree ompdims = gfc_create_var (ndimarrtype, "dims");
+ tree ompstrides = gfc_create_var (ndimarrtype, "strides");
+
+ *ompdimsp = ompdims;
+ *ompstridesp = ompstrides;
+
+ /* dims[ndims - 1] = (fhi[ndims - 1] - flo[ndims - 1] + 1); */
+ tree lastlbound = gfc_conv_array_lbound (desc, ndims - 1);
+ tree lastubound = gfc_conv_array_ubound (desc, ndims - 1);
+ tree lastrange = fold_build2_loc (input_location, MINUS_EXPR,
+ gfc_array_index_type, lastubound,
+ lastlbound);
+ lastrange = fold_build2_loc (input_location, PLUS_EXPR, gfc_array_index_type,
+ lastrange, gfc_index_one_node);
+
+ gfc_add_modify (block,
+ gfc_build_array_ref (ompdims, lastdim, NULL_TREE, true),
+ lastrange);
+
+ /* strides[0] = fstrides[0]; */
+ tree stride0 = gfc_conv_array_stride (desc, 0);
+ gfc_add_modify (block,
+ gfc_build_array_ref (ompstrides, gfc_index_zero_node,
+ NULL_TREE, true),
+ stride0);
+
+ if (ndims > 1)
+ /* strides[ndims - 1] = 1; */
+ gfc_add_modify (block,
+ gfc_build_array_ref (ompstrides, lastdim, NULL_TREE, true),
+ gfc_index_one_node);
+
+ if (ndims == 2)
+ /* dims[0] = fstrides[1]; */
+ gfc_add_modify (block,
+ gfc_build_array_ref (ompdims, gfc_index_zero_node,
+ NULL_TREE, true),
+ gfc_conv_array_stride (desc, 1));
+ else if (ndims > 2)
+ {
+ /* int grains[ndims - 2]; */
+ tree lastgrain = build_int_cst (gfc_array_index_type, ndims - 3);
+ tree grainrange = build_index_type (lastgrain);
+ tree grainarrtype = build_array_type (gfc_array_index_type, grainrange);
+ tree grains = gfc_create_var (grainarrtype, "grains");
+
+ /* int bigger_grain = fstrides[ndims - 1]; */
+ tree bigger_grain = gfc_create_var (gfc_array_index_type, "bigger_grain");
+ tree fstridem1 = gfc_conv_array_stride (desc, ndims - 1);
+ gfc_add_modify (block, bigger_grain, fstridem1);
+
+ /*
+ for (int i = ndims - 2; i > 0; i--)
+ {
+ grains[i - 1] = gcd (fstrides[i], bigger_grain);
+ bigger_grain = grains[i - 1];
+ }
+ */
+ stmtblock_t loop_body;
+ gfc_init_block (&loop_body);
+
+ tree idx = gfc_create_var (gfc_array_index_type, "idx");
+
+ tree gcdtmp = gfc_create_var (gfc_array_index_type, "tmp");
+ gfc_omp_calculate_gcd (&loop_body, gcdtmp,
+ gfc_conv_descriptor_stride_get (desc, idx),
+ bigger_grain);
+ tree idxm1 = fold_build2_loc (input_location, MINUS_EXPR,
+ gfc_array_index_type, idx,
+ gfc_index_one_node);
+ gfc_add_modify (&loop_body,
+ gfc_build_array_ref (grains, idxm1, NULL_TREE, true),
+ gcdtmp);
+ gfc_add_modify (&loop_body, bigger_grain, gcdtmp);
+
+ gfc_simple_for_loop (block, idx,
+ build_int_cst (gfc_array_index_type, ndims - 2),
+ gfc_index_zero_node, GT_EXPR,
+ build_int_cst (gfc_array_index_type, -1),
+ gfc_finish_block (&loop_body));
+ /*
+ int volume = 1;
+ for (int i = 0; i < ndims - 2; i++)
+ {
+ int g = grains[i];
+ dims[i] = g / volume;
+ strides[i + 1] = fstrides[i + 1] / g;
+ volume = volume * dims[i];
+ }
+ */
+ tree volume = gfc_create_var (gfc_array_index_type, "volume");
+ gfc_add_modify (block, volume, gfc_index_one_node);
+
+ gfc_init_block (&loop_body);
+ tree grain = gfc_create_var (gfc_array_index_type, "grain");
+ gfc_add_modify (&loop_body, grain,
+ gfc_build_array_ref (grains, idx, NULL_TREE, true));
+ tree dims_i = gfc_build_array_ref (ompdims, idx, NULL_TREE, true);
+ gfc_add_modify (&loop_body, dims_i,
+ fold_build2_loc (input_location, TRUNC_DIV_EXPR,
+ gfc_array_index_type, grain, volume));
+ tree nidx = fold_build2_loc (input_location, PLUS_EXPR,
+ gfc_array_index_type, idx,
+ gfc_index_one_node);
+ tree strides_ni = gfc_build_array_ref (ompstrides, nidx, NULL_TREE, true);
+ tree fstrides_ni = gfc_conv_descriptor_stride_get (desc, nidx);
+ gfc_add_modify (&loop_body, strides_ni,
+ fold_build2_loc (input_location, TRUNC_DIV_EXPR,
+ gfc_array_index_type, fstrides_ni,
+ grain));
+ gfc_add_modify (&loop_body, volume,
+ fold_build2_loc (input_location, MULT_EXPR,
+ gfc_array_index_type, volume, dims_i));
+
+ gfc_simple_for_loop (block, idx, gfc_index_zero_node,
+ build_int_cst (gfc_array_index_type, ndims - 2),
+ LT_EXPR, gfc_index_one_node,
+ gfc_finish_block (&loop_body));
+
+ /* dims[ndims - 2] = fstrides[ndims - 1] / volume; */
+ tree dimsm2
+ = gfc_build_array_ref (ompdims,
+ build_int_cst (gfc_array_index_type, ndims - 2),
+ NULL_TREE, true);
+ gfc_add_modify (block, dimsm2,
+ fold_build2_loc (input_location, TRUNC_DIV_EXPR,
+ gfc_array_index_type, fstridem1,
+ volume));
+ }
+}
+
+/* Return TRUE if update for N can definitely be done with a single contiguous
+ transfer. If no or if we can't tell, return FALSE. */
+
+static bool
+gfc_omp_contiguous_update_p (gfc_omp_namelist *n)
+{
+ gfc_expr *contig_expr = n->expr;
+
+ if (!n->expr)
+ {
+ if (n->sym->attr.contiguous)
+ return true;
+
+ tree desc = gfc_trans_omp_variable (n->sym, false);
+ tree type = TREE_TYPE (desc);
+ if (!GFC_ARRAY_TYPE_P (type) && !GFC_DESCRIPTOR_TYPE_P (type))
+ return true;
+
+ contig_expr = gfc_lval_expr_from_sym (n->sym);
+ }
+
+ return gfc_is_simply_contiguous (contig_expr, false, true);
+}
+
static tree
gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
locus where, toc_directive cd = TOC_OPENMP)
@@ -5838,6 +6182,162 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
default:
gcc_unreachable ();
}
+
+ if ((list == OMP_LIST_TO || list == OMP_LIST_FROM)
+ && (!n->expr
+ || (n->expr
+ && n->expr->ref
+ && n->expr->ref->type == REF_ARRAY))
+ && !gfc_omp_contiguous_update_p (n))
+ {
+ int ndims;
+ gfc_se se;
+ gfc_init_se (&se, NULL);
+
+ tree desc, span = NULL_TREE;
+
+ if (n->expr)
+ {
+ if (n->expr->rank)
+ gfc_conv_expr_descriptor (&se, n->expr);
+ else
+ gfc_conv_expr (&se, n->expr);
+
+ desc = se.expr;
+ /* The span is the distance between two array elements
+ along the innermost dimension (there may be padding
+ or other data between elements, e.g. of a derived-type
+ array). */
+ span = gfc_get_array_span (desc, n->expr);
+ ndims = n->expr->ref->u.ar.dimen;
+ }
+ else
+ {
+ desc = gfc_trans_omp_variable (n->sym, false);
+ tree type = TREE_TYPE (desc);
+ if (GFC_DESCRIPTOR_TYPE_P (type))
+ span = gfc_conv_descriptor_span_get (desc);
+ ndims = GFC_TYPE_ARRAY_RANK (type);
+ }
+
+ gfc_add_block_to_block (block, &se.pre);
+
+ tree ompdims, ompstrides;
+
+ gfc_desc_to_omp_noncontig_array (block, &ompdims,
+ &ompstrides, desc, ndims);
+
+ tree type = TREE_TYPE (desc);
+ tree etype = gfc_get_element_type (type);
+ tree elsize = fold_convert (gfc_array_index_type,
+ size_in_bytes (etype));
+
+ tree ptr = gfc_conv_array_data (desc);
+ tree offset = gfc_conv_array_offset (desc);
+
+ if (!span)
+ /* The span is the element size. */
+ span = elsize;
+
+ tree node = build_omp_clause (input_location, OMP_CLAUSE_MAP);
+
+ switch (list)
+ {
+ case OMP_LIST_TO:
+ OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_TO_GRID);
+ break;
+ case OMP_LIST_FROM:
+ OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FROM_GRID);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (ptr)));
+ tree byte_offset = fold_convert (sizetype, offset);
+ byte_offset = size_binop (MULT_EXPR, byte_offset,
+ fold_convert (sizetype, span));
+ tree origin
+ = fold_build2_loc (input_location, POINTER_PLUS_EXPR,
+ TREE_TYPE (ptr), ptr, byte_offset);
+
+ OMP_CLAUSE_SIZE (node) = elsize;
+
+ omp_clauses = gfc_trans_add_clause (node, omp_clauses);
+
+ auto_vec<tree, 5> dims;
+
+ for (int r = 0; r < ndims; r++)
+ {
+ tree d
+ = gfc_build_array_ref (ompdims,
+ build_int_cst
+ (gfc_array_index_type, r),
+ NULL_TREE, true);
+ d = gfc_evaluate_now (d, block);
+ dims.safe_push (d);
+ }
+
+ for (int r = ndims - 1; r >= 0; r--)
+ {
+ tree stride_r, len_r, lowbound_r;
+
+ tree rcst = build_int_cst (gfc_array_index_type, r);
+
+ stride_r = gfc_build_array_ref (ompstrides, rcst,
+ NULL_TREE, true);
+ lowbound_r = gfc_conv_array_lbound (desc, r);
+ len_r
+ = fold_build2_loc (input_location, MINUS_EXPR,
+ gfc_array_index_type,
+ gfc_conv_array_ubound (desc, r),
+ lowbound_r);
+ len_r
+ = fold_build2_loc (input_location, PLUS_EXPR,
+ gfc_array_index_type,
+ len_r, gfc_index_one_node);
+
+ lowbound_r
+ = fold_build2_loc (input_location, MULT_EXPR,
+ gfc_array_index_type, lowbound_r,
+ stride_r);
+
+ stride_r = gfc_evaluate_now (stride_r, block);
+ lowbound_r = gfc_evaluate_now (lowbound_r, block);
+ len_r = gfc_evaluate_now (len_r, block);
+
+ tree dim = build_omp_clause (input_location,
+ OMP_CLAUSE_MAP);
+ OMP_CLAUSE_SET_MAP_KIND (dim, GOMP_MAP_GRID_DIM);
+ OMP_CLAUSE_DECL (dim) = lowbound_r;
+ OMP_CLAUSE_SIZE (dim) = len_r;
+
+ omp_clauses = gfc_trans_add_clause (dim, omp_clauses);
+
+ if (!integer_onep (stride_r)
+ || (r == 0 && !operand_equal_p (span, elsize)))
+ {
+ tree snode = build_omp_clause (input_location,
+ OMP_CLAUSE_MAP);
+ OMP_CLAUSE_SET_MAP_KIND (snode,
+ GOMP_MAP_GRID_STRIDE);
+ OMP_CLAUSE_DECL (snode) = stride_r;
+ if (r == 0 && !operand_equal_p (span, elsize))
+ OMP_CLAUSE_SIZE (snode) = span;
+ omp_clauses = gfc_trans_add_clause (snode,
+ omp_clauses);
+ }
+ }
+ origin = build_fold_indirect_ref (origin);
+ tree eltype = gfc_get_element_type (TREE_TYPE (desc));
+ tree arrtype
+ = gfc_trans_omp_arrayshape_type (eltype, &dims);
+ OMP_CLAUSE_DECL (node)
+ = build1_loc (input_location, VIEW_CONVERT_EXPR,
+ arrtype, origin);
+ continue;
+ }
+
tree node = build_omp_clause (input_location, clause_code);
if (n->expr == NULL
|| (n->expr->ref->type == REF_ARRAY
@@ -14601,6 +14601,16 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_STRUCT)
break;
+ /* If we have a non-contiguous (strided/rectangular) update
+ operation with a VIEW_CONVERT_EXPR, we need to be careful not
+ to gimplify the conversion away, because we need it during
+ omp-low.cc in order to retrieve the array's dimensions. Just
+ gimplify partially instead. */
+ if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_TO_GRID
+ || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FROM_GRID)
+ && TREE_CODE (*pd) == VIEW_CONVERT_EXPR)
+ pd = &TREE_OPERAND (*pd, 0);
+
/* We've already partly gimplified this in
gimplify_scan_omp_clauses. Don't do any more. */
if (code == OMP_TARGET && OMP_CLAUSE_MAP_IN_REDUCTION (c))
@@ -1409,6 +1409,11 @@ omp_noncontig_descriptor_type (location_t loc)
TREE_CHAIN (field) = fields;
fields = field;
+ field = build_decl (loc, FIELD_DECL, get_identifier ("__span"),
+ size_type_node);
+ TREE_CHAIN (field) = fields;
+ fields = field;
+
tree ptr_size_type = build_pointer_type (size_type_node);
field = build_decl (loc, FIELD_DECL, get_identifier ("__dim"), ptr_size_type);
@@ -2044,7 +2049,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
OMP_CLAUSE_MAP);
OMP_CLAUSE_SET_MAP_KIND (dn, GOMP_MAP_TO_PSET);
OMP_CLAUSE_DECL (dn) = desc;
- OMP_CLAUSE_SIZE (dn) = TYPE_SIZE_UNIT (desc_type);
OMP_CLAUSE_CHAIN (dn) = OMP_CLAUSE_CHAIN (c);
OMP_CLAUSE_CHAIN (c) = dn;
@@ -13825,6 +13829,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
&& OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_TO_PSET);
c = nc;
while ((nc = OMP_CLAUSE_CHAIN (c))
+ && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP
&& (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_DIM
|| OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_STRIDE))
c = nc;
@@ -14261,7 +14266,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
int i, dims = 0;
auto_vec<tree> tdims;
bool pointer_based = false, handled_pointer_section = false;
- tree arrsize = fold_convert (sizetype, elsize);
+ tree arrsize = size_one_node;
/* Allow a single (maybe strided) array section if we have a
pointer base. */
@@ -14273,8 +14278,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
dims = 1;
}
else
+ /* NOTE: Don't treat (e.g. Fortran, fixed-length) strings as
+ array types here; array section syntax isn't applicable to
+ strings. */
for (tree itype = type;
- TREE_CODE (itype) == ARRAY_TYPE;
+ TREE_CODE (itype) == ARRAY_TYPE
+ && !TYPE_STRING_FLAG (itype);
itype = TREE_TYPE (itype))
{
tdims.safe_push (itype);
@@ -14315,13 +14324,16 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
oc = c;
c = dn;
+ tree span = NULL_TREE;
+
for (i = 0; i < dims; i++)
{
nc = OMP_CLAUSE_CHAIN (c);
tree dim = NULL_TREE, index = NULL_TREE, len = NULL_TREE,
stride = size_one_node;
- if (OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP
+ if (nc
+ && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP
&& OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_DIM)
{
index = OMP_CLAUSE_DECL (nc);
@@ -14338,6 +14350,18 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
stride = OMP_CLAUSE_DECL (nc2);
stride = fold_convert (sizetype, stride);
+ if (OMP_CLAUSE_SIZE (nc2))
+ {
+ /* If the element size is not the same as the
+ distance between two adjacent array
+ elements (in the innermost dimension),
+ retrieve the latter value ("span") from the
+ size field of the stride. We only expect to
+ see one such field per array. */
+ gcc_assert (!span);
+ span = OMP_CLAUSE_SIZE (nc2);
+ span = fold_convert (sizetype, span);
+ }
nc = nc2;
}
@@ -14395,7 +14419,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
dim = size_binop (MINUS_EXPR, maxval, minval);
dim = size_binop (PLUS_EXPR, dim, size_one_node);
len = dim;
- index = size_zero_node;
+ index = minval;
+ nc = c;
}
if (TREE_CODE (dim) != INTEGER_CST)
@@ -14417,10 +14442,40 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
CONSTRUCTOR_APPEND_ELT (vstride, cidx, stride);
}
+ tree bias = size_zero_node;
+ tree volume = size_one_node;
+ for (i = dims - 1; i >= 0; i--)
+ {
+ tree dim = (*vdim)[i].value;
+ tree index = (*vindex)[i].value;
+ tree stride = (*vstride)[i].value;
+
+ /* For the bias we want, e.g.:
+
+ index[0] * stride[0] * dim[1] * dim[2]
+ + index[1] * stride[1] * dim[2]
+ + index[2] * stride[2]
+
+ All multiplied by "span" (or "elsize"). */
+
+ tree index_stride = size_binop (MULT_EXPR, index, stride);
+ bias = size_binop (PLUS_EXPR, bias,
+ size_binop (MULT_EXPR, volume,
+ index_stride));
+ volume = size_binop (MULT_EXPR, volume, dim);
+ }
+
+ /* If we don't have a separate span size, use the element size
+ instead. */
+ if (!span)
+ span = fold_convert (sizetype, elsize);
+
/* The size of the whole array -- to make sure we find any
part of the array via splay-tree lookup that might be
mapped on the target at runtime. */
- OMP_CLAUSE_SIZE (oc) = arrsize;
+ OMP_CLAUSE_SIZE (oc) = size_binop (MULT_EXPR, arrsize, span);
+ /* And the bias of the first element we will update. */
+ OMP_CLAUSE_SIZE (dn) = size_binop (MULT_EXPR, bias, span);
tree cdim = build_constructor (size_arr_type, vdim);
tree cindex = build_constructor (size_arr_type, vindex);
@@ -14451,13 +14506,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
tree ndims_field = TYPE_FIELDS (desc_type);
tree elemsize_field = DECL_CHAIN (ndims_field);
- tree dim_field = DECL_CHAIN (elemsize_field);
+ tree span_field = DECL_CHAIN (elemsize_field);
+ tree dim_field = DECL_CHAIN (span_field);
tree index_field = DECL_CHAIN (dim_field);
tree len_field = DECL_CHAIN (index_field);
tree stride_field = DECL_CHAIN (len_field);
vec<constructor_elt, va_gc> *v;
- vec_alloc (v, 6);
+ vec_alloc (v, 7);
bool all_static = (TREE_STATIC (dim_tmp)
&& TREE_STATIC (index_tmp)
@@ -14487,6 +14543,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
CONSTRUCTOR_APPEND_ELT (v, ndims_field, ndims);
CONSTRUCTOR_APPEND_ELT (v, elemsize_field, elsize);
+ CONSTRUCTOR_APPEND_ELT (v, span_field, span);
CONSTRUCTOR_APPEND_ELT (v, dim_field, dim_tmp);
CONSTRUCTOR_APPEND_ELT (v, index_field, index_tmp);
CONSTRUCTOR_APPEND_ELT (v, len_field, len_tmp);
new file mode 100644
@@ -0,0 +1,19 @@
+! { dg-additional-options "-fdump-tree-original" }
+
+integer :: basicarray(100)
+integer, allocatable :: allocarray(:)
+
+allocate(allocarray(1:20))
+
+!$omp target update to(basicarray)
+
+!$omp target update from(basicarray(:))
+
+!$omp target update to(allocarray)
+
+!$omp target update from(allocarray(:))
+
+end
+
+! { dg-final { scan-tree-dump-times {omp target update from\(} 2 "original" } }
+! { dg-final { scan-tree-dump-times {omp target update to\(} 2 "original" } }
new file mode 100644
@@ -0,0 +1,16 @@
+! { dg-additional-options "-fdump-tree-original" }
+
+integer, allocatable :: allocarray(:)
+integer, allocatable :: allocarray2(:,:)
+
+allocate(allocarray(1:20))
+allocate(allocarray2(1:20,1:20))
+
+! This one must be noncontiguous
+!$omp target update to(allocarray(::2))
+! { dg-final { scan-tree-dump {omp target update map\(to_grid:} "original" } }
+
+!$omp target update from(allocarray2(:,5:15))
+! { dg-final { scan-tree-dump {omp target update from\(} "original" } }
+
+end
new file mode 100644
@@ -0,0 +1,16 @@
+! { dg-additional-options "-fdump-tree-original" }
+
+integer, allocatable :: allocarray(:,:)
+
+allocate(allocarray(1:20,1:20))
+
+! This one could possibly be handled as a contiguous update - but isn't,
+! for now.
+!$omp target update to(allocarray(1:20,5:15))
+! { dg-final { scan-tree-dump {omp target update map\(to_grid:} "original" } }
+
+!$omp target update from(allocarray(:,5:15:2))
+! { dg-final { scan-tree-dump {omp target update map\(from_grid:} "original" } }
+
+end
+
new file mode 100644
@@ -0,0 +1,15 @@
+! { dg-additional-options "-fdump-tree-original" }
+
+integer, target :: tgtarray(20)
+integer, pointer, contiguous :: arrayptr(:)
+
+arrayptr => tgtarray
+
+!$omp target update from(arrayptr)
+! { dg-final { scan-tree-dump {omp target update from\(} "original" } }
+
+!$omp target update to(arrayptr(::2))
+! { dg-final { scan-tree-dump {omp target update map\(to_grid:} "original" } }
+
+end
+
@@ -1320,6 +1320,7 @@ struct target_mem_desc {
typedef struct {
size_t ndims;
size_t elemsize;
+ size_t span;
size_t *dim;
size_t *index;
size_t *length;
@@ -2604,8 +2604,8 @@ goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}
-static int omp_target_memcpy_rect_worker (void *, const void *, size_t, int,
- const size_t *, const size_t *,
+static int omp_target_memcpy_rect_worker (void *, const void *, size_t, size_t,
+ int, const size_t *, const size_t *,
const size_t *, const size_t *,
const size_t *, const size_t *,
struct gomp_device_descr *,
@@ -2640,9 +2640,9 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
{
omp_noncontig_array_desc *desc
= (omp_noncontig_array_desc *) hostaddrs[i + 1];
- cur_node.host_start = (uintptr_t) hostaddrs[i];
+ size_t bias = sizes[i + 1];
+ cur_node.host_start = (uintptr_t) hostaddrs[i] + bias;
cur_node.host_end = cur_node.host_start + sizes[i];
- assert (sizes[i + 1] == sizeof (omp_noncontig_array_desc));
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
if (n)
{
@@ -2654,21 +2654,23 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
}
void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start
- - n->host_start);
+ - n->host_start
+ - bias);
if ((kind & typemask) == GOMP_MAP_TO_GRID)
omp_target_memcpy_rect_worker (devaddr, hostaddrs[i],
- desc->elemsize, desc->ndims,
- desc->length, desc->stride,
- desc->index, desc->index,
- desc->dim, desc->dim, devicep,
+ desc->elemsize, desc->span,
+ desc->ndims, desc->length,
+ desc->stride, desc->index,
+ desc->index, desc->dim,
+ desc->dim, devicep,
NULL);
else
omp_target_memcpy_rect_worker (hostaddrs[i], devaddr,
- desc->elemsize, desc->ndims,
- desc->length, desc->stride,
- desc->index, desc->index,
- desc->dim, desc->dim, NULL,
- devicep);
+ desc->elemsize, desc->span,
+ desc->ndims, desc->length,
+ desc->stride, desc->index,
+ desc->index, desc->dim,
+ desc->dim, NULL, devicep);
}
i++;
}
@@ -5686,7 +5688,7 @@ omp_target_memcpy_async (void *dst, const void *src, size_t length,
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
- int num_dims, const size_t *volume,
+ size_t span, int num_dims, const size_t *volume,
const size_t *strides,
const size_t *dst_offsets,
const size_t *src_offsets,
@@ -5700,7 +5702,7 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
size_t j, dst_off, src_off, length;
int i, ret;
- if (num_dims == 1 && (!strides || strides[0] == 1))
+ if (num_dims == 1 && (!strides || (strides[0] == 1 && element_size == span)))
{
if (__builtin_mul_overflow (element_size, volume[0], &length)
|| __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
@@ -5780,12 +5782,11 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
assert ((src_devicep == NULL || dst_devicep == NULL)
&& (src_devicep != NULL || dst_devicep != NULL));
- if (__builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
- || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
+ if (__builtin_mul_overflow (span, dst_offsets[0], &dst_off)
+ || __builtin_mul_overflow (span, src_offsets[0], &src_off))
return EINVAL;
- if (strides
- && __builtin_mul_overflow (element_size, strides[0], &stride))
+ if (__builtin_mul_overflow (span, strides[0], &stride))
return EINVAL;
for (i = 0, ret = 1; i < volume[0] && ret; i++)
@@ -5826,7 +5827,7 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
{
ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
(const char *) src + src_off,
- element_size, num_dims - 1,
+ element_size, span, num_dims - 1,
volume + 1,
strides ? strides + 1 : NULL,
dst_offsets + 1, src_offsets + 1,
@@ -5875,8 +5876,8 @@ omp_target_memcpy_rect_copy (void *dst, const void *src,
gomp_mutex_lock (&src_devicep->lock);
else if (dst_devicep)
gomp_mutex_lock (&dst_devicep->lock);
- int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
- volume, NULL, dst_offsets,
+ int ret = omp_target_memcpy_rect_worker (dst, src, element_size, element_size,
+ num_dims, volume, NULL, dst_offsets,
src_offsets, dst_dimensions,
src_dimensions, dst_devicep,
src_devicep);
new file mode 100644
@@ -0,0 +1,54 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+implicit none
+integer, allocatable, target :: arr(:), arr2(:,:)
+integer, pointer :: ap(:), ap2(:,:)
+integer :: i, j
+
+allocate(arr(1:20))
+
+arr = 0
+
+!$omp target enter data map(to: arr)
+
+ap => arr(1:20:2)
+ap = 5
+
+!$omp target update to(ap)
+
+!$omp target exit data map(from: arr)
+
+do i=1,20
+ if (mod(i,2).eq.1.and.arr(i).ne.5) stop 1
+ if (mod(i,2).eq.0.and.arr(i).ne.0) stop 2
+end do
+
+allocate(arr2(1:20,1:20))
+
+ap2 => arr2(2:10:2,3:12:3)
+
+arr2 = 1
+
+!$omp target enter data map(to: arr2)
+
+!$omp target
+ap2 = 5
+!$omp end target
+
+!$omp target update from(ap2)
+
+do i=1,20
+ do j=1,20
+ if (i.ge.2.and.i.le.10.and.mod(i-2,2).eq.0.and.&
+ &j.ge.3.and.j.le.12.and.mod(j-3,3).eq.0) then
+ if (arr2(i,j).ne.5) stop 3
+ else
+ if (arr2(i,j).ne.1) stop 4
+ end if
+ end do
+end do
+
+!$omp target exit data map(delete: arr2)
+
+end
new file mode 100644
@@ -0,0 +1,29 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+character(len=8), allocatable, dimension(:) :: lines
+integer :: i
+
+allocate(lines(10))
+
+lines = "OMPHELLO"
+
+!$omp target enter data map(to: lines)
+
+!$omp target
+lines = "NEWVALUE"
+!$omp end target
+
+!$omp target update from(lines(5:7:2))
+
+do i=1,10
+ if (i.eq.5.or.i.eq.7) then
+ if (lines(i).ne."NEWVALUE") stop 1
+ else
+ if (lines(i).ne."OMPHELLO") stop 2
+ end if
+end do
+
+!$omp target exit data map(delete: lines)
+
+end
new file mode 100644
@@ -0,0 +1,51 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+program p
+implicit none
+real(kind=4) :: arr(10,10,10,10)
+
+call s(arr,9,9,9,9)
+
+contains
+
+subroutine s(arr,m,n,o,p)
+implicit none
+integer :: i,m,n,o,p
+integer :: a,b,c,d
+real(kind=4) :: arr(0:m,0:n,0:o,0:p)
+
+arr = 0
+
+!$omp target enter data map(to: arr)
+
+!$omp target
+do i=0,9
+ arr(i,i,i,i) = i
+end do
+!$omp end target
+
+!$omp target update from(arr(0:2,0:2,0:2,0:2))
+
+do a=0,9
+ do b=0,9
+ do c=0,9
+ do d=0,9
+ if (a.le.2.and.b.le.2.and.c.le.2.and.d.le.2) then
+ if (a.eq.b.and.b.eq.c.and.c.eq.d) then
+ if (arr(a,b,c,d).ne.a) stop 1
+ else
+ if (arr(a,b,c,d).ne.0) stop 2
+ end if
+ else
+ if (arr(a,b,c,d).ne.0) stop 3
+ end if
+ end do
+ end do
+ end do
+end do
+
+!$omp target exit data map(delete: arr)
+
+end subroutine s
+end program p
new file mode 100644
@@ -0,0 +1,59 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+! Test plain, fixed-size arrays, and also pointers to same.
+
+implicit none
+integer(kind=8) :: arr(10,30)
+integer, target :: arr2(9,11,13)
+integer, pointer :: parr(:,:,:)
+integer :: i, j, k
+
+arr = 0
+!$omp target enter data map(to: arr)
+
+!$omp target
+arr = 99
+!$omp end target
+
+!$omp target update from(arr(1:10:3,5:30:7))
+
+do i=1,10
+ do j=1,30
+ if (mod(i-1,3).eq.0.and.mod(j-5,7).eq.0) then
+ if (arr(i,j).ne.99) stop 1
+ else
+ if (arr(i,j).ne.0) stop 2
+ endif
+ end do
+end do
+
+!$omp target exit data map(delete: arr)
+
+arr2 = 0
+parr => arr2
+!$omp target enter data map(to: parr)
+
+!$omp target
+parr = 99
+!$omp end target
+
+!$omp target update from(parr(7:9:2,5:7:2,3:6:3))
+
+do i=1,9
+ do j=1,11
+ do k=1,13
+ if (i.ge.7.and.j.ge.5.and.k.ge.3.and.&
+ &i.le.9.and.j.le.7.and.k.le.6.and.&
+ &mod(i-7,2).eq.0.and.mod(j-5,2).eq.0.and.mod(k-3,3).eq.0) then
+ if (parr(i,j,k).ne.99) stop 3
+ else
+ if (parr(i,j,k).ne.0) stop 4
+ end if
+ end do
+ end do
+end do
+
+!$omp target exit data map(delete: parr)
+
+end
new file mode 100644
@@ -0,0 +1,42 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+implicit none
+integer, allocatable :: arr(:,:,:,:,:)
+integer :: i, j, k, l, m
+
+allocate (arr(18,19,20,21,22))
+
+arr = 0
+
+!$omp target enter data map(to: arr)
+
+arr = 10
+
+!$omp target update to(arr(1:3:2,1:4:3,1:5:4,1:6:5,1:7:6))
+
+!$omp target
+do i=1,18
+ do j=1,19
+ do k=1,20
+ do l=1,21
+ do m=1,22
+ if ((i.eq.1.or.i.eq.3).and.&
+ &(j.eq.1.or.j.eq.4).and.&
+ &(k.eq.1.or.k.eq.5).and.&
+ &(l.eq.1.or.l.eq.6).and.&
+ &(m.eq.1.or.m.eq.7)) then
+ if (arr(i,j,k,l,m).ne.10) stop 1
+ else
+ if (arr(i,j,k,l,m).ne.0) stop 2
+ end if
+ end do
+ end do
+ end do
+ end do
+end do
+!$omp end target
+
+!$omp target exit data map(delete: arr)
+
+end
new file mode 100644
@@ -0,0 +1,101 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+program p
+implicit none
+integer, allocatable, target :: arr3(:,:,:)
+integer, pointer :: ap3(:,:,:)
+integer :: i, j, k
+
+allocate(arr3(1:10,1:10,1:10))
+
+! CHECK 1
+
+arr3 = 0
+ap3 => arr3(1:10,1:10,1:10:2)
+
+!$omp target enter data map(to: arr3)
+
+!$omp target
+ap3 = 5
+!$omp end target
+
+!$omp target update from(ap3)
+
+call check(arr3, 0, 1, 1, 2)
+
+!$omp target exit data map(delete: arr3)
+
+! CHECK 2
+
+arr3 = 0
+ap3 => arr3(1:10,1:10:2,1:10)
+
+!$omp target enter data map(to: arr3)
+
+!$omp target
+ap3 = 5
+!$omp end target
+
+!$omp target update from(ap3)
+
+call check(arr3, 2, 1, 2, 1)
+
+!$omp target exit data map(delete: arr3)
+
+! CHECK 3
+
+arr3 = 0
+ap3 => arr3(1:10:2,1:10,1:10)
+
+!$omp target enter data map(to: arr3)
+
+!$omp target
+ap3 = 5
+!$omp end target
+
+!$omp target update from(ap3)
+
+call check(arr3, 4, 2, 1, 1)
+
+!$omp target exit data map(delete: arr3)
+
+! CHECK 4
+
+arr3 = 0
+ap3 => arr3(1:10:2,1:10:2,1:10:2)
+
+!$omp target enter data map(to: arr3)
+
+!$omp target
+ap3 = 5
+!$omp end target
+
+!$omp target update from(ap3)
+
+call check(arr3, 6, 2, 2, 2)
+
+!$omp target exit data map(delete: arr3)
+
+contains
+
+subroutine check(arr,cb,s1,s2,s3)
+implicit none
+integer :: arr(:,:,:)
+integer :: cb, s1, s2, s3
+
+do i=1,10
+ do j=1,10
+ do k=1,10
+ if (mod(k-1,s1).eq.0.and.mod(j-1,s2).eq.0.and.mod(i-1,s3).eq.0) then
+ if (arr(k,j,i).ne.5) stop cb+1
+ else
+ if (arr(k,j,i).ne.0) stop cb+2
+ end if
+ end do
+ end do
+end do
+
+end subroutine check
+
+end program p
new file mode 100644
@@ -0,0 +1,47 @@
+program p
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+integer :: A(200)
+A = [(i, i=1,200)]
+!$omp target enter data map(to: A(40:200))
+call foo(A(101:))
+
+contains
+
+subroutine foo(x)
+integer, target :: x(100)
+integer, pointer :: p(:,:)
+integer :: i, j
+
+p(0:5,-5:-1) => x(::2)
+
+!$omp target
+x = x * 2
+!$omp end target
+
+!$omp target update from(x(1:20:2))
+
+do i=1,20
+if (mod(i,2).eq.1 .and. x(i).ne.(100+i)*2) stop 1
+if (mod(i,2).eq.0 .and. x(i).ne.100+i) stop 2
+end do
+
+!$omp target
+p = 0
+!$omp end target
+
+!$omp target update from(p(::3,::2))
+
+do i=0,5
+ do j=-5,-1
+ if (mod(i,3).eq.0 .and. mod(j+5,2).eq.0) then
+ if (p(i,j).ne.0) stop 3
+ else
+ if (p(i,j).eq.0) stop 4
+ end if
+ end do
+end do
+
+end subroutine foo
+end program p
new file mode 100644
@@ -0,0 +1,78 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+type t
+ complex(kind=8) :: c
+ integer :: i
+end type t
+
+type u
+ integer :: i, j
+ complex(kind=8) :: c
+ integer :: k
+end type u
+
+type(t), target :: var(10)
+type(u), target :: var2(10)
+complex(kind=8), pointer :: ptr(:)
+integer :: i
+
+do i=1,10
+ var(i)%c = dcmplx(i,0)
+ var(i)%i = i
+end do
+
+ptr => var(:)%c
+
+!$omp target enter data map(to: var)
+
+!$omp target
+var(:)%c = dcmplx(0,0)
+var(:)%i = 0
+!$omp end target
+
+!$omp target update from(ptr)
+
+do i=1,10
+ if (var(i)%c.ne.dcmplx(0,0)) stop 1
+ if (var(i)%i.ne.i) stop 2
+end do
+
+!$omp target exit data map(delete: var)
+
+! Now do it again with a differently-ordered derived type.
+
+do i=1,10
+ var2(i)%c = dcmplx(0,i)
+ var2(i)%i = i
+ var2(i)%j = i * 2
+ var2(i)%k = i * 3
+end do
+
+ptr => var2(::2)%c
+
+!$omp target enter data map(to: var2)
+
+!$omp target
+var2(:)%c = dcmplx(0,0)
+var2(:)%i = 0
+var2(:)%j = 0
+var2(:)%k = 0
+!$omp end target
+
+!$omp target update from(ptr)
+
+do i=1,10
+ if (mod(i,2).eq.1) then
+ if (var2(i)%c.ne.dcmplx(0,0)) stop 3
+ else
+ if (var2(i)%c.ne.dcmplx(0,i)) stop 4
+ end if
+ if (var2(i)%i.ne.i) stop 5
+ if (var2(i)%j.ne.i * 2) stop 6
+ if (var2(i)%k.ne.i * 3) stop 7
+end do
+
+!$omp target exit data map(delete: var2)
+
+end
new file mode 100644
@@ -0,0 +1,55 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+! Only some of an array mapped on the target
+
+integer, target :: arr(100)
+integer, pointer :: ptr(:)
+
+arr = [(i * 2, i=1,100)]
+
+!$omp target enter data map(to: arr(51:100))
+
+!$omp target
+arr(51:100) = arr(51:100) + 1
+!$omp end target
+
+!$omp target update from(arr(51:100:2))
+
+do i=1,100
+ if (i.le.50) then
+ if (arr(i).ne.i*2) stop 1
+ else
+ if (mod(i,2).eq.1 .and. arr(i).ne.i*2+1) stop 2
+ if (mod(i,2).eq.0 .and. arr(i).ne.i*2) stop 3
+ end if
+end do
+
+!$omp target exit data map(delete: arr)
+
+arr = [(i * 2, i=1,100)]
+
+! Similar, but update via pointer.
+
+ptr => arr(51:100)
+
+!$omp target enter data map(to: ptr(1:50))
+
+!$omp target
+ptr = ptr + 1
+!$omp end target
+
+!$omp target update from(ptr(::2))
+
+do i=1,100
+ if (i.le.50) then
+ if (arr(i).ne.i*2) stop 1
+ else
+ if (mod(i,2).eq.1 .and. arr(i).ne.i*2+1) stop 2
+ if (mod(i,2).eq.0 .and. arr(i).ne.i*2) stop 3
+ end if
+end do
+
+!$omp target exit data map(delete: ptr)
+
+end
new file mode 100644
@@ -0,0 +1,34 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+program p
+implicit none
+integer, dimension(100) :: parr
+integer :: i
+
+parr = [(i, i=1,100)]
+
+!$omp target enter data map(to: parr)
+
+call s(parr)
+
+do i=1,100
+ if (mod(i,3).eq.1 .and. parr(i).ne.999) stop 1
+ if (mod(i,3).ne.1 .and. parr(i).ne.i) stop 2
+end do
+
+!$omp target exit data map(delete: parr)
+
+contains
+subroutine s(arr)
+implicit none
+integer, intent(inout) :: arr(*)
+
+!$omp target map(alloc: arr(1:100))
+arr(1:100) = 999
+!$omp end target
+
+!$omp target update from(arr(1:100:3))
+
+end subroutine s
+end program p
new file mode 100644
@@ -0,0 +1,36 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+! Assumed-shape arrays
+
+program p
+implicit none
+integer, dimension(100) :: parr
+integer :: i
+
+parr = [(i, i=1,100)]
+
+!$omp target enter data map(to: parr)
+
+call s(parr)
+
+do i=1,100
+ if (mod(i,3).eq.1 .and. parr(i).ne.999) stop 1
+ if (mod(i,3).ne.1 .and. parr(i).ne.i) stop 2
+end do
+
+!$omp target exit data map(delete: parr)
+
+contains
+subroutine s(arr)
+implicit none
+integer, intent(inout) :: arr(:)
+
+!$omp target
+arr = 999
+!$omp end target
+
+!$omp target update from(arr(1:100:3))
+
+end subroutine s
+end program p
new file mode 100644
@@ -0,0 +1,39 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+! Test biasing for target-region lookup.
+
+implicit none
+integer, allocatable, target :: var(:,:,:)
+integer, pointer :: p(:,:,:)
+integer :: i, j, k
+
+allocate(var(1:20,5:25,10:30))
+
+var = 0
+
+!$omp target enter data map(to: var)
+
+!$omp target
+var = 99
+!$omp end target
+
+p => var(1:3:2,5:5,10:10)
+
+!$omp target update from(p)
+
+do i=1,20
+ do j=5,25
+ do k=10,30
+ if ((i.eq.1.or.i.eq.3).and.j.eq.5.and.k.eq.10) then
+ if (var(i,j,k).ne.99) stop 1
+ else
+ if (var(i,j,k).ne.0) stop 2
+ end if
+ end do
+ end do
+end do
+
+!$omp target exit data map(delete: var)
+
+end
new file mode 100644
@@ -0,0 +1,34 @@
+! { dg-do run }
+! { dg-require-effective-target offload_device_nonshared_as }
+
+! This test case hits the problem described in:
+! https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612219.html
+
+! { dg-xfail-run-if "'enter data' bug" { offload_device_nonshared_as } }
+
+character(len=:), allocatable, dimension(:) :: lines
+integer :: i
+
+allocate(character(len=8) :: lines(10))
+
+lines = "OMPHELLO"
+
+!$omp target enter data map(to: lines)
+
+!$omp target
+lines = "NEWVALUE"
+!$omp end target
+
+!$omp target update from(lines(5:7:2))
+
+do i=1,10
+ if (i.eq.5.or.i.eq.7) then
+ if (lines(i).ne."NEWVALUE") stop 1
+ else
+ if (lines(i).ne."OMPHELLO") stop 2
+ end if
+end do
+
+!$omp target exit data map(delete: lines)
+
+end