Message ID | 20180920195908.04486d45@squid.athome |
---|---|
State | New |
Headers | show |
Series | [OpenACC] Fortran "declare create"/allocate support for OpenACC | expand |
[Please Cc the fortran list on fortran patches] On Thu, 20 Sep 2018 19:59:08 -0400 Julian Brown <julian@codesourcery.com> wrote: > From b63d0329fb73679b07f6318b8dd092113d5c8505 Mon Sep 17 00:00:00 2001 > From: Julian Brown <julian@codesourcery.com> > Date: Wed, 12 Sep 2018 20:15:08 -0700 > Subject: [PATCH 2/2] Fortran "declare create"/allocate support for > OpenACC > > gcc/ > * omp-low.c (scan_sharing_clauses): Update handling of > OpenACC declare create, declare copyin and declare deviceptr to have > local lifetimes. (convert_to_firstprivate_int): Handle pointer types. > (convert_from_firstprivate_int): Likewise. Create local > storage for the values being pointed to. Add new orig_type argument. > (lower_omp_target): Handle > GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. Add orig_type argument to > convert_from_firstprivate_int call. Allow pointer types with > GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize firstprivate VLAs. > * tree-pretty-print.c (dump_omp_clause): Handle > GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. > > gcc/fortran/ > * gfortran.h (enum gfc_omp_map_op): Add > OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. > (gfc_omp_clauses): Add update_allocatable. > * trans-array.c (trans-stmt.h): Include. > (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate > for decls that have oacc_declare_create attribute set. > * trans-decl.c (add_attributes_to_decl): Enable lowering of > OpenACC declare create, declare copyin and declare deviceptr clauses. > (add_clause): Don't duplicate OpenACC declare clauses. > Populate sym->backend_decl so that it can be used to determine if two > symbols are unique. > (find_module_oacc_declare_clauses): Relax oacc_declare_create > to OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to > match OpenACC 2.5 semantics. > * trans-openmp.c (gfc_trans_omp_clauses): Use > GOMP_MAP_ALWAYS_POINTER (for update directive) or > GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for allocatable scalar > decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} clauses. > (gfc_trans_oacc_executable_directive): Use > GOMP_MAP_ALWAYS_POINTER for allocatable scalar data clauses inside > acc update directives. (gfc_trans_oacc_declare_allocate): New > function. > * trans-stmt.c (gfc_trans_allocate): Call > gfc_trans_oacc_declare_allocate for decls with > oacc_declare_create attribute set. > (gfc_trans_deallocate): Likewise. > * trans-stmt.h (gfc_trans_oacc_declare_allocate): Declare. > > gcc/testsuite/ > * gfortran.dg/goacc/declare-allocatable-1.f90: New test. > > include/ > * gomp-constants.h (enum gomp_map_kind): Define > GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and > GOMP_MAP_FLAG_SPECIAL_4. > > libgomp/ > * oacc-mem.c (gomp_acc_declare_allocate): New function. > * oacc-parallel.c (GOACC_enter_exit_data): Handle > GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. > * testsuite/libgomp.oacc-fortran/allocatable-array.f90: New > test. > * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New > test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: > New test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: > New test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: > New test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: > New test. --- > gcc/fortran/gfortran.h | 6 +- > gcc/fortran/trans-array.c | 10 +- > gcc/fortran/trans-decl.c | 22 ++- > gcc/fortran/trans-openmp.c | 57 +++++- > gcc/fortran/trans-stmt.c | 12 ++ > gcc/fortran/trans-stmt.h | 1 + > gcc/omp-low.c | 62 ++++-- > .../gfortran.dg/goacc/declare-allocatable-1.f90 | 25 +++ > gcc/tree-pretty-print.c | 6 + > include/gomp-constants.h | 6 + > libgomp/oacc-mem.c | 28 +++ > libgomp/oacc-parallel.c | 30 ++- > .../libgomp.oacc-fortran/allocatable-array-1.f90 | 30 +++ > .../libgomp.oacc-fortran/allocatable-scalar.f90 | 33 ++++ > .../libgomp.oacc-fortran/declare-allocatable-1.f90 | 211 > ++++++++++++++++++++ .../libgomp.oacc-fortran/declare-allocatable-2.f90 > | 48 +++++ .../libgomp.oacc-fortran/declare-allocatable-3.f90 | 218 > +++++++++++++++++++++ .../libgomp.oacc-fortran/declare-allocatable-4.f90 > | 66 +++++++ 18 files changed, 834 insertions(+), 37 deletions(-) > create mode 100644 > gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 create mode > 100644 libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 > create mode 100644 > libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 create > mode 100644 > libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 > create mode 100644 > libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 > create mode 100644 > libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 > create mode 100644 > libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 > > diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h > index 3359974..92e13d9 100644 > --- a/gcc/fortran/gfortran.h > +++ b/gcc/fortran/gfortran.h > @@ -1188,7 +1188,9 @@ enum gfc_omp_map_op > OMP_MAP_RELEASE, > OMP_MAP_ALWAYS_TO, > OMP_MAP_ALWAYS_FROM, > - OMP_MAP_ALWAYS_TOFROM > + OMP_MAP_ALWAYS_TOFROM, > + OMP_MAP_DECLARE_ALLOCATE, > + OMP_MAP_DECLARE_DEALLOCATE > }; > > enum gfc_omp_linear_op > @@ -1344,7 +1346,7 @@ typedef struct gfc_omp_clauses > gfc_expr_list *tile_list; > unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; > unsigned wait:1, par_auto:1, gang_static:1; > - unsigned if_present:1, finalize:1; > + unsigned if_present:1, finalize:1, update_allocatable:1; > locus loc; > > } > diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c > index 95ea615..2ac5908 100644 > --- a/gcc/fortran/trans-array.c > +++ b/gcc/fortran/trans-array.c > @@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see > #include "trans-types.h" > #include "trans-array.h" > #include "trans-const.h" > +#include "trans-stmt.h" > #include "dependency.h" please dont mix declarations and definitions, i.e. please put gfc_trans_oacc_declare_allocate() into trans-openmp.c, and add the declaration to trans.h, in the corresponding /* In trans-openmp.c */ block there. thanks, > > static bool gfc_get_array_constructor_size (mpz_t *, > gfc_constructor_base); @@ -5670,6 +5671,7 @@ gfc_array_allocate > (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_ref > *ref, *prev_ref = NULL, *coref; bool allocatable, coarray, dimension, > alloc_w_e3_arr_spec = false, non_ulimate_coarray_ptr_comp; > + bool oacc_declare = false; > > ref = expr->ref; > > @@ -5684,6 +5686,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * > expr, tree status, tree errmsg, allocatable = > expr->symtree->n.sym->attr.allocatable; dimension = > expr->symtree->n.sym->attr.dimension; non_ulimate_coarray_ptr_comp = > false; > + oacc_declare = expr->symtree->n.sym->attr.oacc_declare_create; > } > else > { > @@ -5845,7 +5848,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * > expr, tree status, tree errmsg, > /* Update the array descriptors. */ > if (dimension) > - gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, > offset); > + { > + gfc_conv_descriptor_offset_set (&set_descriptor_block, > se->expr, offset); + > + if (oacc_declare) > + gfc_trans_oacc_declare_allocate (&set_descriptor_block, > expr, true); > + } > > /* Pointer arrays need the span field to be set. */ > if (is_pointer_array (se->expr) > diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c > index 06066eb..df9bdaf 100644 > --- a/gcc/fortran/trans-decl.c > +++ b/gcc/fortran/trans-decl.c > @@ -1399,7 +1399,10 @@ add_attributes_to_decl (symbol_attribute > sym_attr, tree list) if (sym_attr.omp_declare_target_link) > list = tree_cons (get_identifier ("omp declare target link"), > NULL_TREE, list); > - else if (sym_attr.omp_declare_target) > + else if (sym_attr.omp_declare_target > + || sym_attr.oacc_declare_create > + || sym_attr.oacc_declare_copyin > + || sym_attr.oacc_declare_deviceptr) > list = tree_cons (get_identifier ("omp declare target"), > NULL_TREE, list); > > @@ -6218,13 +6221,20 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op > map_op) { > gfc_omp_namelist *n; > > + if (!module_oacc_clauses) > + module_oacc_clauses = gfc_get_omp_clauses (); > + > + if (sym->backend_decl == NULL) > + gfc_get_symbol_decl (sym); > + > + for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = > n->next) > + if (n->sym->backend_decl == sym->backend_decl) > + return; > + > n = gfc_get_omp_namelist (); > n->sym = sym; > n->u.map_op = map_op; > > - if (!module_oacc_clauses) > - module_oacc_clauses = gfc_get_omp_clauses (); > - > if (module_oacc_clauses->lists[OMP_LIST_MAP]) > n->next = module_oacc_clauses->lists[OMP_LIST_MAP]; > > @@ -6240,10 +6250,10 @@ find_module_oacc_declare_clauses (gfc_symbol > *sym) gfc_omp_map_op map_op; > > if (sym->attr.oacc_declare_create) > - map_op = OMP_MAP_FORCE_ALLOC; > + map_op = OMP_MAP_ALLOC; > > if (sym->attr.oacc_declare_copyin) > - map_op = OMP_MAP_FORCE_TO; > + map_op = OMP_MAP_TO; > > if (sym->attr.oacc_declare_deviceptr) > map_op = OMP_MAP_FORCE_DEVICEPTR; > diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c > index f038f4c..e18c0af 100644 > --- a/gcc/fortran/trans-openmp.c > +++ b/gcc/fortran/trans-openmp.c > @@ -2119,9 +2119,18 @@ gfc_trans_omp_clauses (stmtblock_t *block, > gfc_omp_clauses *clauses, (TREE_TYPE (TREE_TYPE (decl))))) > { > tree orig_decl = decl; > + enum gomp_map_kind gmk = GOMP_MAP_POINTER; > + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) > + && n->sym->attr.oacc_declare_create) > + { > + if (clauses->update_allocatable) > + gmk = GOMP_MAP_ALWAYS_POINTER; > + else > + gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; > + } > node4 = build_omp_clause (input_location, > OMP_CLAUSE_MAP); > - OMP_CLAUSE_SET_MAP_KIND (node4, > GOMP_MAP_POINTER); > + OMP_CLAUSE_SET_MAP_KIND (node4, gmk); > OMP_CLAUSE_DECL (node4) = decl; > OMP_CLAUSE_SIZE (node4) = size_int (0); > decl = build_fold_indirect_ref (decl); > @@ -2330,6 +2339,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, > gfc_omp_clauses *clauses, case OMP_MAP_FORCE_DEVICEPTR: > OMP_CLAUSE_SET_MAP_KIND (node, > GOMP_MAP_FORCE_DEVICEPTR); break; > + case OMP_MAP_DECLARE_ALLOCATE: > + OMP_CLAUSE_SET_MAP_KIND (node, > GOMP_MAP_DECLARE_ALLOCATE); > + break; > + case OMP_MAP_DECLARE_DEALLOCATE: > + OMP_CLAUSE_SET_MAP_KIND (node, > GOMP_MAP_DECLARE_DEALLOCATE); > + break; > default: > gcc_unreachable (); > } > @@ -3082,12 +3097,14 @@ gfc_trans_oacc_executable_directive (gfc_code > *code) { > stmtblock_t block; > tree stmt, oacc_clauses; > + gfc_omp_clauses *clauses = code->ext.omp_clauses; > enum tree_code construct_code; > > switch (code->op) > { > case EXEC_OACC_UPDATE: > construct_code = OACC_UPDATE; > + clauses->update_allocatable = 1; > break; > case EXEC_OACC_ENTER_DATA: > construct_code = OACC_ENTER_DATA; > @@ -3103,8 +3120,7 @@ gfc_trans_oacc_executable_directive (gfc_code > *code) } > > gfc_start_block (&block); > - oacc_clauses = gfc_trans_omp_clauses (&block, > code->ext.omp_clauses, > - code->loc); > + oacc_clauses = gfc_trans_omp_clauses (&block, clauses, code->loc); > stmt = build1_loc (input_location, construct_code, void_type_node, > oacc_clauses); > gfc_add_expr_to_block (&block, stmt); > @@ -5099,6 +5115,41 @@ gfc_trans_oacc_declare (gfc_code *code) > return gfc_finish_block (&block); > } > > +/* Create an OpenACC enter or exit data construct for an OpenACC > declared > + variable that has been allocated or deallocated. */ > + > +tree > +gfc_trans_oacc_declare_allocate (stmtblock_t *block, gfc_expr *expr, > + bool allocate) > +{ > + gfc_omp_clauses *clauses = gfc_get_omp_clauses (); > + gfc_omp_namelist *p = gfc_get_omp_namelist (); > + tree oacc_clauses, stmt; > + enum tree_code construct_code; > + > + p->sym = expr->symtree->n.sym; > + p->where = expr->where; > + > + if (allocate) > + { > + p->u.map_op = OMP_MAP_DECLARE_ALLOCATE; > + construct_code = OACC_ENTER_DATA; > + } > + else > + { > + p->u.map_op = OMP_MAP_DECLARE_DEALLOCATE; > + construct_code = OACC_EXIT_DATA; > + } > + clauses->lists[OMP_LIST_MAP] = p; > + > + oacc_clauses = gfc_trans_omp_clauses (block, clauses, expr->where); > + stmt = build1_loc (input_location, construct_code, void_type_node, > + oacc_clauses); > + gfc_add_expr_to_block (block, stmt); > + > + return stmt; > +} > + > tree > gfc_trans_oacc_directive (gfc_code *code) > { > diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c > index 795d3cc..0b1a4b4 100644 > --- a/gcc/fortran/trans-stmt.c > +++ b/gcc/fortran/trans-stmt.c > @@ -6422,6 +6422,10 @@ gfc_trans_allocate (gfc_code * code) > label_finish, expr, 0); > else > gfc_allocate_using_malloc (&se.pre, se.expr, memsz, > stat); + > + /* Allocate memory for OpenACC declared variables. */ > + if (expr->symtree->n.sym->attr.oacc_declare_create) > + gfc_trans_oacc_declare_allocate (&se.pre, expr, true); > } > else > { > @@ -6894,6 +6898,10 @@ gfc_trans_deallocate (gfc_code *code) > > if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))) > { > + if (!is_coarray > + && expr->symtree->n.sym->attr.oacc_declare_create) > + gfc_trans_oacc_declare_allocate (&se.pre, expr, > false); + > gfc_coarray_deregtype caf_dtype; > > if (is_coarray) > @@ -6947,6 +6955,10 @@ gfc_trans_deallocate (gfc_code *code) > } > else > { > + /* Deallocate memory for OpenACC declared variables. */ > + if (expr->symtree->n.sym->attr.oacc_declare_create) > + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); > + > tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, > label_finish, false, al->expr, > al->expr->ts, > is_coarray); diff --git a/gcc/fortran/trans-stmt.h > b/gcc/fortran/trans-stmt.h index 848c7d9..0597579 100644 > --- a/gcc/fortran/trans-stmt.h > +++ b/gcc/fortran/trans-stmt.h > @@ -72,6 +72,7 @@ tree gfc_trans_omp_directive (gfc_code *); > void gfc_trans_omp_declare_simd (gfc_namespace *); > tree gfc_trans_oacc_directive (gfc_code *); > tree gfc_trans_oacc_declare (gfc_namespace *); > +tree gfc_trans_oacc_declare_allocate (stmtblock_t *, gfc_expr *, > bool); > /* trans-io.c */ > tree gfc_trans_open (gfc_code *); > diff --git a/gcc/omp-low.c b/gcc/omp-low.c > index 5fc4a66..bc5a5dd 100644 > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -1196,7 +1196,8 @@ scan_sharing_clauses (tree clauses, omp_context > *ctx) && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) > && varpool_node::get_create (decl)->offloadable > && !lookup_attribute ("omp declare target link", > - DECL_ATTRIBUTES (decl))) > + DECL_ATTRIBUTES (decl)) > + && !is_gimple_omp_oacc (ctx->stmt)) > break; > if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP > && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) > @@ -7501,7 +7502,7 @@ convert_to_firstprivate_int (tree var, > gimple_seq *gs) > if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) > { > - if (omp_is_reference (var)) > + if (omp_is_reference (var) || POINTER_TYPE_P (type)) > { > tmp = create_tmp_var (type); > gimplify_assign (tmp, build_simple_mem_ref (var), gs); > @@ -7533,7 +7534,8 @@ convert_to_firstprivate_int (tree var, > gimple_seq *gs) /* Like convert_to_firstprivate_int, but restore the > original type. */ > static tree > -convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) > +convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref, > + gimple_seq *gs) > { > tree type = TREE_TYPE (var); > tree new_type = NULL_TREE; > @@ -7542,7 +7544,31 @@ convert_from_firstprivate_int (tree var, bool > is_ref, gimple_seq *gs) gcc_assert (TREE_CODE (var) == MEM_REF); > var = TREE_OPERAND (var, 0); > > - if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) > + if (is_ref || POINTER_TYPE_P (orig_type)) > + { > + tree_code code = NOP_EXPR; > + > + if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == > COMPLEX_TYPE) > + code = VIEW_CONVERT_EXPR; > + > + if (code == VIEW_CONVERT_EXPR > + && TYPE_SIZE (type) != TYPE_SIZE (orig_type)) > + { > + tree ptype = build_pointer_type (type); > + var = fold_build1 (code, ptype, build_fold_addr_expr > (var)); > + var = build_simple_mem_ref (var); > + } > + else > + var = fold_build1 (code, type, var); > + > + tree inst = create_tmp_var (type); > + gimplify_assign (inst, var, gs); > + var = build_fold_addr_expr (inst); > + > + return var; > + } > + > + if (INTEGRAL_TYPE_P (var)) > return fold_convert (type, var); > > gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); > @@ -7553,16 +7579,8 @@ convert_from_firstprivate_int (tree var, bool > is_ref, gimple_seq *gs) tmp = create_tmp_var (new_type); > var = fold_convert (new_type, var); > gimplify_assign (tmp, var, gs); > - var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); > - > - if (is_ref) > - { > - tmp = create_tmp_var (build_pointer_type (type)); > - gimplify_assign (tmp, build_fold_addr_expr (var), gs); > - var = tmp; > - } > > - return var; > + return fold_build1 (VIEW_CONVERT_EXPR, type, tmp); > } > > /* Lower the GIMPLE_OMP_TARGET in the current statement > @@ -7665,6 +7683,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, > omp_context *ctx) case GOMP_MAP_FORCE_DEVICEPTR: > case GOMP_MAP_DEVICE_RESIDENT: > case GOMP_MAP_LINK: > + case GOMP_MAP_DECLARE_ALLOCATE: > + case GOMP_MAP_DECLARE_DEALLOCATE: > gcc_assert (is_gimple_omp_oacc (stmt)); > break; > default: > @@ -7743,7 +7763,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, > omp_context *ctx) && !maybe_lookup_field_in_outer_ctx (var, ctx)) > { > gcc_assert (is_gimple_omp_oacc (ctx->stmt)); > - x = convert_from_firstprivate_int (x, > omp_is_reference (var), > + x = convert_from_firstprivate_int (x, TREE_TYPE > (new_var), > + omp_is_reference > (var), &fplist); > gimplify_assign (new_var, x, &fplist); > map_cnt++; > @@ -7760,13 +7781,19 @@ lower_omp_target (gimple_stmt_iterator > *gsi_p, omp_context *ctx) { > gcc_assert (is_gimple_omp_oacc (ctx->stmt)); > if (omp_is_reference (new_var) > - && TREE_CODE (var_type) != POINTER_TYPE) > + /* Accelerators may not have alloca, so it's not > + possible to privatize local storage for those > + objects. */ > + && TREE_CONSTANT (TYPE_SIZE (TREE_TYPE > (var_type)))) { > /* Create a local object to hold the instance > value. */ > const char *id = IDENTIFIER_POINTER (DECL_NAME > (new_var)); tree inst = create_tmp_var (TREE_TYPE (var_type), id); > - gimplify_assign (inst, fold_indirect_ref (x), > &fplist); > + if (TREE_CODE (var_type) == POINTER_TYPE) > + gimplify_assign (inst, x, &fplist); > + else > + gimplify_assign (inst, fold_indirect_ref (x), > &fplist); x = build_fold_addr_expr (inst); > } > gimplify_assign (new_var, x, &fplist); > @@ -7996,8 +8023,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, > omp_context *ctx) else if (OMP_CLAUSE_CODE (c) == > OMP_CLAUSE_FIRSTPRIVATE) { > gcc_checking_assert (is_gimple_omp_oacc > (ctx->stmt)); > + tree new_var = lookup_decl (var, ctx); > tree type = TREE_TYPE (var); > - tree inner_type = omp_is_reference (var) > + tree inner_type = omp_is_reference (new_var) > ? TREE_TYPE (type) : type; > if ((TREE_CODE (inner_type) == REAL_TYPE > || (!omp_is_reference (var) > diff --git > a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 > b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 new file > mode 100644 index 0000000..5349e0d --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 > @@ -0,0 +1,25 @@ > +! Verify that OpenACC declared allocatable arrays have implicit > +! OpenACC enter and exit pragmas at the time of allocation and > +! deallocation. > + > +! { dg-additional-options "-fdump-tree-original" } > + > +program allocate > + implicit none > + integer, allocatable :: a(:), b > + integer, parameter :: n = 100 > + integer i > + !$acc declare create(a,b) > + > + allocate (a(n), b) > + > + !$acc parallel loop copyout(a, b) > + do i = 1, n > + a(i) = b > + end do > + > + deallocate (a, b) > +end program allocate > + > +! { dg-final { scan-tree-dump-times "pragma acc enter data > map.declare_allocate" 2 "original" } } +! { dg-final > { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" > 2 "original" } } diff --git a/gcc/tree-pretty-print.c > b/gcc/tree-pretty-print.c index 2c089b1..47b8aaa 100644 --- > a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c > @@ -755,6 +755,12 @@ dump_omp_clause (pretty_printer *pp, tree > clause, int spc, dump_flags_t flags) case GOMP_MAP_LINK: > pp_string (pp, "link"); > break; > + case GOMP_MAP_DECLARE_ALLOCATE: > + pp_string (pp, "declare_allocate"); > + break; > + case GOMP_MAP_DECLARE_DEALLOCATE: > + pp_string (pp, "declare_deallocate"); > + break; > default: > gcc_unreachable (); > } > diff --git a/include/gomp-constants.h b/include/gomp-constants.h > index ccfb657..9fc8767 100644 > --- a/include/gomp-constants.h > +++ b/include/gomp-constants.h > @@ -40,6 +40,7 @@ > #define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) > #define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) > #define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) > +#define GOMP_MAP_FLAG_SPECIAL_4 (1 << 6) > #define GOMP_MAP_FLAG_SPECIAL > (GOMP_MAP_FLAG_SPECIAL_1 \ | GOMP_MAP_FLAG_SPECIAL_0) > /* Flag to force a specific behavior (or else, trigger a run-time > error). */ @@ -128,6 +129,11 @@ enum gomp_map_kind > /* Decrement usage count and deallocate if zero. */ > GOMP_MAP_RELEASE = > (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_DELETE), > + /* Mapping kinds for allocatable arrays. */ > + GOMP_MAP_DECLARE_ALLOCATE = > (GOMP_MAP_FLAG_SPECIAL_4 > + | GOMP_MAP_FORCE_TO), > + GOMP_MAP_DECLARE_DEALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 > + | GOMP_MAP_FORCE_FROM), > > /* Internal to GCC, not used in libgomp. */ > /* Do not map, but pointer assign a pointer instead. */ > diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c > index 3787ce4..c678a22 100644 > --- a/libgomp/oacc-mem.c > +++ b/libgomp/oacc-mem.c > @@ -725,6 +725,34 @@ acc_update_self (void *h, size_t s) > } > > void > +gomp_acc_declare_allocate (bool allocate, size_t mapnum, void > **hostaddrs, > + size_t *sizes, unsigned short *kinds) > +{ > + gomp_debug (0, " %s: processing\n", __FUNCTION__); > + > + if (allocate) > + { > + assert (mapnum == 3); > + > + /* Allocate memory for the array data. */ > + uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], > sizes[0]); + > + /* Update the PSET. */ > + acc_update_device (hostaddrs[1], sizes[1]); > + void *pset = acc_deviceptr (hostaddrs[1]); > + acc_memcpy_to_device (pset, &data, sizeof (uintptr_t)); > + } > + else > + { > + /* Deallocate memory for the array data. */ > + void *data = acc_deviceptr (hostaddrs[0]); > + acc_free (data); > + } > + > + gomp_debug (0, " %s: end\n", __FUNCTION__); > +} > + > +void > gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t > *sizes, void *kinds) > { > diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c > index 070c5dc..f80b9a2 100644 > --- a/libgomp/oacc-parallel.c > +++ b/libgomp/oacc-parallel.c > @@ -391,7 +391,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, > || kind == GOMP_MAP_FORCE_PRESENT > || kind == GOMP_MAP_FORCE_TO > || kind == GOMP_MAP_TO > - || kind == GOMP_MAP_ALLOC) > + || kind == GOMP_MAP_ALLOC > + || kind == GOMP_MAP_DECLARE_ALLOCATE) > { > data_enter = true; > break; > @@ -400,7 +401,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, > if (kind == GOMP_MAP_RELEASE > || kind == GOMP_MAP_DELETE > || kind == GOMP_MAP_FROM > - || kind == GOMP_MAP_FORCE_FROM) > + || kind == GOMP_MAP_FORCE_FROM > + || kind == GOMP_MAP_DECLARE_DEALLOCATE) > break; > > gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", > @@ -429,6 +431,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, > { > switch (kind) > { > + case GOMP_MAP_DECLARE_ALLOCATE: > case GOMP_MAP_ALLOC: > acc_present_or_create (hostaddrs[i], sizes[i]); > break; > @@ -449,8 +452,12 @@ GOACC_enter_exit_data (int device, size_t mapnum, > } > else > { > - gomp_acc_insert_pointer (pointer, &hostaddrs[i], > - &sizes[i], &kinds[i]); > + if (kind == GOMP_MAP_DECLARE_ALLOCATE) > + gomp_acc_declare_allocate (true, pointer, > &hostaddrs[i], > + &sizes[i], &kinds[i]); > + else > + gomp_acc_insert_pointer (pointer, &hostaddrs[i], > + &sizes[i], &kinds[i]); > /* Increment 'i' by two because OpenACC requires > fortran arrays to be contiguous, so each PSET is associated with > one of > MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and @@ -480,6 +487,7 > @@ GOACC_enter_exit_data (int device, size_t mapnum, acc_delete > (hostaddrs[i], sizes[i]); } > break; > + case GOMP_MAP_DECLARE_DEALLOCATE: > case GOMP_MAP_FROM: > case GOMP_MAP_FORCE_FROM: > if (finalize) > @@ -495,10 +503,16 @@ GOACC_enter_exit_data (int device, size_t > mapnum, } > else > { > - bool copyfrom = (kind == GOMP_MAP_FORCE_FROM > - || kind == GOMP_MAP_FROM); > - gomp_acc_remove_pointer (hostaddrs[i], sizes[i], > copyfrom, async, > - finalize, pointer); > + if (kind == GOMP_MAP_DECLARE_DEALLOCATE) > + gomp_acc_declare_allocate (false, pointer, > &hostaddrs[i], > + &sizes[i], &kinds[i]); > + else > + { > + bool copyfrom = (kind == GOMP_MAP_FORCE_FROM > + || kind == GOMP_MAP_FROM); > + gomp_acc_remove_pointer (hostaddrs[i], sizes[i], > copyfrom, > + async, finalize, pointer); > + } > /* See the above comment. */ > i += pointer - 1; > } > diff --git > a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 > b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 new > file mode 100644 index 0000000..3758031 --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 > @@ -0,0 +1,30 @@ > +! Ensure that dummy arguments of allocatable arrays don't cause > +! "libgomp: [...] is not mapped" errors. > + > +! { dg-do run } > + > +program main > + integer, parameter :: n = 40 > + integer, allocatable :: ar(:,:,:) > + integer :: i > + > + allocate (ar(1:n,0:n-1,0:n-1)) > + !$acc enter data copyin (ar) > + > + !$acc update host (ar) > + > + !$acc update device (ar) > + > + call update_ar (ar, n) > + > + !$acc exit data copyout (ar) > +end program main > + > +subroutine update_ar (ar, n) > + integer :: n > + integer, dimension (1:n,0:n-1,0:n-1) :: ar > + > + !$acc update host (ar) > + > + !$acc update device (ar) > +end subroutine update_ar > diff --git > a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 > b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 new > file mode 100644 index 0000000..be86d14 --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 > @@ -0,0 +1,33 @@ > +! Test non-declared allocatable scalars in OpenACC data clauses. > + > +! { dg-do run } > + > +program main > + implicit none > + integer, parameter :: n = 100 > + integer, allocatable :: a, c > + integer :: i, b(n) > + > + allocate (a) > + > + a = 50 > + > + !$acc parallel loop > + do i = 1, n; > + b(i) = a > + end do > + > + do i = 1, n > + if (b(i) /= a) call abort > + end do > + > + allocate (c) > + > + !$acc parallel copyout(c) num_gangs(1) > + c = a > + !$acc end parallel > + > + if (c /= a) call abort > + > + deallocate (a, c) > +end program main > diff --git > a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 > b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 > new file mode 100644 index 0000000..d68b124 --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 > @@ -0,0 +1,211 @@ > +! Test declare create with allocatable arrays. > + > +! { dg-do run } > + > +module vars > + implicit none > + integer, parameter :: n = 100 > + real*8, allocatable :: b(:) > + !$acc declare create (b) > +end module vars > + > +program test > + use vars > + use openacc > + implicit none > + real*8 :: a > + integer :: i > + > + interface > + subroutine sub1 > + !$acc routine gang > + end subroutine sub1 > + > + subroutine sub2 > + end subroutine sub2 > + > + real*8 function fun1 (ix) > + integer ix > + !$acc routine seq > + end function fun1 > + > + real*8 function fun2 (ix) > + integer ix > + !$acc routine seq > + end function fun2 > + end interface > + > + if (allocated (b)) call abort > + > + ! Test local usage of an allocated declared array. > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + a = 2.0 > + > + !$acc parallel loop > + do i = 1, n > + b(i) = i * a > + end do > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= i*a) call abort > + end do > + > + deallocate (b) > + > + ! Test the usage of an allocated declared array inside an acc > + ! routine subroutine. > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel > + call sub1 > + !$acc end parallel > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= i*2) call abort > + end do > + > + deallocate (b) > + > + ! Test the usage of an allocated declared array inside a host > + ! subroutine. > + > + call sub2 > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= 1.0) call abort > + end do > + > + deallocate (b) > + > + if (allocated (b)) call abort > + > + ! Test the usage of an allocated declared array inside an acc > + ! routine function. > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel loop > + do i = 1, n > + b(i) = 1.0 > + end do > + > + !$acc parallel loop > + do i = 1, n > + b(i) = fun1 (i) > + end do > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= i) call abort > + end do > + > + deallocate (b) > + > + ! Test the usage of an allocated declared array inside a host > + ! function. > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel loop > + do i = 1, n > + b(i) = 1.0 > + end do > + > + !$acc update host(b) > + > + do i = 1, n > + b(i) = fun2 (i) > + end do > + > + if (.not.acc_is_present (b)) call abort > + > + do i = 1, n > + if (b(i) /= i*i) call abort > + end do > + > + deallocate (b) > +end program test > + > +! Set each element in array 'b' at index i to i*2. > + > +subroutine sub1 > + use vars > + implicit none > + integer i > + !$acc routine gang > + > + !$acc loop > + do i = 1, n > + b(i) = i*2 > + end do > +end subroutine sub1 > + > +! Allocate array 'b', and set it to all 1.0. > + > +subroutine sub2 > + use vars > + use openacc > + implicit none > + integer i > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel loop > + do i = 1, n > + b(i) = 1.0 > + end do > +end subroutine sub2 > + > +! Return b(i) * i; > + > +real*8 function fun1 (i) > + use vars > + implicit none > + integer i > + !$acc routine seq > + > + fun1 = b(i) * i > +end function fun1 > + > +! Return b(i) * i * i; > + > +real*8 function fun2 (i) > + use vars > + implicit none > + integer i > + > + fun2 = b(i) * i * i > +end function fun2 > diff --git > a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 > b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 > new file mode 100644 index 0000000..3521a7f --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 > @@ -0,0 +1,48 @@ > +! Test declare create with allocatable scalars. > + > +! { dg-do run } > + > +program main > + use openacc > + implicit none > + integer, parameter :: n = 100 > + integer, allocatable :: a, c > + integer :: i, b(n) > + !$acc declare create (c) > + > + allocate (a) > + > + a = 50 > + > + !$acc parallel loop firstprivate(a) > + do i = 1, n; > + b(i) = a > + end do > + > + do i = 1, n > + if (b(i) /= a) call abort > + end do > + > + allocate (c) > + a = 100 > + > + if (.not.acc_is_present(c)) call abort > + > + !$acc parallel num_gangs(1) present(c) > + c = a > + !$acc end parallel > + > + !$acc update host(c) > + if (c /= a) call abort > + > + !$acc parallel loop > + do i = 1, n > + b(i) = c > + end do > + > + do i = 1, n > + if (b(i) /= a) call abort > + end do > + > + deallocate (a, c) > +end program main > diff --git > a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 > b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 > new file mode 100644 index 0000000..5d12d75 --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 > @@ -0,0 +1,218 @@ > +! Test declare create with allocatable arrays. > + > +! { dg-do run } > + > +module vars > + implicit none > + integer, parameter :: n = 100 > + real*8, allocatable :: a, b(:) > + !$acc declare create (a, b) > +end module vars > + > +program test > + use vars > + use openacc > + implicit none > + integer :: i > + > + interface > + subroutine sub1 > + !$acc routine gang > + end subroutine sub1 > + > + subroutine sub2 > + end subroutine sub2 > + > + real*8 function fun1 (ix) > + integer ix > + !$acc routine seq > + end function fun1 > + > + real*8 function fun2 (ix) > + integer ix > + !$acc routine seq > + end function fun2 > + end interface > + > + if (allocated (a)) call abort > + if (allocated (b)) call abort > + > + ! Test local usage of an allocated declared array. > + > + allocate (a) > + > + if (.not.allocated (a)) call abort > + if (acc_is_present (a) .neqv. .true.) call abort > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + a = 2.0 > + !$acc update device(a) > + > + !$acc parallel loop > + do i = 1, n > + b(i) = i * a > + end do > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= i*a) call abort > + end do > + > + deallocate (b) > + > + ! Test the usage of an allocated declared array inside an acc > + ! routine subroutine. > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel > + call sub1 > + !$acc end parallel > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= a+i*2) call abort > + end do > + > + deallocate (b) > + > + ! Test the usage of an allocated declared array inside a host > + ! subroutine. > + > + call sub2 > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= 1.0) call abort > + end do > + > + deallocate (b) > + > + if (allocated (b)) call abort > + > + ! Test the usage of an allocated declared array inside an acc > + ! routine function. > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel loop > + do i = 1, n > + b(i) = 1.0 > + end do > + > + !$acc parallel loop > + do i = 1, n > + b(i) = fun1 (i) > + end do > + > + if (.not.acc_is_present (b)) call abort > + > + !$acc update host(b) > + > + do i = 1, n > + if (b(i) /= i) call abort > + end do > + > + deallocate (b) > + > + ! Test the usage of an allocated declared array inside a host > + ! function. > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel loop > + do i = 1, n > + b(i) = 1.0 > + end do > + > + !$acc update host(b) > + > + do i = 1, n > + b(i) = fun2 (i) > + end do > + > + if (.not.acc_is_present (b)) call abort > + > + do i = 1, n > + if (b(i) /= i*a) call abort > + end do > + > + deallocate (a) > + deallocate (b) > +end program test > + > +! Set each element in array 'b' at index i to a+i*2. > + > +subroutine sub1 > + use vars > + implicit none > + integer i > + !$acc routine gang > + > + !$acc loop > + do i = 1, n > + b(i) = a+i*2 > + end do > +end subroutine sub1 > + > +! Allocate array 'b', and set it to all 1.0. > + > +subroutine sub2 > + use vars > + use openacc > + implicit none > + integer i > + > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + if (acc_is_present (b) .neqv. .true.) call abort > + > + !$acc parallel loop > + do i = 1, n > + b(i) = 1.0 > + end do > +end subroutine sub2 > + > +! Return b(i) * i; > + > +real*8 function fun1 (i) > + use vars > + implicit none > + integer i > + !$acc routine seq > + > + fun1 = b(i) * i > +end function fun1 > + > +! Return b(i) * i * a; > + > +real*8 function fun2 (i) > + use vars > + implicit none > + integer i > + > + fun2 = b(i) * i * a > +end function fun2 > diff --git > a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 > b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 > new file mode 100644 index 0000000..b4cf26e --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 > @@ -0,0 +1,66 @@ > +! Test declare create with allocatable arrays and scalars. The > unused +! declared array 'b' caused an ICE in the past. > + > +! { dg-do run } > + > +module vars > + implicit none > + integer, parameter :: n = 100 > + real*8, allocatable :: a, b(:) > + !$acc declare create (a, b) > +end module vars > + > +program test > + use vars > + implicit none > + integer :: i > + > + interface > + subroutine sub1 > + end subroutine sub1 > + > + subroutine sub2 > + end subroutine sub2 > + > + real*8 function fun1 (ix) > + integer ix > + !$acc routine seq > + end function fun1 > + > + real*8 function fun2 (ix) > + integer ix > + !$acc routine seq > + end function fun2 > + end interface > + > + if (allocated (a)) call abort > + if (allocated (b)) call abort > + > + ! Test the usage of an allocated declared array inside an acc > + ! routine subroutine. > + > + allocate (a) > + allocate (b(n)) > + > + if (.not.allocated (b)) call abort > + > + call sub1 > + > + !$acc update self(a) > + if (a /= 50) call abort > + > + deallocate (a) > + deallocate (b) > + > +end program test > + > +! Set 'a' to 50. > + > +subroutine sub1 > + use vars > + implicit none > + integer i > + > + a = 50 > + !$acc update device(a) > +end subroutine sub1
On Fri, 21 Sep 2018 03:14:22 +0200 Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote: > > diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c > > index 95ea615..2ac5908 100644 > > --- a/gcc/fortran/trans-array.c > > +++ b/gcc/fortran/trans-array.c > > @@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see > > #include "trans-types.h" > > #include "trans-array.h" > > #include "trans-const.h" > > +#include "trans-stmt.h" > > #include "dependency.h" > > please dont mix declarations and definitions, i.e. please put > gfc_trans_oacc_declare_allocate() into trans-openmp.c, and add the > declaration to trans.h, in the corresponding /* In trans-openmp.c */ > block there. Do you mean like this? Thanks, Julian ChangeLog 2018-09-20 Cesar Philippidis <cesar@codesourcery.com> Julian Brown <julian@codesourcery.com> gcc/ * omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare create, declare copyin and declare deviceptr to have local lifetimes. (convert_to_firstprivate_int): Handle pointer types. (convert_from_firstprivate_int): Likewise. Create local storage for the values being pointed to. Add new orig_type argument. (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. Add orig_type argument to convert_from_firstprivate_int call. Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize firstprivate VLAs. * tree-pretty-print.c (dump_omp_clause): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. gcc/fortran/ * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. (gfc_omp_clauses): Add update_allocatable. * trans-array.c (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate for decls that have oacc_declare_create attribute set. * trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC declare create, declare copyin and declare deviceptr clauses. (add_clause): Don't duplicate OpenACC declare clauses. Populate sym->backend_decl so that it can be used to determine if two symbols are unique. (find_module_oacc_declare_clauses): Relax oacc_declare_create to OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to match OpenACC 2.5 semantics. * trans-openmp.c (gfc_trans_omp_clauses): Use GOMP_MAP_ALWAYS_POINTER (for update directive) or GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for allocatable scalar decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} clauses. (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER for allocatable scalar data clauses inside acc update directives. (gfc_trans_oacc_declare_allocate): New function. * trans-stmt.c (gfc_trans_allocate): Call gfc_trans_oacc_declare_allocate for decls with oacc_declare_create attribute set. (gfc_trans_deallocate): Likewise. * trans.h (gfc_trans_oacc_declare_allocate): Declare. gcc/testsuite/ * gfortran.dg/goacc/declare-allocatable-1.f90: New test. include/ * gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and GOMP_MAP_FLAG_SPECIAL_4. libgomp/ * oacc-mem.c (gomp_acc_declare_allocate): New function. * oacc-parallel.c (GOACC_enter_exit_data): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. * testsuite/libgomp.oacc-fortran/allocatable-array.f90: New test. * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. commit 2601a2c2c6222026baf0e73cd2d9694c64356e77 Author: Julian Brown <julian@codesourcery.com> Date: Wed Sep 12 20:15:08 2018 -0700 Fortran "declare create"/allocate support for OpenACC gcc/ * omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare create, declare copyin and declare deviceptr to have local lifetimes. (convert_to_firstprivate_int): Handle pointer types. (convert_from_firstprivate_int): Likewise. Create local storage for the values being pointed to. Add new orig_type argument. (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. Add orig_type argument to convert_from_firstprivate_int call. Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize firstprivate VLAs. * tree-pretty-print.c (dump_omp_clause): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. gcc/fortran/ * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. (gfc_omp_clauses): Add update_allocatable. * trans-array.c (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate for decls that have oacc_declare_create attribute set. * trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC declare create, declare copyin and declare deviceptr clauses. (add_clause): Don't duplicate OpenACC declare clauses. Populate sym->backend_decl so that it can be used to determine if two symbols are unique. (find_module_oacc_declare_clauses): Relax oacc_declare_create to OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to match OpenACC 2.5 semantics. * trans-openmp.c (gfc_trans_omp_clauses): Use GOMP_MAP_ALWAYS_POINTER (for update directive) or GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for allocatable scalar decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} clauses. (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER for allocatable scalar data clauses inside acc update directives. (gfc_trans_oacc_declare_allocate): New function. * trans-stmt.c (gfc_trans_allocate): Call gfc_trans_oacc_declare_allocate for decls with oacc_declare_create attribute set. (gfc_trans_deallocate): Likewise. * trans.h (gfc_trans_oacc_declare_allocate): Declare. gcc/testsuite/ * gfortran.dg/goacc/declare-allocatable-1.f90: New test. include/ * gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and GOMP_MAP_FLAG_SPECIAL_4. libgomp/ * oacc-mem.c (gomp_acc_declare_allocate): New function. * oacc-parallel.c (GOACC_enter_exit_data): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. * testsuite/libgomp.oacc-fortran/allocatable-array.f90: New test. * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index b0518e2..48dc3d5 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1188,7 +1188,9 @@ enum gfc_omp_map_op OMP_MAP_RELEASE, OMP_MAP_ALWAYS_TO, OMP_MAP_ALWAYS_FROM, - OMP_MAP_ALWAYS_TOFROM + OMP_MAP_ALWAYS_TOFROM, + OMP_MAP_DECLARE_ALLOCATE, + OMP_MAP_DECLARE_DEALLOCATE }; enum gfc_omp_linear_op @@ -1344,7 +1346,7 @@ typedef struct gfc_omp_clauses gfc_expr_list *tile_list; unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; unsigned wait:1, par_auto:1, gang_static:1; - unsigned if_present:1, finalize:1; + unsigned if_present:1, finalize:1, update_allocatable:1; locus loc; } diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 95ea615..82308cb 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -5670,6 +5670,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_ref *ref, *prev_ref = NULL, *coref; bool allocatable, coarray, dimension, alloc_w_e3_arr_spec = false, non_ulimate_coarray_ptr_comp; + bool oacc_declare = false; ref = expr->ref; @@ -5684,6 +5685,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, allocatable = expr->symtree->n.sym->attr.allocatable; dimension = expr->symtree->n.sym->attr.dimension; non_ulimate_coarray_ptr_comp = false; + oacc_declare = expr->symtree->n.sym->attr.oacc_declare_create; } else { @@ -5845,7 +5847,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, /* Update the array descriptors. */ if (dimension) - gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + { + gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + + if (oacc_declare) + gfc_trans_oacc_declare_allocate (&set_descriptor_block, expr, true); + } /* Pointer arrays need the span field to be set. */ if (is_pointer_array (se->expr) diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 159c3db..89e78be 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1399,7 +1399,10 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list) if (sym_attr.omp_declare_target_link) list = tree_cons (get_identifier ("omp declare target link"), NULL_TREE, list); - else if (sym_attr.omp_declare_target) + else if (sym_attr.omp_declare_target + || sym_attr.oacc_declare_create + || sym_attr.oacc_declare_copyin + || sym_attr.oacc_declare_deviceptr) list = tree_cons (get_identifier ("omp declare target"), NULL_TREE, list); @@ -6218,13 +6221,20 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op) { gfc_omp_namelist *n; + if (!module_oacc_clauses) + module_oacc_clauses = gfc_get_omp_clauses (); + + if (sym->backend_decl == NULL) + gfc_get_symbol_decl (sym); + + for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = n->next) + if (n->sym->backend_decl == sym->backend_decl) + return; + n = gfc_get_omp_namelist (); n->sym = sym; n->u.map_op = map_op; - if (!module_oacc_clauses) - module_oacc_clauses = gfc_get_omp_clauses (); - if (module_oacc_clauses->lists[OMP_LIST_MAP]) n->next = module_oacc_clauses->lists[OMP_LIST_MAP]; @@ -6240,10 +6250,10 @@ find_module_oacc_declare_clauses (gfc_symbol *sym) gfc_omp_map_op map_op; if (sym->attr.oacc_declare_create) - map_op = OMP_MAP_FORCE_ALLOC; + map_op = OMP_MAP_ALLOC; if (sym->attr.oacc_declare_copyin) - map_op = OMP_MAP_FORCE_TO; + map_op = OMP_MAP_TO; if (sym->attr.oacc_declare_deviceptr) map_op = OMP_MAP_FORCE_DEVICEPTR; diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index f038f4c..e18c0af 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -2119,9 +2119,18 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, (TREE_TYPE (TREE_TYPE (decl))))) { tree orig_decl = decl; + enum gomp_map_kind gmk = GOMP_MAP_POINTER; + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) + && n->sym->attr.oacc_declare_create) + { + if (clauses->update_allocatable) + gmk = GOMP_MAP_ALWAYS_POINTER; + else + gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; + } node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node4, gmk); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -2330,6 +2339,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, case OMP_MAP_FORCE_DEVICEPTR: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEVICEPTR); break; + case OMP_MAP_DECLARE_ALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_ALLOCATE); + break; + case OMP_MAP_DECLARE_DEALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_DEALLOCATE); + break; default: gcc_unreachable (); } @@ -3082,12 +3097,14 @@ gfc_trans_oacc_executable_directive (gfc_code *code) { stmtblock_t block; tree stmt, oacc_clauses; + gfc_omp_clauses *clauses = code->ext.omp_clauses; enum tree_code construct_code; switch (code->op) { case EXEC_OACC_UPDATE: construct_code = OACC_UPDATE; + clauses->update_allocatable = 1; break; case EXEC_OACC_ENTER_DATA: construct_code = OACC_ENTER_DATA; @@ -3103,8 +3120,7 @@ gfc_trans_oacc_executable_directive (gfc_code *code) } gfc_start_block (&block); - oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, - code->loc); + oacc_clauses = gfc_trans_omp_clauses (&block, clauses, code->loc); stmt = build1_loc (input_location, construct_code, void_type_node, oacc_clauses); gfc_add_expr_to_block (&block, stmt); @@ -5099,6 +5115,41 @@ gfc_trans_oacc_declare (gfc_code *code) return gfc_finish_block (&block); } +/* Create an OpenACC enter or exit data construct for an OpenACC declared + variable that has been allocated or deallocated. */ + +tree +gfc_trans_oacc_declare_allocate (stmtblock_t *block, gfc_expr *expr, + bool allocate) +{ + gfc_omp_clauses *clauses = gfc_get_omp_clauses (); + gfc_omp_namelist *p = gfc_get_omp_namelist (); + tree oacc_clauses, stmt; + enum tree_code construct_code; + + p->sym = expr->symtree->n.sym; + p->where = expr->where; + + if (allocate) + { + p->u.map_op = OMP_MAP_DECLARE_ALLOCATE; + construct_code = OACC_ENTER_DATA; + } + else + { + p->u.map_op = OMP_MAP_DECLARE_DEALLOCATE; + construct_code = OACC_EXIT_DATA; + } + clauses->lists[OMP_LIST_MAP] = p; + + oacc_clauses = gfc_trans_omp_clauses (block, clauses, expr->where); + stmt = build1_loc (input_location, construct_code, void_type_node, + oacc_clauses); + gfc_add_expr_to_block (block, stmt); + + return stmt; +} + tree gfc_trans_oacc_directive (gfc_code *code) { diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 795d3cc..0b1a4b4 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -6422,6 +6422,10 @@ gfc_trans_allocate (gfc_code * code) label_finish, expr, 0); else gfc_allocate_using_malloc (&se.pre, se.expr, memsz, stat); + + /* Allocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, true); } else { @@ -6894,6 +6898,10 @@ gfc_trans_deallocate (gfc_code *code) if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))) { + if (!is_coarray + && expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + gfc_coarray_deregtype caf_dtype; if (is_coarray) @@ -6947,6 +6955,10 @@ gfc_trans_deallocate (gfc_code *code) } else { + /* Deallocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, label_finish, false, al->expr, al->expr->ts, is_coarray); diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 1813882..cefc4ec 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -780,6 +780,7 @@ bool gfc_omp_private_debug_clause (tree, bool); bool gfc_omp_private_outer_ref (tree); struct gimplify_omp_ctx; void gfc_omp_firstprivatize_type_sizes (struct gimplify_omp_ctx *, tree); +tree gfc_trans_oacc_declare_allocate (stmtblock_t *, gfc_expr *, bool); /* Runtime library function decls. */ extern GTY(()) tree gfor_fndecl_pause_numeric; diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 5fc4a66..bc5a5dd 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1196,7 +1196,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) && varpool_node::get_create (decl)->offloadable && !lookup_attribute ("omp declare target link", - DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl)) + && !is_gimple_omp_oacc (ctx->stmt)) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) @@ -7501,7 +7502,7 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) { - if (omp_is_reference (var)) + if (omp_is_reference (var) || POINTER_TYPE_P (type)) { tmp = create_tmp_var (type); gimplify_assign (tmp, build_simple_mem_ref (var), gs); @@ -7533,7 +7534,8 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) /* Like convert_to_firstprivate_int, but restore the original type. */ static tree -convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) +convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref, + gimple_seq *gs) { tree type = TREE_TYPE (var); tree new_type = NULL_TREE; @@ -7542,7 +7544,31 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) gcc_assert (TREE_CODE (var) == MEM_REF); var = TREE_OPERAND (var, 0); - if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + if (is_ref || POINTER_TYPE_P (orig_type)) + { + tree_code code = NOP_EXPR; + + if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == COMPLEX_TYPE) + code = VIEW_CONVERT_EXPR; + + if (code == VIEW_CONVERT_EXPR + && TYPE_SIZE (type) != TYPE_SIZE (orig_type)) + { + tree ptype = build_pointer_type (type); + var = fold_build1 (code, ptype, build_fold_addr_expr (var)); + var = build_simple_mem_ref (var); + } + else + var = fold_build1 (code, type, var); + + tree inst = create_tmp_var (type); + gimplify_assign (inst, var, gs); + var = build_fold_addr_expr (inst); + + return var; + } + + if (INTEGRAL_TYPE_P (var)) return fold_convert (type, var); gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); @@ -7553,16 +7579,8 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) tmp = create_tmp_var (new_type); var = fold_convert (new_type, var); gimplify_assign (tmp, var, gs); - var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); - - if (is_ref) - { - tmp = create_tmp_var (build_pointer_type (type)); - gimplify_assign (tmp, build_fold_addr_expr (var), gs); - var = tmp; - } - return var; + return fold_build1 (VIEW_CONVERT_EXPR, type, tmp); } /* Lower the GIMPLE_OMP_TARGET in the current statement @@ -7665,6 +7683,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GOMP_MAP_FORCE_DEVICEPTR: case GOMP_MAP_DEVICE_RESIDENT: case GOMP_MAP_LINK: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: gcc_assert (is_gimple_omp_oacc (stmt)); break; default: @@ -7743,7 +7763,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) && !maybe_lookup_field_in_outer_ctx (var, ctx)) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - x = convert_from_firstprivate_int (x, omp_is_reference (var), + x = convert_from_firstprivate_int (x, TREE_TYPE (new_var), + omp_is_reference (var), &fplist); gimplify_assign (new_var, x, &fplist); map_cnt++; @@ -7760,13 +7781,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_is_reference (new_var) - && TREE_CODE (var_type) != POINTER_TYPE) + /* Accelerators may not have alloca, so it's not + possible to privatize local storage for those + objects. */ + && TREE_CONSTANT (TYPE_SIZE (TREE_TYPE (var_type)))) { /* Create a local object to hold the instance value. */ const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); tree inst = create_tmp_var (TREE_TYPE (var_type), id); - gimplify_assign (inst, fold_indirect_ref (x), &fplist); + if (TREE_CODE (var_type) == POINTER_TYPE) + gimplify_assign (inst, x, &fplist); + else + gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } gimplify_assign (new_var, x, &fplist); @@ -7996,8 +8023,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); + tree new_var = lookup_decl (var, ctx); tree type = TREE_TYPE (var); - tree inner_type = omp_is_reference (var) + tree inner_type = omp_is_reference (new_var) ? TREE_TYPE (type) : type; if ((TREE_CODE (inner_type) == REAL_TYPE || (!omp_is_reference (var) diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 new file mode 100644 index 0000000..5349e0d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 @@ -0,0 +1,25 @@ +! Verify that OpenACC declared allocatable arrays have implicit +! OpenACC enter and exit pragmas at the time of allocation and +! deallocation. + +! { dg-additional-options "-fdump-tree-original" } + +program allocate + implicit none + integer, allocatable :: a(:), b + integer, parameter :: n = 100 + integer i + !$acc declare create(a,b) + + allocate (a(n), b) + + !$acc parallel loop copyout(a, b) + do i = 1, n + a(i) = b + end do + + deallocate (a, b) +end program allocate + +! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 2 "original" } } +! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 2 "original" } } diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 2c089b1..47b8aaa 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -755,6 +755,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case GOMP_MAP_LINK: pp_string (pp, "link"); break; + case GOMP_MAP_DECLARE_ALLOCATE: + pp_string (pp, "declare_allocate"); + break; + case GOMP_MAP_DECLARE_DEALLOCATE: + pp_string (pp, "declare_deallocate"); + break; default: gcc_unreachable (); } diff --git a/include/gomp-constants.h b/include/gomp-constants.h index ccfb657..9fc8767 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -40,6 +40,7 @@ #define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) #define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) #define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) +#define GOMP_MAP_FLAG_SPECIAL_4 (1 << 6) #define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ | GOMP_MAP_FLAG_SPECIAL_0) /* Flag to force a specific behavior (or else, trigger a run-time error). */ @@ -128,6 +129,11 @@ enum gomp_map_kind /* Decrement usage count and deallocate if zero. */ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_DELETE), + /* Mapping kinds for allocatable arrays. */ + GOMP_MAP_DECLARE_ALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_TO), + GOMP_MAP_DECLARE_DEALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_FROM), /* Internal to GCC, not used in libgomp. */ /* Do not map, but pointer assign a pointer instead. */ diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 3787ce4..c678a22 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -725,6 +725,34 @@ acc_update_self (void *h, size_t s) } void +gomp_acc_declare_allocate (bool allocate, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + gomp_debug (0, " %s: processing\n", __FUNCTION__); + + if (allocate) + { + assert (mapnum == 3); + + /* Allocate memory for the array data. */ + uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], sizes[0]); + + /* Update the PSET. */ + acc_update_device (hostaddrs[1], sizes[1]); + void *pset = acc_deviceptr (hostaddrs[1]); + acc_memcpy_to_device (pset, &data, sizeof (uintptr_t)); + } + else + { + /* Deallocate memory for the array data. */ + void *data = acc_deviceptr (hostaddrs[0]); + acc_free (data); + } + + gomp_debug (0, " %s: end\n", __FUNCTION__); +} + +void gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds) { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 070c5dc..f80b9a2 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -391,7 +391,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, || kind == GOMP_MAP_FORCE_PRESENT || kind == GOMP_MAP_FORCE_TO || kind == GOMP_MAP_TO - || kind == GOMP_MAP_ALLOC) + || kind == GOMP_MAP_ALLOC + || kind == GOMP_MAP_DECLARE_ALLOCATE) { data_enter = true; break; @@ -400,7 +401,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (kind == GOMP_MAP_RELEASE || kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FROM - || kind == GOMP_MAP_FORCE_FROM) + || kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_DECLARE_DEALLOCATE) break; gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -429,6 +431,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, { switch (kind) { + case GOMP_MAP_DECLARE_ALLOCATE: case GOMP_MAP_ALLOC: acc_present_or_create (hostaddrs[i], sizes[i]); break; @@ -449,8 +452,12 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - gomp_acc_insert_pointer (pointer, &hostaddrs[i], - &sizes[i], &kinds[i]); + if (kind == GOMP_MAP_DECLARE_ALLOCATE) + gomp_acc_declare_allocate (true, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else + gomp_acc_insert_pointer (pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and @@ -480,6 +487,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, acc_delete (hostaddrs[i], sizes[i]); } break; + case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: if (finalize) @@ -495,10 +503,16 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - bool copyfrom = (kind == GOMP_MAP_FORCE_FROM - || kind == GOMP_MAP_FROM); - gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, - finalize, pointer); + if (kind == GOMP_MAP_DECLARE_DEALLOCATE) + gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else + { + bool copyfrom = (kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_FROM); + gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, + async, finalize, pointer); + } /* See the above comment. */ i += pointer - 1; } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 new file mode 100644 index 0000000..3758031 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 @@ -0,0 +1,30 @@ +! Ensure that dummy arguments of allocatable arrays don't cause +! "libgomp: [...] is not mapped" errors. + +! { dg-do run } + +program main + integer, parameter :: n = 40 + integer, allocatable :: ar(:,:,:) + integer :: i + + allocate (ar(1:n,0:n-1,0:n-1)) + !$acc enter data copyin (ar) + + !$acc update host (ar) + + !$acc update device (ar) + + call update_ar (ar, n) + + !$acc exit data copyout (ar) +end program main + +subroutine update_ar (ar, n) + integer :: n + integer, dimension (1:n,0:n-1,0:n-1) :: ar + + !$acc update host (ar) + + !$acc update device (ar) +end subroutine update_ar diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 new file mode 100644 index 0000000..be86d14 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 @@ -0,0 +1,33 @@ +! Test non-declared allocatable scalars in OpenACC data clauses. + +! { dg-do run } + +program main + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + + allocate (a) + + a = 50 + + !$acc parallel loop + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + allocate (c) + + !$acc parallel copyout(c) num_gangs(1) + c = a + !$acc end parallel + + if (c /= a) call abort + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 new file mode 100644 index 0000000..d68b124 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 @@ -0,0 +1,211 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: b(:) + !$acc declare create (b) +end module vars + +program test + use vars + use openacc + implicit none + real*8 :: a + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (b)) call abort + + ! Test local usage of an allocated declared array. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + a = 2.0 + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*2) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) call abort + end do + + deallocate (b) + + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) call abort + + do i = 1, n + if (b(i) /= i*i) call abort + end do + + deallocate (b) +end program test + +! Set each element in array 'b' at index i to i*2. + +subroutine sub1 + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * i; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * i +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 new file mode 100644 index 0000000..3521a7f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 @@ -0,0 +1,48 @@ +! Test declare create with allocatable scalars. + +! { dg-do run } + +program main + use openacc + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + !$acc declare create (c) + + allocate (a) + + a = 50 + + !$acc parallel loop firstprivate(a) + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + allocate (c) + a = 100 + + if (.not.acc_is_present(c)) call abort + + !$acc parallel num_gangs(1) present(c) + c = a + !$acc end parallel + + !$acc update host(c) + if (c /= a) call abort + + !$acc parallel loop + do i = 1, n + b(i) = c + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 new file mode 100644 index 0000000..5d12d75 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 @@ -0,0 +1,218 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) call abort + if (allocated (b)) call abort + + ! Test local usage of an allocated declared array. + + allocate (a) + + if (.not.allocated (a)) call abort + if (acc_is_present (a) .neqv. .true.) call abort + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + a = 2.0 + !$acc update device(a) + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= a+i*2) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) call abort + end do + + deallocate (b) + + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) call abort + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (a) + deallocate (b) +end program test + +! Set each element in array 'b' at index i to a+i*2. + +subroutine sub1 + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = a+i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * a; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * a +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 new file mode 100644 index 0000000..b4cf26e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 @@ -0,0 +1,66 @@ +! Test declare create with allocatable arrays and scalars. The unused +! declared array 'b' caused an ICE in the past. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + implicit none + integer :: i + + interface + subroutine sub1 + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) call abort + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (a) + allocate (b(n)) + + if (.not.allocated (b)) call abort + + call sub1 + + !$acc update self(a) + if (a /= 50) call abort + + deallocate (a) + deallocate (b) + +end program test + +! Set 'a' to 50. + +subroutine sub1 + use vars + implicit none + integer i + + a = 50 + !$acc update device(a) +end subroutine sub1
On Sat, 22 Sep 2018 at 00:32, Julian Brown <julian@codesourcery.com> wrote: > > On Fri, 21 Sep 2018 03:14:22 +0200 > Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote: > > > > diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c > > > index 95ea615..2ac5908 100644 > > > --- a/gcc/fortran/trans-array.c > > > +++ b/gcc/fortran/trans-array.c > > > @@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see > > > #include "trans-types.h" > > > #include "trans-array.h" > > > #include "trans-const.h" > > > +#include "trans-stmt.h" > > > #include "dependency.h" > > > > please dont mix declarations and definitions, i.e. please put > > gfc_trans_oacc_declare_allocate() into trans-openmp.c, and add the > > declaration to trans.h, in the corresponding /* In trans-openmp.c */ > > block there. > > Do you mean like this? yes. @@ -6218,13 +6221,20 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op) { gfc_omp_namelist *n; + if (!module_oacc_clauses) + module_oacc_clauses = gfc_get_omp_clauses (); + + if (sym->backend_decl == NULL) + gfc_get_symbol_decl (sym); + + for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = n->next) + if (n->sym->backend_decl == sym->backend_decl) + return; + Didn't look too close, but should this throw an error instead of silently returning, or was the error emitted earlier? Furthermore the testcase uses "call abort" which is non-standard. We recently moved to "STOP n" in the testsuite, please adjust the new testcases accordingly. Like (modulo typos, untested): $ cat abort_to_stop.awk ; echo EOF # awk -f ./abort_to_stop.awk < foo.f90 > x && mv x foo.f90 BEGIN { IGNORECASE = 1; i = 1 } { while (sub(/call\s\s*abort/, "stop " i)) {let i++;}; print $0; } EOF thanks,
On Sun, 23 Sep 2018 10:48:52 +0200 Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote: > On Sat, 22 Sep 2018 at 00:32, Julian Brown <julian@codesourcery.com> > wrote: > > @@ -6218,13 +6221,20 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op > map_op) { > gfc_omp_namelist *n; > > + if (!module_oacc_clauses) > + module_oacc_clauses = gfc_get_omp_clauses (); > + > + if (sym->backend_decl == NULL) > + gfc_get_symbol_decl (sym); > + > + for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = > n->next) > + if (n->sym->backend_decl == sym->backend_decl) > + return; > + > > Didn't look too close, but should this throw an error instead of > silently returning, or was the error emitted earlier? The purpose of this fragment seems not to have been to do with error reporting at all, but rather to do with de-duplicating symbols that are listed (once) in clauses of "declare" directives in module blocks. Variables that are listed twice are diagnosed elsewhere. As for why the de-duplication is necessary, it seems to be because of the way that modules are instantiated in programs and in subroutines. E.g. in declare-allocatable-1.f90, we have something along the lines of: module vars implicit none integer, parameter :: n = 100 real*8, allocatable :: b(:) !$acc declare create (b) end module vars program test use vars ... end program test subroutine sub1 use vars ... end subroutine sub1 subroutine sub2 use vars ... end subroutine sub2 The function find_module_oacc_declare_clauses is called for each of 'test', 'sub1' and 'sub2'. But in trans-decl.c:finish_oacc_declare, the new declare clauses are only attached to the namespace for a FL_PROGRAM (i.e. 'test'), not for the subroutines. The module_oacc_clauses global variable is reset only after moving the clauses to a FL_PROGRAM's namespace, otherwise it accumulates. Hence, with the above code, we'd scan 'test', find declare clauses, and attach them to the namespace for 'test'. We'd then reset module_oacc_clauses. Then, we'd scan 'sub1', and accumulate declare clauses from 'vars' into a fresh module_oacc_clauses. Then we'd scan 'sub2', and accumulate declare clauses from 'vars' again: this is why the de-duplication in the patch seemed to be necessary. This seems wrong to me though, and admits the possibility of clauses instantiated in a subroutine "leaking" into a subsequent program block. As a tentative fix, I've tried resetting module_oacc_clauses before each time the find_module_oacc_declare_clauses traversal takes place, and removing the de-duplication code. This seems to work fine for the current tests in the testsuite, but I wonder the reason that things weren't done like like that to start with? The code dates back to 2015 (by James Norris): https://gcc.gnu.org/ml/gcc-patches/2015-11/msg02367.html > Furthermore the testcase uses "call abort" which is non-standard. > We recently moved to "STOP n" in the testsuite, please adjust the new > testcases accordingly. Fixed. Re-tested with offloading to NVPTX and bootstrapped. OK? Thank you, Julian ChangeLog gcc/ * omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare create, declare copyin and declare deviceptr to have local lifetimes. (convert_to_firstprivate_int): Handle pointer types. (convert_from_firstprivate_int): Likewise. Create local storage for the values being pointed to. Add new orig_type argument. (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. Add orig_type argument to convert_from_firstprivate_int call. Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize firstprivate VLAs. * tree-pretty-print.c (dump_omp_clause): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. gcc/fortran/ * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. (gfc_omp_clauses): Add update_allocatable. * trans-array.c (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate for decls that have oacc_declare_create attribute set. * trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC declare create, declare copyin and declare deviceptr clauses. (find_module_oacc_declare_clauses): Relax oacc_declare_create to OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to match OpenACC 2.5 semantics. (finish_oacc_declare): Reset module_oacc_clauses before scanning each namespace. * trans-openmp.c (gfc_trans_omp_clauses): Use GOMP_MAP_ALWAYS_POINTER (for update directive) or GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for allocatable scalar decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} clauses. (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER for allocatable scalar data clauses inside acc update directives. (gfc_trans_oacc_declare_allocate): New function. * trans-stmt.c (gfc_trans_allocate): Call gfc_trans_oacc_declare_allocate for decls with oacc_declare_create attribute set. (gfc_trans_deallocate): Likewise. * trans.h (gfc_trans_oacc_declare_allocate): Declare. gcc/testsuite/ * gfortran.dg/goacc/declare-allocatable-1.f90: New test. include/ * gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and GOMP_MAP_FLAG_SPECIAL_4. libgomp/ * oacc-mem.c (gomp_acc_declare_allocate): New function. * oacc-parallel.c (GOACC_enter_exit_data): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. * testsuite/libgomp.oacc-fortran/allocatable-array.f90: New test. * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. commit fc14b1a9ba30338170a623be83e25f23497162d1 Author: Julian Brown <julian@codesourcery.com> Date: Wed Sep 12 20:15:08 2018 -0700 Fortran "declare create"/allocate support for OpenACC gcc/ * omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare create, declare copyin and declare deviceptr to have local lifetimes. (convert_to_firstprivate_int): Handle pointer types. (convert_from_firstprivate_int): Likewise. Create local storage for the values being pointed to. Add new orig_type argument. (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. Add orig_type argument to convert_from_firstprivate_int call. Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize firstprivate VLAs. * tree-pretty-print.c (dump_omp_clause): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. gcc/fortran/ * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. (gfc_omp_clauses): Add update_allocatable. * trans-array.c (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate for decls that have oacc_declare_create attribute set. * trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC declare create, declare copyin and declare deviceptr clauses. (find_module_oacc_declare_clauses): Relax oacc_declare_create to OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to match OpenACC 2.5 semantics. (finish_oacc_declare): Reset module_oacc_clauses before scanning each namespace. * trans-openmp.c (gfc_trans_omp_clauses): Use GOMP_MAP_ALWAYS_POINTER (for update directive) or GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for allocatable scalar decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} clauses. (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER for allocatable scalar data clauses inside acc update directives. (gfc_trans_oacc_declare_allocate): New function. * trans-stmt.c (gfc_trans_allocate): Call gfc_trans_oacc_declare_allocate for decls with oacc_declare_create attribute set. (gfc_trans_deallocate): Likewise. * trans.h (gfc_trans_oacc_declare_allocate): Declare. gcc/testsuite/ * gfortran.dg/goacc/declare-allocatable-1.f90: New test. include/ * gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and GOMP_MAP_FLAG_SPECIAL_4. libgomp/ * oacc-mem.c (gomp_acc_declare_allocate): New function. * oacc-parallel.c (GOACC_enter_exit_data): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. * testsuite/libgomp.oacc-fortran/allocatable-array.f90: New test. * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index b0518e2..48dc3d5 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1188,7 +1188,9 @@ enum gfc_omp_map_op OMP_MAP_RELEASE, OMP_MAP_ALWAYS_TO, OMP_MAP_ALWAYS_FROM, - OMP_MAP_ALWAYS_TOFROM + OMP_MAP_ALWAYS_TOFROM, + OMP_MAP_DECLARE_ALLOCATE, + OMP_MAP_DECLARE_DEALLOCATE }; enum gfc_omp_linear_op @@ -1344,7 +1346,7 @@ typedef struct gfc_omp_clauses gfc_expr_list *tile_list; unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; unsigned wait:1, par_auto:1, gang_static:1; - unsigned if_present:1, finalize:1; + unsigned if_present:1, finalize:1, update_allocatable:1; locus loc; } diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 0d699ed..0f88241 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -5670,6 +5670,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_ref *ref, *prev_ref = NULL, *coref; bool allocatable, coarray, dimension, alloc_w_e3_arr_spec = false, non_ulimate_coarray_ptr_comp; + bool oacc_declare = false; ref = expr->ref; @@ -5684,6 +5685,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, allocatable = expr->symtree->n.sym->attr.allocatable; dimension = expr->symtree->n.sym->attr.dimension; non_ulimate_coarray_ptr_comp = false; + oacc_declare = expr->symtree->n.sym->attr.oacc_declare_create; } else { @@ -5845,7 +5847,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, /* Update the array descriptors. */ if (dimension) - gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + { + gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + + if (oacc_declare) + gfc_trans_oacc_declare_allocate (&set_descriptor_block, expr, true); + } /* Pointer arrays need the span field to be set. */ if (is_pointer_array (se->expr) diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 159c3db..37924d5 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1399,7 +1399,10 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list) if (sym_attr.omp_declare_target_link) list = tree_cons (get_identifier ("omp declare target link"), NULL_TREE, list); - else if (sym_attr.omp_declare_target) + else if (sym_attr.omp_declare_target + || sym_attr.oacc_declare_create + || sym_attr.oacc_declare_copyin + || sym_attr.oacc_declare_deviceptr) list = tree_cons (get_identifier ("omp declare target"), NULL_TREE, list); @@ -6240,10 +6243,10 @@ find_module_oacc_declare_clauses (gfc_symbol *sym) gfc_omp_map_op map_op; if (sym->attr.oacc_declare_create) - map_op = OMP_MAP_FORCE_ALLOC; + map_op = OMP_MAP_ALLOC; if (sym->attr.oacc_declare_copyin) - map_op = OMP_MAP_FORCE_TO; + map_op = OMP_MAP_TO; if (sym->attr.oacc_declare_deviceptr) map_op = OMP_MAP_FORCE_DEVICEPTR; @@ -6272,6 +6275,8 @@ finish_oacc_declare (gfc_namespace *ns, gfc_symbol *sym, bool block) gfc_omp_clauses *omp_clauses = NULL; gfc_omp_namelist *n, *p; + module_oacc_clauses = NULL; + gfc_traverse_ns (ns, find_module_oacc_declare_clauses); if (module_oacc_clauses && sym->attr.flavor == FL_PROGRAM) @@ -6283,7 +6288,6 @@ finish_oacc_declare (gfc_namespace *ns, gfc_symbol *sym, bool block) new_oc->clauses = module_oacc_clauses; ns->oacc_declare = new_oc; - module_oacc_clauses = NULL; } if (!ns->oacc_declare) diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index f038f4c..e18c0af 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -2119,9 +2119,18 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, (TREE_TYPE (TREE_TYPE (decl))))) { tree orig_decl = decl; + enum gomp_map_kind gmk = GOMP_MAP_POINTER; + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) + && n->sym->attr.oacc_declare_create) + { + if (clauses->update_allocatable) + gmk = GOMP_MAP_ALWAYS_POINTER; + else + gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; + } node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node4, gmk); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -2330,6 +2339,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, case OMP_MAP_FORCE_DEVICEPTR: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEVICEPTR); break; + case OMP_MAP_DECLARE_ALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_ALLOCATE); + break; + case OMP_MAP_DECLARE_DEALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_DEALLOCATE); + break; default: gcc_unreachable (); } @@ -3082,12 +3097,14 @@ gfc_trans_oacc_executable_directive (gfc_code *code) { stmtblock_t block; tree stmt, oacc_clauses; + gfc_omp_clauses *clauses = code->ext.omp_clauses; enum tree_code construct_code; switch (code->op) { case EXEC_OACC_UPDATE: construct_code = OACC_UPDATE; + clauses->update_allocatable = 1; break; case EXEC_OACC_ENTER_DATA: construct_code = OACC_ENTER_DATA; @@ -3103,8 +3120,7 @@ gfc_trans_oacc_executable_directive (gfc_code *code) } gfc_start_block (&block); - oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, - code->loc); + oacc_clauses = gfc_trans_omp_clauses (&block, clauses, code->loc); stmt = build1_loc (input_location, construct_code, void_type_node, oacc_clauses); gfc_add_expr_to_block (&block, stmt); @@ -5099,6 +5115,41 @@ gfc_trans_oacc_declare (gfc_code *code) return gfc_finish_block (&block); } +/* Create an OpenACC enter or exit data construct for an OpenACC declared + variable that has been allocated or deallocated. */ + +tree +gfc_trans_oacc_declare_allocate (stmtblock_t *block, gfc_expr *expr, + bool allocate) +{ + gfc_omp_clauses *clauses = gfc_get_omp_clauses (); + gfc_omp_namelist *p = gfc_get_omp_namelist (); + tree oacc_clauses, stmt; + enum tree_code construct_code; + + p->sym = expr->symtree->n.sym; + p->where = expr->where; + + if (allocate) + { + p->u.map_op = OMP_MAP_DECLARE_ALLOCATE; + construct_code = OACC_ENTER_DATA; + } + else + { + p->u.map_op = OMP_MAP_DECLARE_DEALLOCATE; + construct_code = OACC_EXIT_DATA; + } + clauses->lists[OMP_LIST_MAP] = p; + + oacc_clauses = gfc_trans_omp_clauses (block, clauses, expr->where); + stmt = build1_loc (input_location, construct_code, void_type_node, + oacc_clauses); + gfc_add_expr_to_block (block, stmt); + + return stmt; +} + tree gfc_trans_oacc_directive (gfc_code *code) { diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index ef9e519..9d61777 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -6421,6 +6421,10 @@ gfc_trans_allocate (gfc_code * code) label_finish, expr, 0); else gfc_allocate_using_malloc (&se.pre, se.expr, memsz, stat); + + /* Allocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, true); } else { @@ -6893,6 +6897,10 @@ gfc_trans_deallocate (gfc_code *code) if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))) { + if (!is_coarray + && expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + gfc_coarray_deregtype caf_dtype; if (is_coarray) @@ -6946,6 +6954,10 @@ gfc_trans_deallocate (gfc_code *code) } else { + /* Deallocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, label_finish, false, al->expr, al->expr->ts, is_coarray); diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 4f33a89..f2677df 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -781,6 +781,7 @@ bool gfc_omp_private_debug_clause (tree, bool); bool gfc_omp_private_outer_ref (tree); struct gimplify_omp_ctx; void gfc_omp_firstprivatize_type_sizes (struct gimplify_omp_ctx *, tree); +tree gfc_trans_oacc_declare_allocate (stmtblock_t *, gfc_expr *, bool); /* Runtime library function decls. */ extern GTY(()) tree gfor_fndecl_pause_numeric; diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 5fc4a66..bc5a5dd 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1196,7 +1196,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) && varpool_node::get_create (decl)->offloadable && !lookup_attribute ("omp declare target link", - DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl)) + && !is_gimple_omp_oacc (ctx->stmt)) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) @@ -7501,7 +7502,7 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) { - if (omp_is_reference (var)) + if (omp_is_reference (var) || POINTER_TYPE_P (type)) { tmp = create_tmp_var (type); gimplify_assign (tmp, build_simple_mem_ref (var), gs); @@ -7533,7 +7534,8 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) /* Like convert_to_firstprivate_int, but restore the original type. */ static tree -convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) +convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref, + gimple_seq *gs) { tree type = TREE_TYPE (var); tree new_type = NULL_TREE; @@ -7542,7 +7544,31 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) gcc_assert (TREE_CODE (var) == MEM_REF); var = TREE_OPERAND (var, 0); - if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + if (is_ref || POINTER_TYPE_P (orig_type)) + { + tree_code code = NOP_EXPR; + + if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == COMPLEX_TYPE) + code = VIEW_CONVERT_EXPR; + + if (code == VIEW_CONVERT_EXPR + && TYPE_SIZE (type) != TYPE_SIZE (orig_type)) + { + tree ptype = build_pointer_type (type); + var = fold_build1 (code, ptype, build_fold_addr_expr (var)); + var = build_simple_mem_ref (var); + } + else + var = fold_build1 (code, type, var); + + tree inst = create_tmp_var (type); + gimplify_assign (inst, var, gs); + var = build_fold_addr_expr (inst); + + return var; + } + + if (INTEGRAL_TYPE_P (var)) return fold_convert (type, var); gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); @@ -7553,16 +7579,8 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) tmp = create_tmp_var (new_type); var = fold_convert (new_type, var); gimplify_assign (tmp, var, gs); - var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); - - if (is_ref) - { - tmp = create_tmp_var (build_pointer_type (type)); - gimplify_assign (tmp, build_fold_addr_expr (var), gs); - var = tmp; - } - return var; + return fold_build1 (VIEW_CONVERT_EXPR, type, tmp); } /* Lower the GIMPLE_OMP_TARGET in the current statement @@ -7665,6 +7683,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GOMP_MAP_FORCE_DEVICEPTR: case GOMP_MAP_DEVICE_RESIDENT: case GOMP_MAP_LINK: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: gcc_assert (is_gimple_omp_oacc (stmt)); break; default: @@ -7743,7 +7763,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) && !maybe_lookup_field_in_outer_ctx (var, ctx)) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - x = convert_from_firstprivate_int (x, omp_is_reference (var), + x = convert_from_firstprivate_int (x, TREE_TYPE (new_var), + omp_is_reference (var), &fplist); gimplify_assign (new_var, x, &fplist); map_cnt++; @@ -7760,13 +7781,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_is_reference (new_var) - && TREE_CODE (var_type) != POINTER_TYPE) + /* Accelerators may not have alloca, so it's not + possible to privatize local storage for those + objects. */ + && TREE_CONSTANT (TYPE_SIZE (TREE_TYPE (var_type)))) { /* Create a local object to hold the instance value. */ const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); tree inst = create_tmp_var (TREE_TYPE (var_type), id); - gimplify_assign (inst, fold_indirect_ref (x), &fplist); + if (TREE_CODE (var_type) == POINTER_TYPE) + gimplify_assign (inst, x, &fplist); + else + gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } gimplify_assign (new_var, x, &fplist); @@ -7996,8 +8023,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); + tree new_var = lookup_decl (var, ctx); tree type = TREE_TYPE (var); - tree inner_type = omp_is_reference (var) + tree inner_type = omp_is_reference (new_var) ? TREE_TYPE (type) : type; if ((TREE_CODE (inner_type) == REAL_TYPE || (!omp_is_reference (var) diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 new file mode 100644 index 0000000..5349e0d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 @@ -0,0 +1,25 @@ +! Verify that OpenACC declared allocatable arrays have implicit +! OpenACC enter and exit pragmas at the time of allocation and +! deallocation. + +! { dg-additional-options "-fdump-tree-original" } + +program allocate + implicit none + integer, allocatable :: a(:), b + integer, parameter :: n = 100 + integer i + !$acc declare create(a,b) + + allocate (a(n), b) + + !$acc parallel loop copyout(a, b) + do i = 1, n + a(i) = b + end do + + deallocate (a, b) +end program allocate + +! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 2 "original" } } +! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 2 "original" } } diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 75b939a..0bbd91a 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -755,6 +755,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case GOMP_MAP_LINK: pp_string (pp, "link"); break; + case GOMP_MAP_DECLARE_ALLOCATE: + pp_string (pp, "declare_allocate"); + break; + case GOMP_MAP_DECLARE_DEALLOCATE: + pp_string (pp, "declare_deallocate"); + break; default: gcc_unreachable (); } diff --git a/include/gomp-constants.h b/include/gomp-constants.h index ccfb657..9fc8767 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -40,6 +40,7 @@ #define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) #define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) #define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) +#define GOMP_MAP_FLAG_SPECIAL_4 (1 << 6) #define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ | GOMP_MAP_FLAG_SPECIAL_0) /* Flag to force a specific behavior (or else, trigger a run-time error). */ @@ -128,6 +129,11 @@ enum gomp_map_kind /* Decrement usage count and deallocate if zero. */ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_DELETE), + /* Mapping kinds for allocatable arrays. */ + GOMP_MAP_DECLARE_ALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_TO), + GOMP_MAP_DECLARE_DEALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_FROM), /* Internal to GCC, not used in libgomp. */ /* Do not map, but pointer assign a pointer instead. */ diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 3787ce4..c678a22 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -725,6 +725,34 @@ acc_update_self (void *h, size_t s) } void +gomp_acc_declare_allocate (bool allocate, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + gomp_debug (0, " %s: processing\n", __FUNCTION__); + + if (allocate) + { + assert (mapnum == 3); + + /* Allocate memory for the array data. */ + uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], sizes[0]); + + /* Update the PSET. */ + acc_update_device (hostaddrs[1], sizes[1]); + void *pset = acc_deviceptr (hostaddrs[1]); + acc_memcpy_to_device (pset, &data, sizeof (uintptr_t)); + } + else + { + /* Deallocate memory for the array data. */ + void *data = acc_deviceptr (hostaddrs[0]); + acc_free (data); + } + + gomp_debug (0, " %s: end\n", __FUNCTION__); +} + +void gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds) { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 070c5dc..f80b9a2 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -391,7 +391,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, || kind == GOMP_MAP_FORCE_PRESENT || kind == GOMP_MAP_FORCE_TO || kind == GOMP_MAP_TO - || kind == GOMP_MAP_ALLOC) + || kind == GOMP_MAP_ALLOC + || kind == GOMP_MAP_DECLARE_ALLOCATE) { data_enter = true; break; @@ -400,7 +401,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (kind == GOMP_MAP_RELEASE || kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FROM - || kind == GOMP_MAP_FORCE_FROM) + || kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_DECLARE_DEALLOCATE) break; gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -429,6 +431,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, { switch (kind) { + case GOMP_MAP_DECLARE_ALLOCATE: case GOMP_MAP_ALLOC: acc_present_or_create (hostaddrs[i], sizes[i]); break; @@ -449,8 +452,12 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - gomp_acc_insert_pointer (pointer, &hostaddrs[i], - &sizes[i], &kinds[i]); + if (kind == GOMP_MAP_DECLARE_ALLOCATE) + gomp_acc_declare_allocate (true, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else + gomp_acc_insert_pointer (pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and @@ -480,6 +487,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, acc_delete (hostaddrs[i], sizes[i]); } break; + case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: if (finalize) @@ -495,10 +503,16 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - bool copyfrom = (kind == GOMP_MAP_FORCE_FROM - || kind == GOMP_MAP_FROM); - gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, - finalize, pointer); + if (kind == GOMP_MAP_DECLARE_DEALLOCATE) + gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else + { + bool copyfrom = (kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_FROM); + gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, + async, finalize, pointer); + } /* See the above comment. */ i += pointer - 1; } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 new file mode 100644 index 0000000..3758031 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 @@ -0,0 +1,30 @@ +! Ensure that dummy arguments of allocatable arrays don't cause +! "libgomp: [...] is not mapped" errors. + +! { dg-do run } + +program main + integer, parameter :: n = 40 + integer, allocatable :: ar(:,:,:) + integer :: i + + allocate (ar(1:n,0:n-1,0:n-1)) + !$acc enter data copyin (ar) + + !$acc update host (ar) + + !$acc update device (ar) + + call update_ar (ar, n) + + !$acc exit data copyout (ar) +end program main + +subroutine update_ar (ar, n) + integer :: n + integer, dimension (1:n,0:n-1,0:n-1) :: ar + + !$acc update host (ar) + + !$acc update device (ar) +end subroutine update_ar diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 new file mode 100644 index 0000000..42b3408 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 @@ -0,0 +1,33 @@ +! Test non-declared allocatable scalars in OpenACC data clauses. + +! { dg-do run } + +program main + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + + allocate (a) + + a = 50 + + !$acc parallel loop + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) stop 1 + end do + + allocate (c) + + !$acc parallel copyout(c) num_gangs(1) + c = a + !$acc end parallel + + if (c /= a) stop 2 + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 new file mode 100644 index 0000000..6482698 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 @@ -0,0 +1,211 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: b(:) + !$acc declare create (b) +end module vars + +program test + use vars + use openacc + implicit none + real*8 :: a + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (b)) stop 1 + + ! Test local usage of an allocated declared array. + + allocate (b(n)) + + if (.not.allocated (b)) stop 2 + if (acc_is_present (b) .neqv. .true.) stop 3 + + a = 2.0 + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) stop 4 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) stop 5 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) stop 6 + if (acc_is_present (b) .neqv. .true.) stop 7 + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) stop 8 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*2) stop 9 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) stop 10 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) stop 11 + end do + + deallocate (b) + + if (allocated (b)) stop 12 + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) stop 13 + if (acc_is_present (b) .neqv. .true.) stop 14 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) stop 15 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) stop 16 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) stop 17 + if (acc_is_present (b) .neqv. .true.) stop 18 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) stop 19 + + do i = 1, n + if (b(i) /= i*i) stop 20 + end do + + deallocate (b) +end program test + +! Set each element in array 'b' at index i to i*2. + +subroutine sub1 + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) stop 21 + if (acc_is_present (b) .neqv. .true.) stop 22 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * i; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * i +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 new file mode 100644 index 0000000..df5ab26 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 @@ -0,0 +1,48 @@ +! Test declare create with allocatable scalars. + +! { dg-do run } + +program main + use openacc + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + !$acc declare create (c) + + allocate (a) + + a = 50 + + !$acc parallel loop firstprivate(a) + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) stop 1 + end do + + allocate (c) + a = 100 + + if (.not.acc_is_present(c)) stop 2 + + !$acc parallel num_gangs(1) present(c) + c = a + !$acc end parallel + + !$acc update host(c) + if (c /= a) stop 3 + + !$acc parallel loop + do i = 1, n + b(i) = c + end do + + do i = 1, n + if (b(i) /= a) stop 4 + end do + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 new file mode 100644 index 0000000..ec915aa --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 @@ -0,0 +1,218 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) stop 1 + if (allocated (b)) stop 2 + + ! Test local usage of an allocated declared array. + + allocate (a) + + if (.not.allocated (a)) stop 3 + if (acc_is_present (a) .neqv. .true.) stop 4 + + allocate (b(n)) + + if (.not.allocated (b)) stop 5 + if (acc_is_present (b) .neqv. .true.) stop 6 + + a = 2.0 + !$acc update device(a) + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) stop 7 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) stop 8 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) stop 9 + if (acc_is_present (b) .neqv. .true.) stop 10 + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) stop 11 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= a+i*2) stop 12 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) stop 13 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) stop 14 + end do + + deallocate (b) + + if (allocated (b)) stop 15 + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) stop 16 + if (acc_is_present (b) .neqv. .true.) stop 17 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) stop 18 + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) stop 19 + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) stop 20 + if (acc_is_present (b) .neqv. .true.) stop 21 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) stop 22 + + do i = 1, n + if (b(i) /= i*a) stop 23 + end do + + deallocate (a) + deallocate (b) +end program test + +! Set each element in array 'b' at index i to a+i*2. + +subroutine sub1 + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = a+i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) stop 24 + if (acc_is_present (b) .neqv. .true.) stop 25 + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * a; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * a +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 new file mode 100644 index 0000000..afbe52f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 @@ -0,0 +1,66 @@ +! Test declare create with allocatable arrays and scalars. The unused +! declared array 'b' caused an ICE in the past. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + implicit none + integer :: i + + interface + subroutine sub1 + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) stop 1 + if (allocated (b)) stop 2 + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (a) + allocate (b(n)) + + if (.not.allocated (b)) stop 3 + + call sub1 + + !$acc update self(a) + if (a /= 50) stop 4 + + deallocate (a) + deallocate (b) + +end program test + +! Set 'a' to 50. + +subroutine sub1 + use vars + implicit none + integer i + + a = 50 + !$acc update device(a) +end subroutine sub1
On Thu, Oct 04, 2018 at 02:04:13PM +0100, Julian Brown wrote: > gcc/ > * omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare > create, declare copyin and declare deviceptr to have local lifetimes. > (convert_to_firstprivate_int): Handle pointer types. > (convert_from_firstprivate_int): Likewise. Create local storage for > the values being pointed to. Add new orig_type argument. > (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. > Add orig_type argument to convert_from_firstprivate_int call. > Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize > firstprivate VLAs. > * tree-pretty-print.c (dump_omp_clause): Handle > GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. > > gcc/fortran/ > * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, > OMP_MAP_DECLARE_DEALLOCATE. > (gfc_omp_clauses): Add update_allocatable. > * trans-array.c (gfc_array_allocate): Call > gfc_trans_oacc_declare_allocate for decls that have oacc_declare_create > attribute set. > * trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC > declare create, declare copyin and declare deviceptr clauses. > (find_module_oacc_declare_clauses): Relax oacc_declare_create to > OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to > match OpenACC 2.5 semantics. > (finish_oacc_declare): Reset module_oacc_clauses before scanning each > namespace. > * trans-openmp.c (gfc_trans_omp_clauses): Use GOMP_MAP_ALWAYS_POINTER > (for update directive) or GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for > allocatable scalar decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} > clauses. > (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER > for allocatable scalar data clauses inside acc update directives. > (gfc_trans_oacc_declare_allocate): New function. > * trans-stmt.c (gfc_trans_allocate): Call > gfc_trans_oacc_declare_allocate for decls with oacc_declare_create > attribute set. > (gfc_trans_deallocate): Likewise. > * trans.h (gfc_trans_oacc_declare_allocate): Declare. > > gcc/testsuite/ > * gfortran.dg/goacc/declare-allocatable-1.f90: New test. > > include/ > * gomp-constants.h (enum gomp_map_kind): Define > GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and GOMP_MAP_FLAG_SPECIAL_4. > > libgomp/ > * oacc-mem.c (gomp_acc_declare_allocate): New function. > * oacc-parallel.c (GOACC_enter_exit_data): Handle > GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. > * testsuite/libgomp.oacc-fortran/allocatable-array.f90: New test. > * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. > * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. If Thomas is ok with this, it is fine for me. Jakub
From b63d0329fb73679b07f6318b8dd092113d5c8505 Mon Sep 17 00:00:00 2001 From: Julian Brown <julian@codesourcery.com> Date: Wed, 12 Sep 2018 20:15:08 -0700 Subject: [PATCH 2/2] Fortran "declare create"/allocate support for OpenACC gcc/ * omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare create, declare copyin and declare deviceptr to have local lifetimes. (convert_to_firstprivate_int): Handle pointer types. (convert_from_firstprivate_int): Likewise. Create local storage for the values being pointed to. Add new orig_type argument. (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. Add orig_type argument to convert_from_firstprivate_int call. Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. Don't privatize firstprivate VLAs. * tree-pretty-print.c (dump_omp_clause): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. gcc/fortran/ * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. (gfc_omp_clauses): Add update_allocatable. * trans-array.c (trans-stmt.h): Include. (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate for decls that have oacc_declare_create attribute set. * trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC declare create, declare copyin and declare deviceptr clauses. (add_clause): Don't duplicate OpenACC declare clauses. Populate sym->backend_decl so that it can be used to determine if two symbols are unique. (find_module_oacc_declare_clauses): Relax oacc_declare_create to OMP_MAP_ALLOC, and oacc_declare_copyin to OMP_MAP_TO, in order to match OpenACC 2.5 semantics. * trans-openmp.c (gfc_trans_omp_clauses): Use GOMP_MAP_ALWAYS_POINTER (for update directive) or GOMP_MAP_FIRSTPRIVATE_POINTER (otherwise) for allocatable scalar decls. Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} clauses. (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER for allocatable scalar data clauses inside acc update directives. (gfc_trans_oacc_declare_allocate): New function. * trans-stmt.c (gfc_trans_allocate): Call gfc_trans_oacc_declare_allocate for decls with oacc_declare_create attribute set. (gfc_trans_deallocate): Likewise. * trans-stmt.h (gfc_trans_oacc_declare_allocate): Declare. gcc/testsuite/ * gfortran.dg/goacc/declare-allocatable-1.f90: New test. include/ * gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} and GOMP_MAP_FLAG_SPECIAL_4. libgomp/ * oacc-mem.c (gomp_acc_declare_allocate): New function. * oacc-parallel.c (GOACC_enter_exit_data): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. * testsuite/libgomp.oacc-fortran/allocatable-array.f90: New test. * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. --- gcc/fortran/gfortran.h | 6 +- gcc/fortran/trans-array.c | 10 +- gcc/fortran/trans-decl.c | 22 ++- gcc/fortran/trans-openmp.c | 57 +++++- gcc/fortran/trans-stmt.c | 12 ++ gcc/fortran/trans-stmt.h | 1 + gcc/omp-low.c | 62 ++++-- .../gfortran.dg/goacc/declare-allocatable-1.f90 | 25 +++ gcc/tree-pretty-print.c | 6 + include/gomp-constants.h | 6 + libgomp/oacc-mem.c | 28 +++ libgomp/oacc-parallel.c | 30 ++- .../libgomp.oacc-fortran/allocatable-array-1.f90 | 30 +++ .../libgomp.oacc-fortran/allocatable-scalar.f90 | 33 ++++ .../libgomp.oacc-fortran/declare-allocatable-1.f90 | 211 ++++++++++++++++++++ .../libgomp.oacc-fortran/declare-allocatable-2.f90 | 48 +++++ .../libgomp.oacc-fortran/declare-allocatable-3.f90 | 218 +++++++++++++++++++++ .../libgomp.oacc-fortran/declare-allocatable-4.f90 | 66 +++++++ 18 files changed, 834 insertions(+), 37 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 3359974..92e13d9 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1188,7 +1188,9 @@ enum gfc_omp_map_op OMP_MAP_RELEASE, OMP_MAP_ALWAYS_TO, OMP_MAP_ALWAYS_FROM, - OMP_MAP_ALWAYS_TOFROM + OMP_MAP_ALWAYS_TOFROM, + OMP_MAP_DECLARE_ALLOCATE, + OMP_MAP_DECLARE_DEALLOCATE }; enum gfc_omp_linear_op @@ -1344,7 +1346,7 @@ typedef struct gfc_omp_clauses gfc_expr_list *tile_list; unsigned async:1, gang:1, worker:1, vector:1, seq:1, independent:1; unsigned wait:1, par_auto:1, gang_static:1; - unsigned if_present:1, finalize:1; + unsigned if_present:1, finalize:1, update_allocatable:1; locus loc; } diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 95ea615..2ac5908 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see #include "trans-types.h" #include "trans-array.h" #include "trans-const.h" +#include "trans-stmt.h" #include "dependency.h" static bool gfc_get_array_constructor_size (mpz_t *, gfc_constructor_base); @@ -5670,6 +5671,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_ref *ref, *prev_ref = NULL, *coref; bool allocatable, coarray, dimension, alloc_w_e3_arr_spec = false, non_ulimate_coarray_ptr_comp; + bool oacc_declare = false; ref = expr->ref; @@ -5684,6 +5686,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, allocatable = expr->symtree->n.sym->attr.allocatable; dimension = expr->symtree->n.sym->attr.dimension; non_ulimate_coarray_ptr_comp = false; + oacc_declare = expr->symtree->n.sym->attr.oacc_declare_create; } else { @@ -5845,7 +5848,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, /* Update the array descriptors. */ if (dimension) - gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + { + gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + + if (oacc_declare) + gfc_trans_oacc_declare_allocate (&set_descriptor_block, expr, true); + } /* Pointer arrays need the span field to be set. */ if (is_pointer_array (se->expr) diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 06066eb..df9bdaf 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1399,7 +1399,10 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list) if (sym_attr.omp_declare_target_link) list = tree_cons (get_identifier ("omp declare target link"), NULL_TREE, list); - else if (sym_attr.omp_declare_target) + else if (sym_attr.omp_declare_target + || sym_attr.oacc_declare_create + || sym_attr.oacc_declare_copyin + || sym_attr.oacc_declare_deviceptr) list = tree_cons (get_identifier ("omp declare target"), NULL_TREE, list); @@ -6218,13 +6221,20 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op) { gfc_omp_namelist *n; + if (!module_oacc_clauses) + module_oacc_clauses = gfc_get_omp_clauses (); + + if (sym->backend_decl == NULL) + gfc_get_symbol_decl (sym); + + for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = n->next) + if (n->sym->backend_decl == sym->backend_decl) + return; + n = gfc_get_omp_namelist (); n->sym = sym; n->u.map_op = map_op; - if (!module_oacc_clauses) - module_oacc_clauses = gfc_get_omp_clauses (); - if (module_oacc_clauses->lists[OMP_LIST_MAP]) n->next = module_oacc_clauses->lists[OMP_LIST_MAP]; @@ -6240,10 +6250,10 @@ find_module_oacc_declare_clauses (gfc_symbol *sym) gfc_omp_map_op map_op; if (sym->attr.oacc_declare_create) - map_op = OMP_MAP_FORCE_ALLOC; + map_op = OMP_MAP_ALLOC; if (sym->attr.oacc_declare_copyin) - map_op = OMP_MAP_FORCE_TO; + map_op = OMP_MAP_TO; if (sym->attr.oacc_declare_deviceptr) map_op = OMP_MAP_FORCE_DEVICEPTR; diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index f038f4c..e18c0af 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -2119,9 +2119,18 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, (TREE_TYPE (TREE_TYPE (decl))))) { tree orig_decl = decl; + enum gomp_map_kind gmk = GOMP_MAP_POINTER; + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) + && n->sym->attr.oacc_declare_create) + { + if (clauses->update_allocatable) + gmk = GOMP_MAP_ALWAYS_POINTER; + else + gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; + } node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node4, gmk); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -2330,6 +2339,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, case OMP_MAP_FORCE_DEVICEPTR: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEVICEPTR); break; + case OMP_MAP_DECLARE_ALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_ALLOCATE); + break; + case OMP_MAP_DECLARE_DEALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_DEALLOCATE); + break; default: gcc_unreachable (); } @@ -3082,12 +3097,14 @@ gfc_trans_oacc_executable_directive (gfc_code *code) { stmtblock_t block; tree stmt, oacc_clauses; + gfc_omp_clauses *clauses = code->ext.omp_clauses; enum tree_code construct_code; switch (code->op) { case EXEC_OACC_UPDATE: construct_code = OACC_UPDATE; + clauses->update_allocatable = 1; break; case EXEC_OACC_ENTER_DATA: construct_code = OACC_ENTER_DATA; @@ -3103,8 +3120,7 @@ gfc_trans_oacc_executable_directive (gfc_code *code) } gfc_start_block (&block); - oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, - code->loc); + oacc_clauses = gfc_trans_omp_clauses (&block, clauses, code->loc); stmt = build1_loc (input_location, construct_code, void_type_node, oacc_clauses); gfc_add_expr_to_block (&block, stmt); @@ -5099,6 +5115,41 @@ gfc_trans_oacc_declare (gfc_code *code) return gfc_finish_block (&block); } +/* Create an OpenACC enter or exit data construct for an OpenACC declared + variable that has been allocated or deallocated. */ + +tree +gfc_trans_oacc_declare_allocate (stmtblock_t *block, gfc_expr *expr, + bool allocate) +{ + gfc_omp_clauses *clauses = gfc_get_omp_clauses (); + gfc_omp_namelist *p = gfc_get_omp_namelist (); + tree oacc_clauses, stmt; + enum tree_code construct_code; + + p->sym = expr->symtree->n.sym; + p->where = expr->where; + + if (allocate) + { + p->u.map_op = OMP_MAP_DECLARE_ALLOCATE; + construct_code = OACC_ENTER_DATA; + } + else + { + p->u.map_op = OMP_MAP_DECLARE_DEALLOCATE; + construct_code = OACC_EXIT_DATA; + } + clauses->lists[OMP_LIST_MAP] = p; + + oacc_clauses = gfc_trans_omp_clauses (block, clauses, expr->where); + stmt = build1_loc (input_location, construct_code, void_type_node, + oacc_clauses); + gfc_add_expr_to_block (block, stmt); + + return stmt; +} + tree gfc_trans_oacc_directive (gfc_code *code) { diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 795d3cc..0b1a4b4 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -6422,6 +6422,10 @@ gfc_trans_allocate (gfc_code * code) label_finish, expr, 0); else gfc_allocate_using_malloc (&se.pre, se.expr, memsz, stat); + + /* Allocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, true); } else { @@ -6894,6 +6898,10 @@ gfc_trans_deallocate (gfc_code *code) if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (se.expr))) { + if (!is_coarray + && expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + gfc_coarray_deregtype caf_dtype; if (is_coarray) @@ -6947,6 +6955,10 @@ gfc_trans_deallocate (gfc_code *code) } else { + /* Deallocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, label_finish, false, al->expr, al->expr->ts, is_coarray); diff --git a/gcc/fortran/trans-stmt.h b/gcc/fortran/trans-stmt.h index 848c7d9..0597579 100644 --- a/gcc/fortran/trans-stmt.h +++ b/gcc/fortran/trans-stmt.h @@ -72,6 +72,7 @@ tree gfc_trans_omp_directive (gfc_code *); void gfc_trans_omp_declare_simd (gfc_namespace *); tree gfc_trans_oacc_directive (gfc_code *); tree gfc_trans_oacc_declare (gfc_namespace *); +tree gfc_trans_oacc_declare_allocate (stmtblock_t *, gfc_expr *, bool); /* trans-io.c */ tree gfc_trans_open (gfc_code *); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 5fc4a66..bc5a5dd 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1196,7 +1196,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) && varpool_node::get_create (decl)->offloadable && !lookup_attribute ("omp declare target link", - DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl)) + && !is_gimple_omp_oacc (ctx->stmt)) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) @@ -7501,7 +7502,7 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) { - if (omp_is_reference (var)) + if (omp_is_reference (var) || POINTER_TYPE_P (type)) { tmp = create_tmp_var (type); gimplify_assign (tmp, build_simple_mem_ref (var), gs); @@ -7533,7 +7534,8 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) /* Like convert_to_firstprivate_int, but restore the original type. */ static tree -convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) +convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref, + gimple_seq *gs) { tree type = TREE_TYPE (var); tree new_type = NULL_TREE; @@ -7542,7 +7544,31 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) gcc_assert (TREE_CODE (var) == MEM_REF); var = TREE_OPERAND (var, 0); - if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + if (is_ref || POINTER_TYPE_P (orig_type)) + { + tree_code code = NOP_EXPR; + + if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == COMPLEX_TYPE) + code = VIEW_CONVERT_EXPR; + + if (code == VIEW_CONVERT_EXPR + && TYPE_SIZE (type) != TYPE_SIZE (orig_type)) + { + tree ptype = build_pointer_type (type); + var = fold_build1 (code, ptype, build_fold_addr_expr (var)); + var = build_simple_mem_ref (var); + } + else + var = fold_build1 (code, type, var); + + tree inst = create_tmp_var (type); + gimplify_assign (inst, var, gs); + var = build_fold_addr_expr (inst); + + return var; + } + + if (INTEGRAL_TYPE_P (var)) return fold_convert (type, var); gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); @@ -7553,16 +7579,8 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) tmp = create_tmp_var (new_type); var = fold_convert (new_type, var); gimplify_assign (tmp, var, gs); - var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); - - if (is_ref) - { - tmp = create_tmp_var (build_pointer_type (type)); - gimplify_assign (tmp, build_fold_addr_expr (var), gs); - var = tmp; - } - return var; + return fold_build1 (VIEW_CONVERT_EXPR, type, tmp); } /* Lower the GIMPLE_OMP_TARGET in the current statement @@ -7665,6 +7683,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GOMP_MAP_FORCE_DEVICEPTR: case GOMP_MAP_DEVICE_RESIDENT: case GOMP_MAP_LINK: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: gcc_assert (is_gimple_omp_oacc (stmt)); break; default: @@ -7743,7 +7763,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) && !maybe_lookup_field_in_outer_ctx (var, ctx)) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - x = convert_from_firstprivate_int (x, omp_is_reference (var), + x = convert_from_firstprivate_int (x, TREE_TYPE (new_var), + omp_is_reference (var), &fplist); gimplify_assign (new_var, x, &fplist); map_cnt++; @@ -7760,13 +7781,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_is_reference (new_var) - && TREE_CODE (var_type) != POINTER_TYPE) + /* Accelerators may not have alloca, so it's not + possible to privatize local storage for those + objects. */ + && TREE_CONSTANT (TYPE_SIZE (TREE_TYPE (var_type)))) { /* Create a local object to hold the instance value. */ const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); tree inst = create_tmp_var (TREE_TYPE (var_type), id); - gimplify_assign (inst, fold_indirect_ref (x), &fplist); + if (TREE_CODE (var_type) == POINTER_TYPE) + gimplify_assign (inst, x, &fplist); + else + gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } gimplify_assign (new_var, x, &fplist); @@ -7996,8 +8023,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); + tree new_var = lookup_decl (var, ctx); tree type = TREE_TYPE (var); - tree inner_type = omp_is_reference (var) + tree inner_type = omp_is_reference (new_var) ? TREE_TYPE (type) : type; if ((TREE_CODE (inner_type) == REAL_TYPE || (!omp_is_reference (var) diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 new file mode 100644 index 0000000..5349e0d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 @@ -0,0 +1,25 @@ +! Verify that OpenACC declared allocatable arrays have implicit +! OpenACC enter and exit pragmas at the time of allocation and +! deallocation. + +! { dg-additional-options "-fdump-tree-original" } + +program allocate + implicit none + integer, allocatable :: a(:), b + integer, parameter :: n = 100 + integer i + !$acc declare create(a,b) + + allocate (a(n), b) + + !$acc parallel loop copyout(a, b) + do i = 1, n + a(i) = b + end do + + deallocate (a, b) +end program allocate + +! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 2 "original" } } +! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 2 "original" } } diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 2c089b1..47b8aaa 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -755,6 +755,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case GOMP_MAP_LINK: pp_string (pp, "link"); break; + case GOMP_MAP_DECLARE_ALLOCATE: + pp_string (pp, "declare_allocate"); + break; + case GOMP_MAP_DECLARE_DEALLOCATE: + pp_string (pp, "declare_deallocate"); + break; default: gcc_unreachable (); } diff --git a/include/gomp-constants.h b/include/gomp-constants.h index ccfb657..9fc8767 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -40,6 +40,7 @@ #define GOMP_MAP_FLAG_SPECIAL_0 (1 << 2) #define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) #define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) +#define GOMP_MAP_FLAG_SPECIAL_4 (1 << 6) #define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ | GOMP_MAP_FLAG_SPECIAL_0) /* Flag to force a specific behavior (or else, trigger a run-time error). */ @@ -128,6 +129,11 @@ enum gomp_map_kind /* Decrement usage count and deallocate if zero. */ GOMP_MAP_RELEASE = (GOMP_MAP_FLAG_SPECIAL_2 | GOMP_MAP_DELETE), + /* Mapping kinds for allocatable arrays. */ + GOMP_MAP_DECLARE_ALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_TO), + GOMP_MAP_DECLARE_DEALLOCATE = (GOMP_MAP_FLAG_SPECIAL_4 + | GOMP_MAP_FORCE_FROM), /* Internal to GCC, not used in libgomp. */ /* Do not map, but pointer assign a pointer instead. */ diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 3787ce4..c678a22 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -725,6 +725,34 @@ acc_update_self (void *h, size_t s) } void +gomp_acc_declare_allocate (bool allocate, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + gomp_debug (0, " %s: processing\n", __FUNCTION__); + + if (allocate) + { + assert (mapnum == 3); + + /* Allocate memory for the array data. */ + uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], sizes[0]); + + /* Update the PSET. */ + acc_update_device (hostaddrs[1], sizes[1]); + void *pset = acc_deviceptr (hostaddrs[1]); + acc_memcpy_to_device (pset, &data, sizeof (uintptr_t)); + } + else + { + /* Deallocate memory for the array data. */ + void *data = acc_deviceptr (hostaddrs[0]); + acc_free (data); + } + + gomp_debug (0, " %s: end\n", __FUNCTION__); +} + +void gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds) { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 070c5dc..f80b9a2 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -391,7 +391,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, || kind == GOMP_MAP_FORCE_PRESENT || kind == GOMP_MAP_FORCE_TO || kind == GOMP_MAP_TO - || kind == GOMP_MAP_ALLOC) + || kind == GOMP_MAP_ALLOC + || kind == GOMP_MAP_DECLARE_ALLOCATE) { data_enter = true; break; @@ -400,7 +401,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (kind == GOMP_MAP_RELEASE || kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FROM - || kind == GOMP_MAP_FORCE_FROM) + || kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_DECLARE_DEALLOCATE) break; gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -429,6 +431,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, { switch (kind) { + case GOMP_MAP_DECLARE_ALLOCATE: case GOMP_MAP_ALLOC: acc_present_or_create (hostaddrs[i], sizes[i]); break; @@ -449,8 +452,12 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - gomp_acc_insert_pointer (pointer, &hostaddrs[i], - &sizes[i], &kinds[i]); + if (kind == GOMP_MAP_DECLARE_ALLOCATE) + gomp_acc_declare_allocate (true, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else + gomp_acc_insert_pointer (pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and @@ -480,6 +487,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, acc_delete (hostaddrs[i], sizes[i]); } break; + case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: if (finalize) @@ -495,10 +503,16 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - bool copyfrom = (kind == GOMP_MAP_FORCE_FROM - || kind == GOMP_MAP_FROM); - gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, - finalize, pointer); + if (kind == GOMP_MAP_DECLARE_DEALLOCATE) + gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else + { + bool copyfrom = (kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_FROM); + gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, + async, finalize, pointer); + } /* See the above comment. */ i += pointer - 1; } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 new file mode 100644 index 0000000..3758031 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-array-1.f90 @@ -0,0 +1,30 @@ +! Ensure that dummy arguments of allocatable arrays don't cause +! "libgomp: [...] is not mapped" errors. + +! { dg-do run } + +program main + integer, parameter :: n = 40 + integer, allocatable :: ar(:,:,:) + integer :: i + + allocate (ar(1:n,0:n-1,0:n-1)) + !$acc enter data copyin (ar) + + !$acc update host (ar) + + !$acc update device (ar) + + call update_ar (ar, n) + + !$acc exit data copyout (ar) +end program main + +subroutine update_ar (ar, n) + integer :: n + integer, dimension (1:n,0:n-1,0:n-1) :: ar + + !$acc update host (ar) + + !$acc update device (ar) +end subroutine update_ar diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 new file mode 100644 index 0000000..be86d14 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 @@ -0,0 +1,33 @@ +! Test non-declared allocatable scalars in OpenACC data clauses. + +! { dg-do run } + +program main + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + + allocate (a) + + a = 50 + + !$acc parallel loop + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + allocate (c) + + !$acc parallel copyout(c) num_gangs(1) + c = a + !$acc end parallel + + if (c /= a) call abort + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 new file mode 100644 index 0000000..d68b124 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 @@ -0,0 +1,211 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: b(:) + !$acc declare create (b) +end module vars + +program test + use vars + use openacc + implicit none + real*8 :: a + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (b)) call abort + + ! Test local usage of an allocated declared array. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + a = 2.0 + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*2) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) call abort + end do + + deallocate (b) + + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) call abort + + do i = 1, n + if (b(i) /= i*i) call abort + end do + + deallocate (b) +end program test + +! Set each element in array 'b' at index i to i*2. + +subroutine sub1 + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * i; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * i +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 new file mode 100644 index 0000000..3521a7f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 @@ -0,0 +1,48 @@ +! Test declare create with allocatable scalars. + +! { dg-do run } + +program main + use openacc + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + !$acc declare create (c) + + allocate (a) + + a = 50 + + !$acc parallel loop firstprivate(a) + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + allocate (c) + a = 100 + + if (.not.acc_is_present(c)) call abort + + !$acc parallel num_gangs(1) present(c) + c = a + !$acc end parallel + + !$acc update host(c) + if (c /= a) call abort + + !$acc parallel loop + do i = 1, n + b(i) = c + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 new file mode 100644 index 0000000..5d12d75 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 @@ -0,0 +1,218 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) call abort + if (allocated (b)) call abort + + ! Test local usage of an allocated declared array. + + allocate (a) + + if (.not.allocated (a)) call abort + if (acc_is_present (a) .neqv. .true.) call abort + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + a = 2.0 + !$acc update device(a) + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= a+i*2) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) call abort + end do + + deallocate (b) + + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) call abort + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (a) + deallocate (b) +end program test + +! Set each element in array 'b' at index i to a+i*2. + +subroutine sub1 + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = a+i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * a; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * a +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 new file mode 100644 index 0000000..b4cf26e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 @@ -0,0 +1,66 @@ +! Test declare create with allocatable arrays and scalars. The unused +! declared array 'b' caused an ICE in the past. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + implicit none + integer :: i + + interface + subroutine sub1 + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) call abort + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (a) + allocate (b(n)) + + if (.not.allocated (b)) call abort + + call sub1 + + !$acc update self(a) + if (a /= 50) call abort + + deallocate (a) + deallocate (b) + +end program test + +! Set 'a' to 50. + +subroutine sub1 + use vars + implicit none + integer i + + a = 50 + !$acc update device(a) +end subroutine sub1 -- 1.8.1.1