From patchwork Tue Nov 20 21:54:47 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Julian Brown X-Patchwork-Id: 1000753 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-490566-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="j9rkkuvu"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42zzz41Cypz9s3q for ; Wed, 21 Nov 2018 08:55:27 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; q=dns; s=default; b=lZcBtvnhptYbjIut So8tbtB1mvjE3A4BNC9OBCiMS2xiZEB2Z/xtwnIxN5ekMUH/pyxygUpdKnuaqB2F ZA2GRZHBMUMwMD56cO47phoz8UQYd1S+9kjXjD0ovQYRcoXC6oz5+AC5yvlX6tHA G/eOU58mm0cKOOsDk0DoSYNM3lQ= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; s=default; bh=uuAv5080rAztv0xWIuYtAj B4HqE=; b=j9rkkuvuOJdP9TmvLldbTSx0hJE3VEN+K6cgmCJgtTxQAJj8TH8WGz 5SE8yi62JKRko/LyHOkWnKu2IpSsLjWmpqAsF+GpKHLFcV5RNpjzc58vEdn5oyvL M2ZUeq5ipZgCHEBZe4wjFK2ogLgew5nroniYacATMUaGrebEXRWjI= Received: (qmail 41043 invoked by alias); 20 Nov 2018 21:55:12 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 40919 invoked by uid 89); 20 Nov 2018 21:55:11 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-23.6 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_PASS, UNWANTED_LANGUAGE_BODY autolearn=ham version=3.3.2 spammy=transfers, coalesce, onto, Special X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 20 Nov 2018 21:55:08 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-MBX-04.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1gPDz8-0002vM-Rz from Julian_Brown@mentor.com ; Tue, 20 Nov 2018 13:55:07 -0800 Received: from localhost.localdomain (147.34.91.1) by SVR-IES-MBX-04.mgc.mentorg.com (139.181.222.4) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Tue, 20 Nov 2018 21:55:01 +0000 From: Julian Brown To: CC: , , Subject: [PATCH 1/6] [og8] Host-to-device transfer coalescing & magic offset value self-documentation Date: Tue, 20 Nov 2018 13:54:47 -0800 Message-ID: In-Reply-To: References: MIME-Version: 1.0 X-IsSubscribed: yes Previously posted upstream: https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00825.html libgomp/ * libgomp.h (OFFSET_INLINED, OFFSET_POINTER, OFFSET_STRUCT): Define. * target.c (FIELD_TGT_EMPTY): Define. (gomp_coalesce_chunk): New. (gomp_coalesce_buf): Use above instead of flat array of size_t pairs. (gomp_coalesce_buf_add): Adjust for above change. (gomp_copy_host2dev): Likewise. (gomp_map_val): Use OFFSET_* macros instead of magic constants. Write as switch instead of list of ifs. (gomp_map_vars_async): Adjust for gomp_coalesce_chunk change. Use OFFSET_* macros. --- libgomp/libgomp.h | 5 +++ libgomp/target.c | 101 +++++++++++++++++++++++++++++++--------------------- 2 files changed, 65 insertions(+), 41 deletions(-) diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 607f4c2..acf7f8f 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -842,6 +842,11 @@ struct target_mem_desc { artificial pointer to "omp declare target link" object. */ #define REFCOUNT_LINK (~(uintptr_t) 1) +/* Special offset values. */ +#define OFFSET_INLINED (~(uintptr_t) 0) +#define OFFSET_POINTER (~(uintptr_t) 1) +#define OFFSET_STRUCT (~(uintptr_t) 2) + struct splay_tree_key_s { /* Address of the host object. */ uintptr_t host_start; diff --git a/libgomp/target.c b/libgomp/target.c index ab17650..7220ac6 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -45,6 +45,8 @@ #include "plugin-suffix.h" #endif +#define FIELD_TGT_EMPTY (~(size_t) 0) + static void gomp_target_init (void); /* The whole initialization code for offloading plugins is only run one. */ @@ -206,8 +208,14 @@ goacc_device_copy_async (struct gomp_device_descr *devicep, } } -/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) - host to device memory transfers. */ +/* Infrastructure for coalescing adjacent or nearly adjacent (in device + addresses) host to device memory transfers. */ + +struct gomp_coalesce_chunk +{ + /* The starting and ending point of a coalesced chunk of memory. */ + size_t start, end; +}; struct gomp_coalesce_buf { @@ -215,10 +223,10 @@ struct gomp_coalesce_buf it will be copied to the device. */ void *buf; struct target_mem_desc *tgt; - /* Array with offsets, chunks[2 * i] is the starting offset and - chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address + /* Array with offsets, chunks[i].start is the starting offset and + chunks[i].end ending offset relative to tgt->tgt_start device address of chunks which are to be copied to buf and later copied to device. */ - size_t *chunks; + struct gomp_coalesce_chunk *chunks; /* Number of chunks in chunks array, or -1 if coalesce buffering should not be performed. */ long chunk_cnt; @@ -251,14 +259,14 @@ gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) { if (cbuf->chunk_cnt < 0) return; - if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1]) + if (start < cbuf->chunks[cbuf->chunk_cnt-1].end) { cbuf->chunk_cnt = -1; return; } - if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP) + if (start < cbuf->chunks[cbuf->chunk_cnt-1].end + MAX_COALESCE_BUF_GAP) { - cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len; + cbuf->chunks[cbuf->chunk_cnt-1].end = start + len; cbuf->use_cnt++; return; } @@ -268,8 +276,8 @@ gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) if (cbuf->use_cnt == 1) cbuf->chunk_cnt--; } - cbuf->chunks[2 * cbuf->chunk_cnt] = start; - cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len; + cbuf->chunks[cbuf->chunk_cnt].start = start; + cbuf->chunks[cbuf->chunk_cnt].end = start + len; cbuf->chunk_cnt++; cbuf->use_cnt = 1; } @@ -301,20 +309,20 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, if (cbuf) { uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start; - if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1]) + if (doff < cbuf->chunks[cbuf->chunk_cnt-1].end) { long first = 0; long last = cbuf->chunk_cnt - 1; while (first <= last) { long middle = (first + last) >> 1; - if (cbuf->chunks[2 * middle + 1] <= doff) + if (cbuf->chunks[middle].end <= doff) first = middle + 1; - else if (cbuf->chunks[2 * middle] <= doff) + else if (cbuf->chunks[middle].start <= doff) { - if (doff + sz > cbuf->chunks[2 * middle + 1]) + if (doff + sz > cbuf->chunks[middle].end) gomp_fatal ("internal libgomp cbuf error"); - memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]), + memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start), h, sz); return; } @@ -538,17 +546,25 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) return tgt->list[i].key->tgt->tgt_start + tgt->list[i].key->tgt_offset + tgt->list[i].offset; - if (tgt->list[i].offset == ~(uintptr_t) 0) - return (uintptr_t) hostaddrs[i]; - if (tgt->list[i].offset == ~(uintptr_t) 1) - return 0; - if (tgt->list[i].offset == ~(uintptr_t) 2) - return tgt->list[i + 1].key->tgt->tgt_start - + tgt->list[i + 1].key->tgt_offset - + tgt->list[i + 1].offset - + (uintptr_t) hostaddrs[i] - - (uintptr_t) hostaddrs[i + 1]; - return tgt->tgt_start + tgt->list[i].offset; + + switch (tgt->list[i].offset) + { + case OFFSET_INLINED: + return (uintptr_t) hostaddrs[i]; + + case OFFSET_POINTER: + return 0; + + case OFFSET_STRUCT: + return tgt->list[i + 1].key->tgt->tgt_start + + tgt->list[i + 1].key->tgt_offset + + tgt->list[i + 1].offset + + (uintptr_t) hostaddrs[i] + - (uintptr_t) hostaddrs[i + 1]; + + default: + return tgt->tgt_start + tgt->list[i].offset; + } } /* Dynamic array related data structures, interfaces with the compiler. */ @@ -758,8 +774,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, cbuf.buf = NULL; if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) { - cbuf.chunks - = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t)); + size_t chunk_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); + cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunk_size); cbuf.chunk_cnt = 0; } if (pragma_kind == GOMP_MAP_VARS_TARGET) @@ -769,8 +785,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, tgt_size = mapnum * sizeof (void *); cbuf.chunk_cnt = 1; cbuf.use_cnt = 1 + (mapnum > 1); - cbuf.chunks[0] = 0; - cbuf.chunks[1] = tgt_size; + cbuf.chunks[0].start = 0; + cbuf.chunks[0].end = tgt_size; } gomp_mutex_lock (&devicep->lock); @@ -788,7 +804,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) { tgt->list[i].key = NULL; - tgt->list[i].offset = ~(uintptr_t) 0; + tgt->list[i].offset = OFFSET_INLINED; continue; } else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) @@ -806,7 +822,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, = (void *) (n->tgt->tgt_start + n->tgt_offset + cur_node.host_start); tgt->list[i].key = NULL; - tgt->list[i].offset = ~(uintptr_t) 0; + tgt->list[i].offset = OFFSET_INLINED; continue; } else if ((kind & typemask) == GOMP_MAP_STRUCT) @@ -817,7 +833,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, cur_node.host_end = (uintptr_t) hostaddrs[last] + sizes[last]; tgt->list[i].key = NULL; - tgt->list[i].offset = ~(uintptr_t) 2; + tgt->list[i].offset = OFFSET_STRUCT; splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); if (n == NULL) { @@ -850,7 +866,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) { tgt->list[i].key = NULL; - tgt->list[i].offset = ~(uintptr_t) 1; + tgt->list[i].offset = OFFSET_POINTER; has_firstprivate = true; continue; } @@ -894,7 +910,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, if (!n) { tgt->list[i].key = NULL; - tgt->list[i].offset = ~(uintptr_t) 1; + tgt->list[i].offset = OFFSET_POINTER; continue; } } @@ -1018,7 +1034,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, if (cbuf.chunk_cnt > 0) { cbuf.buf - = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]); + = malloc (cbuf.chunks[cbuf.chunk_cnt-1].end - cbuf.chunks[0].start); if (cbuf.buf) { cbuf.tgt = tgt; @@ -1144,6 +1160,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, else k->host_end = k->host_start + sizeof (void *); splay_tree_key n = splay_tree_lookup (mem_map, k); + /* Need to account for the case where a struct field hasn't been + mapped onto the accelerator yet. */ if (n && n->refcount != REFCOUNT_LINK) gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], kind & typemask, cbufp); @@ -1160,12 +1178,12 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, size_t align = (size_t) 1 << (kind >> rshift); tgt->list[i].key = k; k->tgt = tgt; - if (field_tgt_clear != ~(size_t) 0) + if (field_tgt_clear != FIELD_TGT_EMPTY) { k->tgt_offset = k->host_start - field_tgt_base + field_tgt_offset; if (i == field_tgt_clear) - field_tgt_clear = ~(size_t) 0; + field_tgt_clear = FIELD_TGT_EMPTY; } else { @@ -1419,9 +1437,10 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, long c = 0; for (c = 0; c < cbuf.chunk_cnt; ++c) gomp_copy_host2dev (devicep, aq, - (void *) (tgt->tgt_start + cbuf.chunks[2 * c]), - (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]), - cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL); + (void *) (tgt->tgt_start + cbuf.chunks[c].start), + (char *) cbuf.buf + (cbuf.chunks[c].start + - cbuf.chunks[0].start), + cbuf.chunks[c].end - cbuf.chunks[c].start, NULL); free (cbuf.buf); } From patchwork Tue Nov 20 21:54:48 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Julian Brown X-Patchwork-Id: 1000754 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-490567-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="RFlDPkhs"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42zzzJ5Kysz9s3q for ; Wed, 21 Nov 2018 08:55:40 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; q=dns; s=default; b=qh86nmzokaGHW5VU q7/Vax7eANvVBf/iUs0v+97/n5IuRItMdlr6CsLnTVy3ibtPsS3vjeAdIGzq2OC8 ST8D1NRSpPeHlWlCWl9j5Sk9rmoJVFgqvPh62TDE5GBf1xNIX9Fzg9yzHUXN+RAC 3qczHtrYRUblcX3C4oRtNekmLj8= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; s=default; bh=jIZoUfR1jBJEF2iVOvigCB 8rqJU=; b=RFlDPkhsrwGrRBnmuyW9KabJt+giXFz/pHo3F50U011muDgg1tyMZD U9gaKFuEd4bzqK8gWlR5+kyZPbiH/ddd6ugs3idxrtIkvZ46tenM4okMNB3KXDPZ 5u9ZXIdyD/D0l3YhPizjYDHJn76xE+6gdHd5AXXkoZOXZkuXPP7pk= Received: (qmail 41615 invoked by alias); 20 Nov 2018 21:55:16 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 41505 invoked by uid 89); 20 Nov 2018 21:55:15 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.2 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=aforementioned, installing, newlycreated, BASE X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 20 Nov 2018 21:55:12 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-MBX-04.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1gPDzC-0002wQ-FQ from Julian_Brown@mentor.com ; Tue, 20 Nov 2018 13:55:10 -0800 Received: from localhost.localdomain (147.34.91.1) by SVR-IES-MBX-04.mgc.mentorg.com (139.181.222.4) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Tue, 20 Nov 2018 21:55:05 +0000 From: Julian Brown To: CC: , , Subject: [PATCH 2/6] [og8] Factor out duplicate code in gimplify_scan_omp_clauses Date: Tue, 20 Nov 2018 13:54:48 -0800 Message-ID: <6593e6032cba3f5f2c5c377e9cacb30544623c8d.1542748807.git.julian@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-IsSubscribed: yes Previously posted upstream: https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00824.html gcc/ * gimplify.c (insert_struct_component_mapping) (check_base_and_compare_lt): New. (gimplify_scan_omp_clauses): Outline duplicated code into calls to above two functions. --- gcc/gimplify.c | 307 ++++++++++++++++++++++++++++++++------------------------ 1 files changed, 174 insertions(+), 133 deletions(-) diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 9be0b70..824e020 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -7661,6 +7661,160 @@ demote_firstprivate_pointer (tree decl, gimplify_omp_ctx *ctx) } } +/* Insert a GOMP_MAP_ALLOC or GOMP_MAP_RELEASE node following a + GOMP_MAP_STRUCT mapping. C is an always_pointer mapping. STRUCT_NODE is + the struct node to insert the new mapping after (when the struct node is + initially created). PREV_NODE is the first of two or three mappings for a + pointer, and is either: + - the node before C, when a pair of mappings is used, e.g. for a C/C++ + array section. + - not the node before C. This is true when we have a reference-to-pointer + type (with a mapping for the reference and for the pointer), or for + Fortran derived-type mappings with a GOMP_MAP_TO_PSET. + If SCP is non-null, the new node is inserted before *SCP. + if SCP is null, the new node is inserted before PREV_NODE. + The return type is: + - PREV_NODE, if SCP is non-null. + - The newly-created ALLOC or RELEASE node, if SCP is null. + - The second newly-created ALLOC or RELEASE node, if we are mapping a + reference to a pointer. */ + +static tree +insert_struct_component_mapping (enum tree_code code, tree c, tree struct_node, + tree prev_node, tree *scp) +{ + enum gomp_map_kind mkind = (code == OMP_TARGET_EXIT_DATA + || code == OACC_EXIT_DATA) + ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC; + + tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); + tree cl = scp ? prev_node : c2; + OMP_CLAUSE_SET_MAP_KIND (c2, mkind); + OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (c)); + OMP_CLAUSE_CHAIN (c2) = scp ? *scp : prev_node; + OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (ptr_type_node); + if (struct_node) + OMP_CLAUSE_CHAIN (struct_node) = c2; + + /* We might need to create an additional mapping if we have a reference to a + pointer (in C++). Don't do this if we have something other than a + GOMP_MAP_ALWAYS_POINTER though, i.e. a GOMP_MAP_TO_PSET. */ + if (OMP_CLAUSE_CHAIN (prev_node) != c + && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (prev_node)) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (prev_node)) + == GOMP_MAP_ALWAYS_POINTER)) + { + tree c4 = OMP_CLAUSE_CHAIN (prev_node); + tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c3, mkind); + OMP_CLAUSE_DECL (c3) = unshare_expr (OMP_CLAUSE_DECL (c4)); + OMP_CLAUSE_SIZE (c3) = TYPE_SIZE_UNIT (ptr_type_node); + OMP_CLAUSE_CHAIN (c3) = prev_node; + if (!scp) + OMP_CLAUSE_CHAIN (c2) = c3; + else + cl = c3; + } + + if (scp) + *scp = c2; + + return cl; +} + +/* Called initially with ORIG_BASE non-null, sets PREV_BITPOS and PREV_POFFSET + to the offset of the field given in BASE. Return type is 1 if BASE is equal + to *ORIG_BASE after stripping off ARRAY_REF and INDIRECT_REF nodes and + calling get_inner_reference, else 0. + + Called subsequently with ORIG_BASE null, compares the offset of the field + given in BASE to PREV_BITPOS, PREV_POFFSET. Returns -1 if the base object + has changed, 0 if the new value has a higher bit position than that + described by the aforementioned arguments, or 1 if the new value is less + than them. Used for (insertion) sorting components after a GOMP_MAP_STRUCT + mapping. */ + +static int +check_base_and_compare_lt (tree base, tree *orig_base, tree decl, + poly_int64 *prev_bitpos, + poly_offset_int *prev_poffset) +{ + tree offset; + poly_int64 bitsize, bitpos; + machine_mode mode; + int unsignedp, reversep, volatilep = 0; + poly_offset_int poffset; + + if (orig_base) + { + while (TREE_CODE (base) == ARRAY_REF) + base = TREE_OPERAND (base, 0); + + if (TREE_CODE (base) == INDIRECT_REF) + base = TREE_OPERAND (base, 0); + } + else + { + if (TREE_CODE (base) == ARRAY_REF) + { + while (TREE_CODE (base) == ARRAY_REF) + base = TREE_OPERAND (base, 0); + if (TREE_CODE (base) != COMPONENT_REF + || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE) + return -1; + } + else if (TREE_CODE (base) == INDIRECT_REF + && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF + && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0))) + == REFERENCE_TYPE)) + base = TREE_OPERAND (base, 0); + } + + base = get_inner_reference (base, &bitsize, &bitpos, &offset, &mode, + &unsignedp, &reversep, &volatilep); + + if (orig_base) + *orig_base = base; + + if ((TREE_CODE (base) == INDIRECT_REF + || (TREE_CODE (base) == MEM_REF + && integer_zerop (TREE_OPERAND (base, 1)))) + && DECL_P (TREE_OPERAND (base, 0)) + && TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0))) == REFERENCE_TYPE) + base = TREE_OPERAND (base, 0); + + gcc_assert (offset == NULL_TREE || poly_int_tree_p (offset)); + + if (offset) + poffset = wi::to_poly_offset (offset); + else + poffset = 0; + + if (maybe_ne (bitpos, 0)) + poffset += bits_to_bytes_round_down (bitpos); + + if (orig_base) + { + gcc_assert (base == decl); + + *prev_bitpos = bitpos; + *prev_poffset = poffset; + + return *orig_base == base; + } + else + { + if (base != decl) + return -1; + + return (maybe_lt (*prev_poffset, poffset) + || (known_eq (*prev_poffset, poffset) + && maybe_lt (*prev_bitpos, bitpos))); + } + + return 0; +} + /* Scan the OMP clauses in *LIST_P, installing mappings into a new and previous omp contexts. */ @@ -8131,29 +8285,13 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } } - tree offset; - poly_int64 bitsize, bitpos; - machine_mode mode; - int unsignedp, reversep, volatilep = 0; - tree base = OMP_CLAUSE_DECL (c); - while (TREE_CODE (base) == ARRAY_REF) - base = TREE_OPERAND (base, 0); - if (TREE_CODE (base) == INDIRECT_REF) - base = TREE_OPERAND (base, 0); - base = get_inner_reference (base, &bitsize, &bitpos, &offset, - &mode, &unsignedp, &reversep, - &volatilep); - tree orig_base = base; - if ((TREE_CODE (base) == INDIRECT_REF - || (TREE_CODE (base) == MEM_REF - && integer_zerop (TREE_OPERAND (base, 1)))) - && DECL_P (TREE_OPERAND (base, 0)) - && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0))) - == REFERENCE_TYPE)) - base = TREE_OPERAND (base, 0); - gcc_assert (base == decl - && (offset == NULL_TREE - || poly_int_tree_p (offset))); + tree orig_base; + poly_int64 bitpos1; + poly_offset_int offset1; + + int base_eq_orig_base + = check_base_and_compare_lt (OMP_CLAUSE_DECL (c), + &orig_base, decl, &bitpos1, &offset1); splay_tree_node n = splay_tree_lookup (ctx->variables, (splay_tree_key)decl); @@ -8165,7 +8303,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, tree l = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (l, GOMP_MAP_STRUCT); - if (orig_base != base) + if (!base_eq_orig_base) OMP_CLAUSE_DECL (l) = unshare_expr (orig_base); else OMP_CLAUSE_DECL (l) = decl; @@ -8175,32 +8313,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, struct_map_to_clause->put (decl, l); if (ptr) { - enum gomp_map_kind mkind - = code == OMP_TARGET_EXIT_DATA - ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC; - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (c2, mkind); - OMP_CLAUSE_DECL (c2) - = unshare_expr (OMP_CLAUSE_DECL (c)); - OMP_CLAUSE_CHAIN (c2) = *prev_list_p; - OMP_CLAUSE_SIZE (c2) - = TYPE_SIZE_UNIT (ptr_type_node); - OMP_CLAUSE_CHAIN (l) = c2; - if (OMP_CLAUSE_CHAIN (*prev_list_p) != c) - { - tree c4 = OMP_CLAUSE_CHAIN (*prev_list_p); - tree c3 - = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (c3, mkind); - OMP_CLAUSE_DECL (c3) - = unshare_expr (OMP_CLAUSE_DECL (c4)); - OMP_CLAUSE_SIZE (c3) - = TYPE_SIZE_UNIT (ptr_type_node); - OMP_CLAUSE_CHAIN (c3) = *prev_list_p; - OMP_CLAUSE_CHAIN (c2) = c3; - } + insert_struct_component_mapping (code, c, l, + *prev_list_p, NULL); *prev_list_p = l; prev_list_p = NULL; } @@ -8210,7 +8324,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, *list_p = l; list_p = &OMP_CLAUSE_CHAIN (l); } - if (orig_base != base && code == OMP_TARGET) + if (!base_eq_orig_base && code == OMP_TARGET) { tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); @@ -8233,13 +8347,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, tree *sc = NULL, *scp = NULL; if (GOMP_MAP_ALWAYS_P (OMP_CLAUSE_MAP_KIND (c)) || ptr) n->value |= GOVD_SEEN; - poly_offset_int o1, o2; - if (offset) - o1 = wi::to_poly_offset (offset); - else - o1 = 0; - if (maybe_ne (bitpos, 0)) - o1 += bits_to_bytes_round_down (bitpos); sc = &OMP_CLAUSE_CHAIN (*osc); if (*sc != c && (OMP_CLAUSE_MAP_KIND (*sc) @@ -8257,44 +8364,14 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, break; else { - tree offset2; - poly_int64 bitsize2, bitpos2; - base = OMP_CLAUSE_DECL (*sc); - if (TREE_CODE (base) == ARRAY_REF) - { - while (TREE_CODE (base) == ARRAY_REF) - base = TREE_OPERAND (base, 0); - if (TREE_CODE (base) != COMPONENT_REF - || (TREE_CODE (TREE_TYPE (base)) - != ARRAY_TYPE)) - break; - } - else if (TREE_CODE (base) == INDIRECT_REF - && (TREE_CODE (TREE_OPERAND (base, 0)) - == COMPONENT_REF) - && (TREE_CODE (TREE_TYPE - (TREE_OPERAND (base, 0))) - == REFERENCE_TYPE)) - base = TREE_OPERAND (base, 0); - base = get_inner_reference (base, &bitsize2, - &bitpos2, &offset2, - &mode, &unsignedp, - &reversep, &volatilep); - if ((TREE_CODE (base) == INDIRECT_REF - || (TREE_CODE (base) == MEM_REF - && integer_zerop (TREE_OPERAND (base, - 1)))) - && DECL_P (TREE_OPERAND (base, 0)) - && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, - 0))) - == REFERENCE_TYPE)) - base = TREE_OPERAND (base, 0); - if (base != decl) + int same_decl_offset_lt + = check_base_and_compare_lt ( + OMP_CLAUSE_DECL (*sc), NULL, decl, + &bitpos1, &offset1); + if (same_decl_offset_lt == -1) break; if (scp) continue; - gcc_assert (offset == NULL_TREE - || poly_int_tree_p (offset)); tree d1 = OMP_CLAUSE_DECL (*sc); tree d2 = OMP_CLAUSE_DECL (c); while (TREE_CODE (d1) == ARRAY_REF) @@ -8323,14 +8400,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, remove = true; break; } - if (offset2) - o2 = wi::to_poly_offset (offset2); - else - o2 = 0; - o2 += bits_to_bytes_round_down (bitpos2); - if (maybe_lt (o1, o2) - || (known_eq (o1, 2) - && maybe_lt (bitpos, bitpos2))) + if (same_decl_offset_lt) { if (ptr) scp = sc; @@ -8345,38 +8415,9 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, size_one_node); if (ptr) { - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - tree cl = NULL_TREE; - enum gomp_map_kind mkind - = code == OMP_TARGET_EXIT_DATA - ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC; - OMP_CLAUSE_SET_MAP_KIND (c2, mkind); - OMP_CLAUSE_DECL (c2) - = unshare_expr (OMP_CLAUSE_DECL (c)); - OMP_CLAUSE_CHAIN (c2) = scp ? *scp : *prev_list_p; - OMP_CLAUSE_SIZE (c2) - = TYPE_SIZE_UNIT (ptr_type_node); - cl = scp ? *prev_list_p : c2; - if (OMP_CLAUSE_CHAIN (*prev_list_p) != c) - { - tree c4 = OMP_CLAUSE_CHAIN (*prev_list_p); - tree c3 - = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (c3, mkind); - OMP_CLAUSE_DECL (c3) - = unshare_expr (OMP_CLAUSE_DECL (c4)); - OMP_CLAUSE_SIZE (c3) - = TYPE_SIZE_UNIT (ptr_type_node); - OMP_CLAUSE_CHAIN (c3) = *prev_list_p; - if (!scp) - OMP_CLAUSE_CHAIN (c2) = c3; - else - cl = c3; - } - if (scp) - *scp = c2; + tree cl + = insert_struct_component_mapping (code, c, NULL, + *prev_list_p, scp); if (sc == prev_list_p) { *sc = cl; From patchwork Tue Nov 20 21:54:49 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Julian Brown X-Patchwork-Id: 1000756 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-490569-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="syeTz3rc"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42zzzx5D0jz9s3q for ; Wed, 21 Nov 2018 08:56:13 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; q=dns; s=default; b=cibi4S0q79FizVXp N1jgkGnTU7R0KHQ34wLpxpS7ok/OJohIxPnr0EzWc3bBcnBduCknUf8Z93fnpgz3 gJt2Yl72CRvSrZx7GmGm33qeNeeScqG9FeIh48ZC8RZQSJc07s83pWCgxuHJQd1s mqOXUMO9oqEmZrcN0o/fcZBsgVw= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; s=default; bh=5XwTFdujf32s/6gqBHP3rl i56LA=; b=syeTz3rcul9STfYN98/fbYu9ecDw9VmLkA9vjay1GiOoOB5z4uoMWJ 5MLCnAQoL0zPB0nqtGaEAmLt3RJu7jm0wU8cXnA9eeMAaEllDpuQIDzedbJcB2mN ffwKhtNKgJm7om58PaEAtUEa2wjnFBplPJ5FpTd8BKxk//418UVbc= Received: (qmail 43150 invoked by alias); 20 Nov 2018 21:55:29 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 43024 invoked by uid 89); 20 Nov 2018 21:55:28 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.9 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS, TIME_LIMIT_EXCEEDED autolearn=unavailable version=3.3.2 spammy=UD:ar, 5n, se, sa X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 20 Nov 2018 21:55:17 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-MBX-04.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1gPDzG-0002wc-IS from Julian_Brown@mentor.com ; Tue, 20 Nov 2018 13:55:15 -0800 Received: from localhost.localdomain (147.34.91.1) by SVR-IES-MBX-04.mgc.mentorg.com (139.181.222.4) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Tue, 20 Nov 2018 21:55:09 +0000 From: Julian Brown To: CC: , , Subject: [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach) Date: Tue, 20 Nov 2018 13:54:49 -0800 Message-ID: <0b11e1202506af9e3978b7cab92bca0eb89f664d.1542748807.git.julian@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-IsSubscribed: yes Previously posted upstream: https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00826.html gcc/c/ * c-parser.c (c_parser_omp_variable_list): Allow deref (->) in variable lists. (c_parser_oacc_all_clauses): Re-alphabetize cases. * c-typeck.c (handle_omp_array_sections_1): Support deref. gcc/cp/ * parser.c (cp_parser_omp_var_list_no_open): Support deref. (cp_parser_oacc_all_clauses): Re-alphabetize cases. * semantics.c (finish_omp_clauses): Allow "this" for OpenACC data clauses. Support deref. gcc/fortran/ * gfortran.h (gfc_omp_map_op): Add OMP_MAP_ATTACH, OMP_MAP_DETACH. * openmp.c (omp_mask2): Add OMP_CLAUSE_ATTACH, OMP_CLAUSE_DETACH. (gfc_match_omp_clauses): Remove allow_derived parameter, infer from clause mask. Support attach and detach. Slight reformatting. (OACC_PARALLEL_CLAUSES, OACC_KERNELS_CLAUSES, OACC_DATA_CLAUSES) (OACC_ENTER_DATA_CLAUSES): Add OMP_CLAUSE_ATTACH. (OACC_EXIT_DATA_CLAUSES): Add OMP_CLAUSE_DETACH. (match_acc): Remove derived_types parameter, and don't pass to gfc_match_omp_clauses. (gfc_match_oacc_update): Don't pass allow_derived argument. (gfc_match_oacc_enter_data): Likewise. (gfc_match_oacc_exit_data): Likewise. (check_symbol_not_pointer): Don't disallow pointer objects of derived type. (resolve_oacc_data_clauses): Don't disallow allocatable derived types. (resolve_omp_clauses): Perform duplicate checking only for non-derived type component accesses (plain variables and arrays or array sections). Support component refs. * trans-openmp.c (gfc_omp_privatize_by_reference): Support component refs. (gfc_trans_omp_clauses_1): Support component refs, attach and detach clauses. gcc/ * gimplify.c (gimplify_omp_var_data): Add GOVD_MAP_HAS_ATTACHMENTS. (insert_struct_component_mapping): Support derived-type member mappings for arrays with descriptors which use GOMP_MAP_TO_PSET. (gimplify_scan_omp_clauses): Rewrite GOMP_MAP_ALWAYS_POINTER to GOMP_MAP_ATTACH for OpenACC struct/derived-type component pointers. Handle pointer mappings that use GOMP_MAP_TO_PSET. Handle attach/detach clauses. (gimplify_adjust_omp_clauses_1): Skip adjustments for explicit attach/detach clauses. (gimplify_omp_target_update): Handle finalize for detach. gcc/testsuite/ * c-c++-common/goacc/mdc-1.c: Update scan tests. * gfortran.dg/goacc/data-clauses.f95: Remove expected errors. * gfortran.dg/goacc/derived-types.f90: Likewise. * gfortran.dg/goacc/enter-exit-data.f95: Likewise. libgomp/ * libgomp.h (struct target_var_desc): Add do_detach flag. (struct splay_tree_key_s): Add attach_count field. (struct gomp_coalesce_buf): Add forward declaration. (gomp_map_val, gomp_attach_pointer, gomp_detach_pointer): Add prototypes. (gomp_unmap_vars): Add finalize parameter. * libgomp.map (OACC_2.6): New section. Add acc_attach, acc_attach_async, acc_detach, acc_detach_async, acc_detach_finalize, acc_detach_finalize_async. * oacc-async.c (goacc_async_copyout_unmap_vars): Add finalize parameter. Pass to gomp_unmap_vars_async. * oacc-init.c (acc_shutdown_1): Update call to gomp_unmap_vars. * oacc-int.h (goacc_async_copyout_unmap_vars): Add finalize parameter. * oacc-mem.c (acc_unmap_data): Update call to gomp_unmap_vars. (present_create_copy): Initialise attach_count. (delete_copyout): Likewise. (gomp_acc_insert_pointer): Likewise. (gomp_acc_remove_pointer): Update calls to gomp_unmap_vars, goacc_async_copyout_unmap_vars. (acc_attach_async, acc_attach, goacc_detach_internal, acc_detach) (acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): New functions. * oacc-parallel.c (find_pointer): Support attach/detach. Make a little more strict. (GOACC_parallel_keyed_internal): Use gomp_map_val to calculate device addresses. Update calls to gomp_unmap_vars, goacc_async_copyout_unmap_vars. (GOACC_data_end): Update call to gomp_unmap_vars. (GOACC_enter_exit_data): Support attach/detach and GOMP_MAP_STRUCT. * openacc.h (acc_attach, acc_attach_async, acc_detach) (acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): Add prototypes. * target.c (limits.h): Include. (gomp_map_vars_existing): Initialise do_detach field of tgt_var_desc. (gomp_attach_pointer, gomp_detach_pointer): New functions. (gomp_map_val): Make global. (gomp_map_vars_async): Support attach and detach. (gomp_remove_var): Free attach count array if present. (gomp_unmap_vars): Add finalize parameter. Update call to gomp_unmap_vars_async. (gomp_unmap_vars_async): Add finalize parameter. Add pointer detaching support. (GOMP_target): Update call to gomp_unmap_vars. (GOMP_target_ext): Likewise. (gomp_exit_data): Free attach count array if present. (gomp_target_task_fn): Update call to gomp_unmap_vars. * testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-1.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-2.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-3.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-4.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-5.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-6.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-7.c: New test. * testsuite/libgomp.oacc-fortran/deep-copy-8.c: New test. * testsuite/libgomp.oacc-fortran/derived-type-1.f90: Update test to use stop . * testsuite/libgomp.oacc-fortran/update-2.f90: Likewise. --- gcc/c/c-parser.c | 15 +- gcc/c/c-typeck.c | 4 + gcc/cp/parser.c | 16 +- gcc/cp/semantics.c | 6 +- gcc/fortran/gfortran.h | 2 + gcc/fortran/openmp.c | 126 +++++++++----- gcc/fortran/trans-openmp.c | 143 ++++++--------- gcc/gimplify.c | 82 +++++++-- gcc/testsuite/c-c++-common/goacc/mdc-1.c | 10 +- gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 | 38 ++-- gcc/testsuite/gfortran.dg/goacc/derived-types.f90 | 23 +-- .../gfortran.dg/goacc/enter-exit-data.f95 | 24 ++-- libgomp/libgomp.h | 23 ++- libgomp/libgomp.map | 10 + libgomp/oacc-async.c | 4 +- libgomp/oacc-init.c | 2 +- libgomp/oacc-int.h | 2 +- libgomp/oacc-mem.c | 86 +++++++++- libgomp/oacc-parallel.c | 190 +++++++++++++++----- libgomp/openacc.h | 6 + libgomp/target.c | 189 ++++++++++++++++++- .../libgomp.oacc-c-c++-common/deep-copy-1.c | 24 +++ .../libgomp.oacc-c-c++-common/deep-copy-2.c | 29 +++ .../libgomp.oacc-c-c++-common/deep-copy-3.c | 34 ++++ .../libgomp.oacc-c-c++-common/deep-copy-4.c | 87 +++++++++ .../libgomp.oacc-c-c++-common/deep-copy-5.c | 81 +++++++++ .../testsuite/libgomp.oacc-fortran/deep-copy-1.f90 | 35 ++++ .../testsuite/libgomp.oacc-fortran/deep-copy-2.f90 | 33 ++++ .../testsuite/libgomp.oacc-fortran/deep-copy-3.f90 | 34 ++++ .../testsuite/libgomp.oacc-fortran/deep-copy-4.f90 | 49 +++++ .../testsuite/libgomp.oacc-fortran/deep-copy-5.f90 | 57 ++++++ .../testsuite/libgomp.oacc-fortran/deep-copy-6.f90 | 61 +++++++ .../testsuite/libgomp.oacc-fortran/deep-copy-7.f90 | 89 +++++++++ .../testsuite/libgomp.oacc-fortran/deep-copy-8.f90 | 41 +++++ .../libgomp.oacc-fortran/derived-type-1.f90 | 6 +- .../testsuite/libgomp.oacc-fortran/update-2.f90 | 44 +++--- 36 files changed, 1407 insertions(+), 298 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90 diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index ffc5fe9..4b6ab84 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -11553,9 +11553,12 @@ c_parser_omp_variable_list (c_parser *parser, case OMP_CLAUSE_MAP: case OMP_CLAUSE_FROM: case OMP_CLAUSE_TO: - while (c_parser_next_token_is (parser, CPP_DOT)) + while (c_parser_next_token_is (parser, CPP_DOT) + || c_parser_next_token_is (parser, CPP_DEREF)) { location_t op_loc = c_parser_peek_token (parser)->location; + if (c_parser_next_token_is (parser, CPP_DEREF)) + t = build_simple_mem_ref (t); c_parser_consume_token (parser); if (!c_parser_next_token_is (parser, CPP_NAME)) { @@ -11679,7 +11682,7 @@ c_parser_omp_var_list_parens (c_parser *parser, enum omp_clause_code kind, } /* OpenACC 2.5: - attach (variable-list ) + attach ( variable-list ) copy ( variable-list ) copyin ( variable-list ) copyout ( variable-list ) @@ -14090,15 +14093,15 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_clause_async (parser, clauses); c_name = "async"; break; + case PRAGMA_OACC_CLAUSE_ATTACH: + clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "attach"; + break; case PRAGMA_OACC_CLAUSE_AUTO: clauses = c_parser_oacc_simple_clause (parser, here, OMP_CLAUSE_AUTO, clauses); c_name = "auto"; break; - case PRAGMA_OACC_CLAUSE_ATTACH: - clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); - c_name = "attach"; - break; case PRAGMA_OACC_CLAUSE_BIND: clauses = c_parser_oacc_clause_bind (parser, clauses); c_name = "bind"; diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index ab6819c..1a18867 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -12446,6 +12446,8 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, } t = TREE_OPERAND (t, 0); } + if (TREE_CODE (t) == MEM_REF) + t = TREE_OPERAND (t, 0); } if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) { @@ -13750,6 +13752,8 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) } if (remove) break; + if (TREE_CODE (t) == MEM_REF) + t = TREE_OPERAND (t, 0); if (VAR_P (t) || TREE_CODE (t) == PARM_DECL) { if (bitmap_bit_p (&map_field_head, DECL_UID (t))) diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 8161d63..79c03d2 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -31563,15 +31563,19 @@ cp_parser_omp_var_list_no_open (cp_parser *parser, enum omp_clause_code kind, case OMP_CLAUSE_MAP: case OMP_CLAUSE_FROM: case OMP_CLAUSE_TO: - while (cp_lexer_next_token_is (parser->lexer, CPP_DOT)) + while (cp_lexer_next_token_is (parser->lexer, CPP_DOT) + || cp_lexer_next_token_is (parser->lexer, CPP_DEREF)) { + cpp_ttype ttype + = cp_lexer_next_token_is (parser->lexer, CPP_DOT) + ? CPP_DOT : CPP_DEREF; location_t loc = cp_lexer_peek_token (parser->lexer)->location; cp_id_kind idk = CP_ID_KIND_NONE; cp_lexer_consume_token (parser->lexer); decl = convert_from_reference (decl); decl - = cp_parser_postfix_dot_deref_expression (parser, CPP_DOT, + = cp_parser_postfix_dot_deref_expression (parser, ttype, decl, false, &idk, loc); } @@ -33858,15 +33862,15 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_clause_async (parser, clauses); c_name = "async"; break; + case PRAGMA_OACC_CLAUSE_ATTACH: + clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); + c_name = "attach"; + break; case PRAGMA_OACC_CLAUSE_AUTO: clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_AUTO, clauses, here); c_name = "auto"; break; - case PRAGMA_OACC_CLAUSE_ATTACH: - clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); - c_name = "attach"; - break; case PRAGMA_OACC_CLAUSE_BIND: clauses = cp_parser_oacc_clause_bind (parser, clauses); c_name = "bind"; diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 796ae7f..7cbcb34 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6724,7 +6724,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) error ("%qE is not a variable in % clause", t); remove = true; } - else if (ort != C_ORT_ACC && t == current_class_ptr) + else if (t == current_class_ptr) { error ("% allowed in OpenMP only in %" " clauses"); @@ -6810,6 +6810,10 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) t = TREE_OPERAND (t, 0); OMP_CLAUSE_DECL (c) = t; } + if (ort == C_ORT_ACC + && TREE_CODE (t) == COMPONENT_REF + && TREE_CODE (TREE_OPERAND (t, 0)) == INDIRECT_REF) + t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); if (TREE_CODE (t) == COMPONENT_REF && ((ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP || ort == C_ORT_ACC) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 3a9e45b..14b5def 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1183,10 +1183,12 @@ enum gfc_omp_depend_op enum gfc_omp_map_op { OMP_MAP_ALLOC, + OMP_MAP_ATTACH, OMP_MAP_TO, OMP_MAP_FROM, OMP_MAP_TOFROM, OMP_MAP_DELETE, + OMP_MAP_DETACH, OMP_MAP_FORCE_ALLOC, OMP_MAP_FORCE_TO, OMP_MAP_FORCE_FROM, diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c index 654ceb6..f120e3d 100644 --- a/gcc/fortran/openmp.c +++ b/gcc/fortran/openmp.c @@ -808,7 +808,7 @@ enum omp_mask1 OMP_MASK1_LAST }; -/* OpenACC 2.0 specific clauses. */ +/* OpenACC 2.0+ specific clauses. */ enum omp_mask2 { OMP_CLAUSE_ASYNC, @@ -837,6 +837,8 @@ enum omp_mask2 OMP_CLAUSE_IF_PRESENT, OMP_CLAUSE_FINALIZE, OMP_CLAUSE_DEVICE_TYPE, + OMP_CLAUSE_ATTACH, + OMP_CLAUSE_DETACH, /* This must come last. */ OMP_MASK2_LAST }; @@ -964,10 +966,18 @@ static match gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, const omp_mask dtype_mask, bool first = true, bool needs_space = true, - bool openacc = false, bool allow_derived = false) + bool openacc = false) { gfc_omp_clauses *base_clauses, *c = gfc_get_omp_clauses (); locus old_loc; + /* Determine whether we're dealing with an OpenACC directive that permits + derived type member accesses. This in particular disallows + "!$acc declare" from using such accesses, because it's not clear if/how + that should work. */ + bool allow_derived = (openacc + && ((mask & OMP_CLAUSE_ATTACH) + || (mask & OMP_CLAUSE_DETACH) + || (mask & OMP_CLAUSE_HOST_SELF))); base_clauses = c; @@ -1043,6 +1053,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, needs_space = true; continue; } + if ((mask & OMP_CLAUSE_ATTACH) + && gfc_match ("attach ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_ATTACH, false, + allow_derived)) + continue; break; case 'b': if ((mask & OMP_CLAUSE_BIND) && c->routine_bind == NULL @@ -1098,8 +1114,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, if ((mask & OMP_CLAUSE_COPYOUT) && gfc_match ("copyout ( ") == MATCH_YES && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], - OMP_MAP_FROM, true, - allow_derived)) + OMP_MAP_FROM, true, allow_derived)) continue; if ((mask & OMP_CLAUSE_COPYPRIVATE) && gfc_match_omp_variable_list ("copyprivate (", @@ -1109,8 +1124,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, if ((mask & OMP_CLAUSE_CREATE) && gfc_match ("create ( ") == MATCH_YES && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], - OMP_MAP_ALLOC, true, - allow_derived)) + OMP_MAP_ALLOC, true, allow_derived)) continue; break; case 'd': @@ -1190,6 +1204,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, else gfc_current_locus = old_loc; } + if ((mask & OMP_CLAUSE_DETACH) + && gfc_match ("detach ( ") == MATCH_YES + && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP], + OMP_MAP_DETACH, false, + allow_derived)) + continue; if ((mask & OMP_CLAUSE_DEVICE) && !openacc && c->device == NULL @@ -1784,8 +1804,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, if (gfc_match_omp_variable_list (" :", &c->lists[OMP_LIST_REDUCTION], - false, NULL, &head, - openacc) == MATCH_YES) + false, NULL, &head, openacc, + allow_derived) == MATCH_YES) { gfc_omp_namelist *n; if (rop == OMP_REDUCTION_NONE) @@ -2053,7 +2073,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT \ | OMP_CLAUSE_DEVICEPTR \ | OMP_CLAUSE_PRIVATE | OMP_CLAUSE_FIRSTPRIVATE \ - | OMP_CLAUSE_DEFAULT) + | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_ATTACH) #define OACC_KERNELS_CLAUSES \ (omp_mask (OMP_CLAUSE_ASYNC) | OMP_CLAUSE_WAIT \ | OMP_CLAUSE_NUM_GANGS | OMP_CLAUSE_NUM_WORKERS \ @@ -2063,12 +2083,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \ | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT \ | OMP_CLAUSE_DEVICEPTR \ - | OMP_CLAUSE_DEFAULT) + | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_ATTACH) #define OACC_DATA_CLAUSES \ (omp_mask (OMP_CLAUSE_IF) \ | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT \ | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT \ - | OMP_CLAUSE_DEVICEPTR) + | OMP_CLAUSE_DEVICEPTR | OMP_CLAUSE_ATTACH) #define OACC_HOST_DATA_CLAUSES \ (omp_mask (OMP_CLAUSE_USE_DEVICE)) #define OACC_LOOP_CLAUSES \ @@ -2098,12 +2118,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, #define OACC_ENTER_DATA_CLAUSES \ (omp_mask (OMP_CLAUSE_IF) \ | OMP_CLAUSE_ASYNC | OMP_CLAUSE_WAIT \ - | OMP_CLAUSE_COPYIN | OMP_CLAUSE_CREATE) + | OMP_CLAUSE_COPYIN | OMP_CLAUSE_CREATE | OMP_CLAUSE_ATTACH) #define OACC_EXIT_DATA_CLAUSES \ (omp_mask (OMP_CLAUSE_IF) \ | OMP_CLAUSE_ASYNC | OMP_CLAUSE_WAIT \ | OMP_CLAUSE_COPYOUT | OMP_CLAUSE_DELETE \ - | OMP_CLAUSE_FINALIZE) + | OMP_CLAUSE_FINALIZE | OMP_CLAUSE_DETACH) #define OACC_ROUTINE_CLAUSES \ (omp_mask (OMP_CLAUSE_GANG) | OMP_CLAUSE_WORKER | OMP_CLAUSE_VECTOR \ | OMP_CLAUSE_SEQ \ @@ -2139,12 +2159,10 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask, static match -match_acc (gfc_exec_op op, const omp_mask mask, const omp_mask dtype_mask, - bool derived_types=false) +match_acc (gfc_exec_op op, const omp_mask mask, const omp_mask dtype_mask) { gfc_omp_clauses *c; - if (gfc_match_omp_clauses (&c, mask, dtype_mask, false, false, true, - derived_types) + if (gfc_match_omp_clauses (&c, mask, dtype_mask, false, false, true) != MATCH_YES) return MATCH_ERROR; new_st.op = op; @@ -2309,7 +2327,8 @@ gfc_match_oacc_update (void) if (gfc_match_omp_clauses (&c, OACC_UPDATE_CLAUSES, OACC_UPDATE_CLAUSE_DEVICE_TYPE_MASK, false, - false, true, true) != MATCH_YES) + false, true) + != MATCH_YES) return MATCH_ERROR; if (!c->lists[OMP_LIST_MAP]) @@ -2329,7 +2348,7 @@ match gfc_match_oacc_enter_data (void) { return match_acc (EXEC_OACC_ENTER_DATA, OACC_ENTER_DATA_CLAUSES, - OMP_MASK2_LAST, true); + OMP_MASK2_LAST); } @@ -2337,7 +2356,7 @@ match gfc_match_oacc_exit_data (void) { return match_acc (EXEC_OACC_EXIT_DATA, OACC_EXIT_DATA_CLAUSES, - OMP_MASK2_LAST, true); + OMP_MASK2_LAST); } @@ -4017,9 +4036,6 @@ resolve_nonnegative_int_expr (gfc_expr *expr, const char *clause) static void check_symbol_not_pointer (gfc_symbol *sym, locus loc, const char *name) { - if (sym->ts.type == BT_DERIVED && sym->attr.pointer) - gfc_error ("POINTER object %qs of derived type in %s clause at %L", - sym->name, name, &loc); if (sym->ts.type == BT_DERIVED && sym->attr.cray_pointer) gfc_error ("Cray pointer object %qs of derived type in %s clause at %L", sym->name, name, &loc); @@ -4060,9 +4076,6 @@ check_array_not_assumed (gfc_symbol *sym, locus loc, const char *name) static void resolve_oacc_data_clauses (gfc_symbol *sym, locus loc, const char *name) { - if (sym->ts.type == BT_DERIVED && sym->attr.allocatable) - gfc_error ("ALLOCATABLE object %qs of derived type in %s clause at %L", - sym->name, name, &loc); if ((sym->ts.type == BT_ASSUMED && sym->attr.allocatable) || (sym->ts.type == BT_CLASS && CLASS_DATA (sym) && CLASS_DATA (sym)->attr.allocatable)) @@ -4408,11 +4421,23 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, && (list != OMP_LIST_REDUCTION || !openacc)) for (n = omp_clauses->lists[list]; n; n = n->next) { - if (n->sym->mark) - gfc_error ("Symbol %qs present on multiple clauses at %L", - n->sym->name, &n->where); - else - n->sym->mark = 1; + bool array_only_p = true; + /* Disallow duplicate bare variable references and multiple + subarrays of the same array here, but allow multiple components of + the same (e.g. derived-type) variable. For the latter, duplicate + components are detected elsewhere. */ + if (openacc && n->expr && n->expr->expr_type == EXPR_VARIABLE) + for (gfc_ref *ref = n->expr->ref; ref; ref = ref->next) + if (ref->type != REF_ARRAY) + array_only_p = false; + if (array_only_p) + { + if (n->sym->mark) + gfc_error ("Symbol %qs present on multiple clauses at %L", + n->sym->name, &n->where); + else + n->sym->mark = 1; + } } gcc_assert (OMP_LIST_LASTPRIVATE == OMP_LIST_FIRSTPRIVATE + 1); @@ -4603,26 +4628,41 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, "are allowed on ORDERED directive at %L", &n->where); } + gfc_ref *array_ref = NULL; + bool resolved = false; if (n->expr) { - if (!gfc_resolve_expr (n->expr) + array_ref = n->expr->ref; + resolved = gfc_resolve_expr (n->expr); + + /* Look through component refs to find last array + reference. */ + while (resolved + && array_ref + && (array_ref->type == REF_COMPONENT + || (array_ref->type == REF_ARRAY + && array_ref->next + && array_ref->next->type == REF_COMPONENT))) + array_ref = array_ref->next; + } + if (array_ref + || (n->expr + && (!resolved || n->expr->expr_type != EXPR_VARIABLE))) + { + if (!resolved || n->expr->expr_type != EXPR_VARIABLE - || n->expr->ref == NULL - || n->expr->ref->next - || n->expr->ref->type != REF_ARRAY) - { - if (n->sym->ts.type != BT_DERIVED) - gfc_error ("%qs in %s clause at %L is not a proper " - "array section", n->sym->name, name, - &n->where); - } - else if (n->expr->ref->u.ar.codimen) + || array_ref->next + || array_ref->type != REF_ARRAY) + gfc_error ("%qs in %s clause at %L is not a proper " + "array section", n->sym->name, name, + &n->where); + else if (array_ref->u.ar.codimen) gfc_error ("Coarrays not supported in %s clause at %L", name, &n->where); else { int i; - gfc_array_ref *ar = &n->expr->ref->u.ar; + gfc_array_ref *ar = &array_ref->u.ar; for (i = 0; i < ar->dimen; i++) if (ar->stride[i]) { diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 8840fd2..98f40d1 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -62,6 +62,9 @@ gfc_omp_privatize_by_reference (const_tree decl) if (TREE_CODE (type) == POINTER_TYPE) { + while (TREE_CODE (decl) == COMPONENT_REF) + decl = TREE_OPERAND (decl, 1); + /* Array POINTER/ALLOCATABLE have aggregate types, all user variables that have POINTER_TYPE type and aren't scalar pointers, scalar allocatables, Cray pointees or C pointers are supposed to be @@ -2121,69 +2124,35 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, tree decl = gfc_get_symbol_decl (n->sym); if (DECL_P (decl)) TREE_ADDRESSABLE (decl) = 1; - /* Handle derived-typed members for OpenACC Update. */ - if (n->sym->ts.type == BT_DERIVED - && n->expr != NULL && n->expr->ref != NULL - && (n->expr->ref->next == NULL - || (n->expr->ref->next != NULL - && n->expr->ref->next->type == REF_ARRAY - && n->expr->ref->next->u.ar.type == AR_FULL)) - && (n->expr->ref->type == REF_ARRAY - && n->expr->ref->u.ar.type != AR_SECTION)) - { - gfc_ref *ref = n->expr->ref; - gfc_component *c = ref->u.c.component; - tree field; - tree context; - tree ptr; - tree type; - tree scratch; - if (c->backend_decl == NULL_TREE - && ref->u.c.sym != NULL) - gfc_get_derived_type (ref->u.c.sym); + gfc_ref *ref = n->expr ? n->expr->ref : NULL; + symbol_attribute *sym_attr = &n->sym->attr; + gomp_map_kind ptr_map_kind = GOMP_MAP_POINTER; - field = c->backend_decl; - gcc_assert (field && TREE_CODE (field) == FIELD_DECL); - context = DECL_FIELD_CONTEXT (field); - - type = TREE_TYPE (decl); - if (POINTER_TYPE_P (type)) - type = TREE_TYPE (type); + if (ref && n->sym->ts.type == BT_DERIVED) + { + if (gfc_omp_privatize_by_reference (decl)) + decl = build_fold_indirect_ref (decl); - if (context != type) + for (; ref && ref->type == REF_COMPONENT; ref = ref->next) { - tree f2 = c->norestrict_decl; - if (!f2 || DECL_FIELD_CONTEXT (f2) != type) - for (f2 = TYPE_FIELDS (TREE_TYPE (decl)); f2; - f2 = DECL_CHAIN (f2)) - if (TREE_CODE (f2) == FIELD_DECL - && DECL_NAME (f2) == DECL_NAME (field)) - break; - gcc_assert (f2); - c->norestrict_decl = f2; - field = f2; + tree field = ref->u.c.component->backend_decl; + gcc_assert (field && TREE_CODE (field) == FIELD_DECL); + decl = fold_build3 (COMPONENT_REF, TREE_TYPE (field), + decl, field, NULL_TREE); + sym_attr = &ref->u.c.component->attr; } - if (POINTER_TYPE_P (TREE_TYPE (decl))) - decl = build_fold_indirect_ref_loc (input_location, - decl); - - scratch = fold_build3_loc (input_location, COMPONENT_REF, - TREE_TYPE (field), decl, field, - NULL_TREE); - type = TREE_TYPE (scratch); - ptr = gfc_create_var (pvoid_type_node, NULL); - scratch = fold_convert (pvoid_type_node, - build_fold_addr_expr (scratch)); - gfc_add_modify (block, ptr, scratch); - OMP_CLAUSE_SIZE (node) = TYPE_SIZE_UNIT (type); - OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (ptr); + ptr_map_kind = GOMP_MAP_ALWAYS_POINTER; } - else if ((n->sym->ts.type == BT_DERIVED && n->expr == NULL) - || (n->expr == NULL - || n->expr->ref->u.ar.type == AR_FULL)) + + if (ref == NULL || ref->u.ar.type == AR_FULL) { + tree field = decl; + + while (TREE_CODE (field) == COMPONENT_REF) + field = TREE_OPERAND (field, 1); + if (POINTER_TYPE_P (TREE_TYPE (decl)) && n->u.map_op == OMP_MAP_FORCE_DEVICEPTR) { @@ -2192,18 +2161,18 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, } else if (POINTER_TYPE_P (TREE_TYPE (decl)) && (gfc_omp_privatize_by_reference (decl) - || GFC_DECL_GET_SCALAR_POINTER (decl) - || GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) - || GFC_DECL_CRAY_POINTEE (decl) + || GFC_DECL_GET_SCALAR_POINTER (field) + || GFC_DECL_GET_SCALAR_ALLOCATABLE (field) + || GFC_DECL_CRAY_POINTEE (field) || GFC_DESCRIPTOR_TYPE_P - (TREE_TYPE (TREE_TYPE (decl))))) + (TREE_TYPE (TREE_TYPE (field))))) { tree orig_decl = decl; enum gomp_map_kind gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) && (n->sym->attr.oacc_declare_create) && clauses->update_allocatable) - gmk = GOMP_MAP_ALWAYS_POINTER; + gmk = ptr_map_kind; node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node4, gmk); @@ -2216,7 +2185,7 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, { node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind); OMP_CLAUSE_DECL (node3) = decl; OMP_CLAUSE_SIZE (node3) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -2225,7 +2194,9 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); } - if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)) + && n->u.map_op != OMP_MAP_ATTACH + && n->u.map_op != OMP_MAP_DETACH) { tree type = TREE_TYPE (decl); tree ptr = gfc_conv_descriptor_data_get (decl); @@ -2238,14 +2209,16 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind); OMP_CLAUSE_DECL (node3) = gfc_conv_descriptor_data_get (decl); + if (ptr_map_kind == GOMP_MAP_ALWAYS_POINTER) + STRIP_NOPS (OMP_CLAUSE_DECL (node3)); OMP_CLAUSE_SIZE (node3) = size_int (0); /* We have to check for n->sym->attr.dimension because of scalar coarrays. */ - if (n->sym->attr.pointer && n->sym->attr.dimension) + if (sym_attr->pointer && sym_attr->dimension) { stmtblock_t cond_block; tree size @@ -2275,11 +2248,11 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, else_b)); OMP_CLAUSE_SIZE (node) = size; } - else if (n->sym->attr.dimension) + else if (sym_attr->dimension) OMP_CLAUSE_SIZE (node) = gfc_full_array_size (block, decl, GFC_TYPE_ARRAY_RANK (type)); - if (n->sym->attr.dimension) + if (sym_attr->dimension) { tree elemsz = TYPE_SIZE_UNIT (gfc_get_element_type (type)); @@ -2292,31 +2265,17 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, else OMP_CLAUSE_DECL (node) = decl; } - else + else if (ref) { tree ptr, ptr2; gfc_init_se (&se, NULL); - if ((n->sym->ts.type == BT_DERIVED - && n->expr->rank == 0) - || (n->sym->ts.type != BT_DERIVED - && n->expr->ref->u.ar.type == AR_ELEMENT)) + if (ref->u.ar.type == AR_ELEMENT) { gfc_conv_expr_reference (&se, n->expr); gfc_add_block_to_block (block, &se.pre); ptr = se.expr; - tree type = TREE_TYPE (ptr); - if (n->sym->ts.type == BT_DERIVED) - { - tree t = gfc_create_var (build_pointer_type - (void_type_node), - NULL); - ptr = fold_convert (pvoid_type_node, ptr); - gfc_add_modify (block, t, ptr); - ptr = t; - type = TREE_TYPE (type); - } OMP_CLAUSE_SIZE (node) - = TYPE_SIZE_UNIT (type); + = TYPE_SIZE_UNIT (TREE_TYPE (ptr)); } else { @@ -2337,14 +2296,12 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, gfc_add_block_to_block (block, &se.post); OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (ptr); - if (n->sym->ts.type == BT_DERIVED) - goto finalize_map_clause; if (POINTER_TYPE_P (TREE_TYPE (decl)) && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (decl)))) { node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node4, ptr_map_kind); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -2361,9 +2318,11 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind); OMP_CLAUSE_DECL (node3) = gfc_conv_descriptor_data_get (decl); + if (ptr_map_kind == GOMP_MAP_ALWAYS_POINTER) + STRIP_NOPS (OMP_CLAUSE_DECL (node3)); } else { @@ -2376,7 +2335,7 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, } node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER); + OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind); OMP_CLAUSE_DECL (node3) = decl; } ptr2 = fold_convert (sizetype, ptr2); @@ -2384,11 +2343,16 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, = fold_build2 (MINUS_EXPR, sizetype, ptr, ptr2); finalize_map_clause:; } + else + gcc_unreachable (); switch (n->u.map_op) { case OMP_MAP_ALLOC: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_ALLOC); break; + case OMP_MAP_ATTACH: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_ATTACH); + break; case OMP_MAP_TO: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_TO); break; @@ -2413,6 +2377,9 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, case OMP_MAP_DELETE: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DELETE); break; + case OMP_MAP_DETACH: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DETACH); + break; case OMP_MAP_FORCE_ALLOC: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_ALLOC); break; diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 824e020..40bf586 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -111,6 +111,10 @@ enum gimplify_omp_var_data /* Flag for OpenACC deviceptrs. */ GOVD_DEVICEPTR = (1<<21), + /* Flag for GOVD_MAP: (struct) vars that have pointer attachments for + fields. */ + GOVD_MAP_HAS_ATTACHMENTS = (1<<22), + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -7692,7 +7696,13 @@ insert_struct_component_mapping (enum tree_code code, tree c, tree struct_node, OMP_CLAUSE_SET_MAP_KIND (c2, mkind); OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (c)); OMP_CLAUSE_CHAIN (c2) = scp ? *scp : prev_node; - OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (ptr_type_node); + if (OMP_CLAUSE_CHAIN (prev_node) != c + && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (prev_node)) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (prev_node)) + == GOMP_MAP_TO_PSET)) + OMP_CLAUSE_SIZE (c2) = OMP_CLAUSE_SIZE (OMP_CLAUSE_CHAIN (prev_node)); + else + OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (ptr_type_node); if (struct_node) OMP_CLAUSE_CHAIN (struct_node) = c2; @@ -8245,7 +8255,9 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, remove = true; break; } - if (DECL_P (decl)) + if (DECL_P (decl) + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_TO_PSET + && code != OACC_UPDATE) { if (error_operand_p (decl)) { @@ -8297,17 +8309,36 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, = splay_tree_lookup (ctx->variables, (splay_tree_key)decl); bool ptr = (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_POINTER); - if ((n == NULL || (n->value & GOVD_MAP) == 0) - && code != OACC_UPDATE) + bool attach = OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH; + bool has_attachments = false; + /* For OpenACC, pointers in structs should trigger an + attach action. */ + if (ptr && (region_type & ORT_ACC) != 0) + { + /* Turning a GOMP_MAP_ALWAYS_POINTER clause into a + GOMP_MAP_ATTACH clause after we have detected a case + that needs a GOMP_MAP_STRUCT mapping adding. */ + OMP_CLAUSE_SET_MAP_KIND (c, + (code == OACC_EXIT_DATA) ? GOMP_MAP_DETACH + : GOMP_MAP_ATTACH); + has_attachments = true; + } + if (n == NULL || (n->value & GOVD_MAP) == 0) { tree l = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (l, GOMP_MAP_STRUCT); + OMP_CLAUSE_SET_MAP_KIND (l, attach + ? GOMP_MAP_FORCE_PRESENT : GOMP_MAP_STRUCT); if (!base_eq_orig_base) OMP_CLAUSE_DECL (l) = unshare_expr (orig_base); else OMP_CLAUSE_DECL (l) = decl; - OMP_CLAUSE_SIZE (l) = size_int (1); + OMP_CLAUSE_SIZE (l) = attach + ? (DECL_P (OMP_CLAUSE_DECL (l)) + ? DECL_SIZE_UNIT (OMP_CLAUSE_DECL (l)) + : TYPE_SIZE_UNIT (TREE_TYPE (OMP_CLAUSE_DECL (l)))) + : size_int (1); if (struct_map_to_clause == NULL) struct_map_to_clause = new hash_map; struct_map_to_clause->put (decl, l); @@ -8339,9 +8370,11 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, flags = GOVD_MAP | GOVD_EXPLICIT; if (GOMP_MAP_ALWAYS_P (OMP_CLAUSE_MAP_KIND (c)) || ptr) flags |= GOVD_SEEN; + if (has_attachments) + flags |= GOVD_MAP_HAS_ATTACHMENTS; goto do_add_decl; } - else + else if (struct_map_to_clause) { tree *osc = struct_map_to_clause->get (decl); tree *sc = NULL, *scp = NULL; @@ -8350,8 +8383,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, sc = &OMP_CLAUSE_CHAIN (*osc); if (*sc != c && (OMP_CLAUSE_MAP_KIND (*sc) - == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) + == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) sc = &OMP_CLAUSE_CHAIN (*sc); + /* Here "prev_list_p" is the end of the inserted + alloc/release nodes after the struct node, OSC. */ for (; *sc != c; sc = &OMP_CLAUSE_CHAIN (*sc)) if (ptr && sc == prev_list_p) break; @@ -8410,9 +8445,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } if (remove) break; - OMP_CLAUSE_SIZE (*osc) - = size_binop (PLUS_EXPR, OMP_CLAUSE_SIZE (*osc), - size_one_node); + if (!attach) + OMP_CLAUSE_SIZE (*osc) + = size_binop (PLUS_EXPR, OMP_CLAUSE_SIZE (*osc), + size_one_node); if (ptr) { tree cl @@ -8444,11 +8480,15 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } if (!remove && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ALWAYS_POINTER + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_TO_PSET && OMP_CLAUSE_CHAIN (c) && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (c)) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c)) - == GOMP_MAP_ALWAYS_POINTER)) + && ((OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c)) + == GOMP_MAP_ALWAYS_POINTER) + || (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c)) + == GOMP_MAP_TO_PSET))) prev_list_p = list_p; + break; } flags = GOVD_MAP | GOVD_EXPLICIT; @@ -9020,6 +9060,8 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) return 0; if ((flags & GOVD_SEEN) == 0) return 0; + if ((flags & GOVD_MAP_HAS_ATTACHMENTS) != 0) + return 0; if (flags & GOVD_DEBUG_PRIVATE) { gcc_assert ((flags & GOVD_DATA_SHARE_CLASS) == GOVD_SHARED); @@ -9509,8 +9551,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, } } else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_STRUCT - && (code == OMP_TARGET_EXIT_DATA - || code == OACC_EXIT_DATA)) + && code == OMP_TARGET_EXIT_DATA) remove = true; else if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST @@ -11218,10 +11259,15 @@ gimplify_omp_target_update (tree *expr_p, gimple_seq *pre_p) OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_DETACH); finalize_marked = true; break; + case GOMP_MAP_STRUCT: + case GOMP_MAP_FORCE_PRESENT: + /* Skip over an initial struct or force_present mapping. */ + break; default: - /* Check consistency: libgomp relies on the very first data - mapping clause being marked, so make sure we did that before - any other mapping clauses. */ + /* Check consistency: libgomp relies on the very first + non-struct, non-force-present data mapping clause being + marked, so make sure we did that before any other mapping + clauses. */ gcc_assert (finalize_marked); break; } diff --git a/gcc/testsuite/c-c++-common/goacc/mdc-1.c b/gcc/testsuite/c-c++-common/goacc/mdc-1.c index c20b94d..84a44af 100644 --- a/gcc/testsuite/c-c++-common/goacc/mdc-1.c +++ b/gcc/testsuite/c-c++-common/goacc/mdc-1.c @@ -42,13 +42,13 @@ t1 () } /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.to:s .len: 32.." 1 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.tofrom:.z .len: 40.. map.struct:s .len: 1.. map.alloc:s.a .len: 8.. map.tofrom:._1 .len: 40.. map.always_pointer:s.a .pointer assign, bias: 0.." 1 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.struct:s .len: 1.. map.attach:s.e .len: 8.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.tofrom:.z .len: 40.. map.struct:s .len: 1.. map.alloc:s.a .len: 8.. map.tofrom:._1 .len: 40.. map.attach:s.a .len: 0.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.force_present:s .len: 32.. map.attach:s.e .len: 8.." 1 "omplower" } } */ /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.attach:a .len: 8.." 1 "omplower" } } */ /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.detach:a .len: 8.." 1 "omplower" } } */ /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.to:a .len: 8.." 1 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.detach:s.e .len: 8.." 1 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.struct:s .len: 1.. map.attach:s.e .len: 8.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.force_present:s .len: 32.. map.detach:s.e .len: 8.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.force_present:s .len: 32.. map.attach:s.e .len: 8.." 1 "omplower" } } */ /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.release:a .len: 8.." 1 "omplower" } } */ /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data finalize map.force_detach:a .len: 8.." 1 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data finalize map.force_detach:s.a .len: 8.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data finalize map.force_present:s .len: 32.. map.force_detach:s.a .len: 8.." 1 "omplower" } } */ diff --git a/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 b/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 index b94214e..1a4a671 100644 --- a/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 @@ -39,9 +39,9 @@ contains !$acc end data - !$acc parallel copy (tip) ! { dg-error "POINTER" } + !$acc parallel copy (tip) !$acc end parallel - !$acc parallel copy (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel copy (tia) !$acc end parallel !$acc parallel deviceptr (i) copy (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -54,9 +54,9 @@ contains !$acc end data - !$acc parallel copyin (tip) ! { dg-error "POINTER" } + !$acc parallel copyin (tip) !$acc end parallel - !$acc parallel copyin (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel copyin (tia) !$acc end parallel !$acc parallel deviceptr (i) copyin (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -71,9 +71,9 @@ contains !$acc end data - !$acc parallel copyout (tip) ! { dg-error "POINTER" } + !$acc parallel copyout (tip) !$acc end parallel - !$acc parallel copyout (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel copyout (tia) !$acc end parallel !$acc parallel deviceptr (i) copyout (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -90,9 +90,9 @@ contains !$acc end data - !$acc parallel create (tip) ! { dg-error "POINTER" } + !$acc parallel create (tip) !$acc end parallel - !$acc parallel create (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel create (tia) !$acc end parallel !$acc parallel deviceptr (i) create (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -111,9 +111,9 @@ contains !$acc end data - !$acc parallel present (tip) ! { dg-error "POINTER" } + !$acc parallel present (tip) !$acc end parallel - !$acc parallel present (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel present (tia) !$acc end parallel !$acc parallel deviceptr (i) present (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -144,9 +144,9 @@ contains !$acc end parallel - !$acc parallel present_or_copy (tip) ! { dg-error "POINTER" } + !$acc parallel present_or_copy (tip) !$acc end parallel - !$acc parallel present_or_copy (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel present_or_copy (tia) !$acc end parallel !$acc parallel deviceptr (i) present_or_copy (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -169,9 +169,9 @@ contains !$acc end data - !$acc parallel present_or_copyin (tip) ! { dg-error "POINTER" } + !$acc parallel present_or_copyin (tip) !$acc end parallel - !$acc parallel present_or_copyin (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel present_or_copyin (tia) !$acc end parallel !$acc parallel deviceptr (i) present_or_copyin (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -196,9 +196,9 @@ contains !$acc end data - !$acc parallel present_or_copyout (tip) ! { dg-error "POINTER" } + !$acc parallel present_or_copyout (tip) !$acc end parallel - !$acc parallel present_or_copyout (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel present_or_copyout (tia) !$acc end parallel !$acc parallel deviceptr (i) present_or_copyout (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -225,9 +225,9 @@ contains !$acc end data - !$acc parallel present_or_create (tip) ! { dg-error "POINTER" } + !$acc parallel present_or_create (tip) !$acc end parallel - !$acc parallel present_or_create (tia) ! { dg-error "ALLOCATABLE" } + !$acc parallel present_or_create (tia) !$acc end parallel !$acc parallel deviceptr (i) present_or_create (i) ! { dg-error "multiple clauses" } !$acc end parallel @@ -256,4 +256,4 @@ contains !$acc end data end subroutine foo -end module test \ No newline at end of file +end module test diff --git a/gcc/testsuite/gfortran.dg/goacc/derived-types.f90 b/gcc/testsuite/gfortran.dg/goacc/derived-types.f90 index 11d055a..5fb2981 100644 --- a/gcc/testsuite/gfortran.dg/goacc/derived-types.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/derived-types.f90 @@ -33,48 +33,45 @@ program derived_acc !$acc exit data copyout(var) !$acc exit data copyout(var%a) - !$acc data copy(var%a) ! { dg-error "Syntax error in OpenMP" } - !$acc end data ! { dg-error "Unexpected ..ACC END DATA" } - !$acc data copy(var) !$acc end data - !$acc data copyout(var%a) ! { dg-error "Syntax error in OpenMP" } - !$acc end data ! { dg-error "Unexpected ..ACC END" } + !$acc data copyout(var%a) + !$acc end data !$acc parallel loop pcopyout(var) do i = 1, 10 end do !$acc end parallel loop - !$acc parallel loop copyout(var%a) ! { dg-error "Syntax error in OpenMP" } + !$acc parallel loop copyout(var%a) do i = 1, 10 end do - !$acc end parallel loop ! { dg-error "Unexpected ..ACC END" } + !$acc end parallel loop !$acc parallel pcopy(var) !$acc end parallel - !$acc parallel pcopy(var%a) ! { dg-error "Syntax error in OpenMP" } + !$acc parallel pcopy(var%a) do i = 1, 10 end do - !$acc end parallel ! { dg-error "Unexpected ..ACC END" } + !$acc end parallel !$acc kernels pcopyin(var) !$acc end kernels - !$acc kernels pcopy(var%a) ! { dg-error "Syntax error in OpenMP" } + !$acc kernels pcopy(var%a) do i = 1, 10 end do - !$acc end kernels ! { dg-error "Unexpected ..ACC END" } + !$acc end kernels !$acc kernels loop pcopyin(var) do i = 1, 10 end do !$acc end kernels loop - !$acc kernels loop pcopy(var%a) ! { dg-error "Syntax error in OpenMP" } + !$acc kernels loop pcopy(var%a) do i = 1, 10 end do - !$acc end kernels loop ! { dg-error "Unexpected ..ACC END" } + !$acc end kernels loop end program derived_acc diff --git a/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 index 805459c..b616b39 100644 --- a/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 @@ -44,14 +44,14 @@ contains !$acc enter data wait (i, 1) !$acc enter data wait (a) ! { dg-error "INTEGER" } !$acc enter data wait (b(5:6)) ! { dg-error "INTEGER" } - !$acc enter data copyin (tip) ! { dg-error "POINTER" } - !$acc enter data copyin (tia) ! { dg-error "ALLOCATABLE" } - !$acc enter data create (tip) ! { dg-error "POINTER" } - !$acc enter data create (tia) ! { dg-error "ALLOCATABLE" } - !$acc enter data present_or_copyin (tip) ! { dg-error "POINTER" } - !$acc enter data present_or_copyin (tia) ! { dg-error "ALLOCATABLE" } - !$acc enter data present_or_create (tip) ! { dg-error "POINTER" } - !$acc enter data present_or_create (tia) ! { dg-error "ALLOCATABLE" } + !$acc enter data copyin (tip) + !$acc enter data copyin (tia) + !$acc enter data create (tip) + !$acc enter data create (tia) + !$acc enter data present_or_copyin (tip) + !$acc enter data present_or_copyin (tia) + !$acc enter data present_or_create (tip) + !$acc enter data present_or_create (tia) !$acc enter data copyin (i) create (i) ! { dg-error "multiple clauses" } !$acc enter data copyin (i) present_or_copyin (i) ! { dg-error "multiple clauses" } !$acc enter data create (i) present_or_copyin (i) ! { dg-error "multiple clauses" } @@ -79,10 +79,10 @@ contains !$acc exit data wait (i, 1) !$acc exit data wait (a) ! { dg-error "INTEGER" } !$acc exit data wait (b(5:6)) ! { dg-error "INTEGER" } - !$acc exit data copyout (tip) ! { dg-error "POINTER" } - !$acc exit data copyout (tia) ! { dg-error "ALLOCATABLE" } - !$acc exit data delete (tip) ! { dg-error "POINTER" } - !$acc exit data delete (tia) ! { dg-error "ALLOCATABLE" } + !$acc exit data copyout (tip) + !$acc exit data copyout (tia) + !$acc exit data delete (tip) + !$acc exit data delete (tia) !$acc exit data copyout (i) delete (i) ! { dg-error "multiple clauses" } !$acc exit data finalize !$acc exit data finalize copyout (i) diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index acf7f8f..17fe0d3 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -806,6 +806,8 @@ struct target_var_desc { bool copy_from; /* True if data always should be copied from device to host at the end. */ bool always_copy_from; + /* True if variable should be detached at end of region. */ + bool do_detach; /* Relative offset against key host_start. */ uintptr_t offset; /* Actual length. */ @@ -860,6 +862,8 @@ struct splay_tree_key_s { uintptr_t refcount; /* Dynamic reference count. */ uintptr_t dynamic_refcount; + /* For a block with attached pointers, the attachment counters for each. */ + unsigned short *attach_count; /* Pointer to the original mapping of "omp declare target link" object. */ splay_tree_key link_key; }; @@ -1003,6 +1007,8 @@ enum gomp_map_vars_kind GOMP_MAP_VARS_ENTER_DATA }; +struct gomp_coalesce_buf; + extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int); extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int); extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, @@ -1013,8 +1019,17 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *, void *, const void *, size_t, struct gomp_coalesce_buf *); extern void gomp_copy_dev2host (struct gomp_device_descr *, - struct goacc_asyncqueue *, - void *, const void *, size_t); + struct goacc_asyncqueue *, void *, const void *, + size_t); +extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); +extern void gomp_attach_pointer (struct gomp_device_descr *, + struct goacc_asyncqueue *, splay_tree, + splay_tree_key, uintptr_t, size_t, + struct gomp_coalesce_buf *); +extern void gomp_detach_pointer (struct gomp_device_descr *, + struct goacc_asyncqueue *, splay_tree_key, + uintptr_t, bool, struct gomp_coalesce_buf *); + extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, size_t, void **, void **, size_t *, void *, bool, @@ -1025,9 +1040,9 @@ extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *, size_t *, void *, bool, enum gomp_map_vars_kind); extern void gomp_unmap_tgt (struct target_mem_desc *); -extern void gomp_unmap_vars (struct target_mem_desc *, bool); +extern void gomp_unmap_vars (struct target_mem_desc *, bool, bool); extern void gomp_unmap_vars_async (struct target_mem_desc *, bool, - struct goacc_asyncqueue *); + struct goacc_asyncqueue *, bool); extern void gomp_init_device (struct gomp_device_descr *); extern bool gomp_fini_device (struct gomp_device_descr *); extern void gomp_unload_device (struct gomp_device_descr *); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 595b988..cc1ce2a 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -440,6 +440,16 @@ OACC_2.5 { acc_update_self_async_array_h_; } OACC_2.0.1; +OACC_2.6 { + global: + acc_attach; + acc_attach_async; + acc_detach; + acc_detach_async; + acc_detach_finalize; + acc_detach_finalize_async; +} OACC_2.5; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c index bb00279..6c12c82 100644 --- a/libgomp/oacc-async.c +++ b/libgomp/oacc-async.c @@ -373,14 +373,14 @@ goacc_async_unmap_tgt (void *ptr) attribute_hidden void goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt, - struct goacc_asyncqueue *aq) + struct goacc_asyncqueue *aq, bool finalize) { struct gomp_device_descr *devicep = tgt->device_descr; /* Increment reference to delay freeing of device memory until callback has triggered. */ tgt->refcount++; - gomp_unmap_vars_async (tgt, true, aq); + gomp_unmap_vars_async (tgt, true, aq, finalize); devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt, (void *) tgt); } diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index 48c9646..e1938c5 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -391,7 +391,7 @@ acc_shutdown_1 (acc_device_t d) { struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt; - gomp_unmap_vars (tgt, false); + gomp_unmap_vars (tgt, false, false); } walk->dev = NULL; diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h index 1f6c62c..878f0f4 100644 --- a/libgomp/oacc-int.h +++ b/libgomp/oacc-int.h @@ -112,7 +112,7 @@ void goacc_host_init (void); void goacc_init_asyncqueues (struct gomp_device_descr *); bool goacc_fini_asyncqueues (struct gomp_device_descr *); void goacc_async_copyout_unmap_vars (struct target_mem_desc *, - struct goacc_asyncqueue *); + struct goacc_asyncqueue *, bool); void goacc_async_free (struct gomp_device_descr *, struct goacc_asyncqueue *, void *); struct goacc_asyncqueue *get_goacc_asyncqueue (int); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index e5ee956..76ba914 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -518,7 +518,7 @@ acc_unmap_data (void *h) gomp_mutex_unlock (&acc_dev->lock); - gomp_unmap_vars (t, true); + gomp_unmap_vars (t, true, false); if (profiling_setup_p) { @@ -612,6 +612,7 @@ present_create_copy (unsigned f, void *h, size_t s, int async) &kinds, true, GOMP_MAP_VARS_OPENACC); /* Initialize dynamic refcount. */ tgt->list[0].key->dynamic_refcount = 1; + tgt->list[0].key->attach_count = NULL; gomp_mutex_lock (&acc_dev->lock); @@ -750,6 +751,7 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) { n->refcount = 0; n->dynamic_refcount = 0; + n->attach_count = NULL; } if (n->refcount < n->dynamic_refcount) { @@ -997,6 +999,7 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, /* Initialize dynamic refcount. */ tgt->list[0].key->dynamic_refcount = 1; + tgt->list[0].key->attach_count = NULL; gomp_mutex_lock (&acc_dev->lock); tgt->prev = acc_dev->openacc.data_environ; @@ -1084,11 +1087,11 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, /* If running synchronously, unmap immediately. */ if (async < acc_async_noval) - gomp_unmap_vars (t, true); + gomp_unmap_vars (t, true, finalize); else { goacc_aq aq = get_goacc_asyncqueue (async); - goacc_async_copyout_unmap_vars (t, aq); + goacc_async_copyout_unmap_vars (t, aq, finalize); } } @@ -1096,3 +1099,80 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); } + + +void +acc_attach_async (void **hostaddr, int async) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + goacc_aq aq = get_goacc_asyncqueue (async); + + struct splay_tree_key_s cur_node; + splay_tree_key n; + + if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return; + + cur_node.host_start = (uintptr_t) hostaddr; + cur_node.host_end = cur_node.host_start + sizeof (void *); + n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); + + if (n == NULL) + gomp_fatal ("struct not mapped for acc_attach"); + + gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, + 0, NULL); +} + +void +acc_attach (void **hostaddr) +{ + acc_attach_async (hostaddr, acc_async_sync); +} + +static void +goacc_detach_internal (void **hostaddr, int async, bool finalize) +{ + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + struct splay_tree_key_s cur_node; + splay_tree_key n; + struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async); + + if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return; + + cur_node.host_start = (uintptr_t) hostaddr; + cur_node.host_end = cur_node.host_start + sizeof (void *); + n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); + + if (n == NULL) + gomp_fatal ("struct not mapped for acc_detach"); + + gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL); +} + +void +acc_detach (void **hostaddr) +{ + goacc_detach_internal (hostaddr, acc_async_sync, false); +} + +void +acc_detach_async (void **hostaddr, int async) +{ + goacc_detach_internal (hostaddr, async, false); +} + +void +acc_detach_finalize (void **hostaddr) +{ + goacc_detach_internal (hostaddr, acc_async_sync, true); +} + +void +acc_detach_finalize_async (void **hostaddr, int async) +{ + goacc_detach_internal (hostaddr, async, true); +} diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 15b1462..f6c9114 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -50,12 +50,29 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds) if (pos + 1 >= mapnum) return 0; - unsigned char kind = kinds[pos+1] & 0xff; + unsigned char kind0 = kinds[pos] & 0xff; - if (kind == GOMP_MAP_TO_PSET) - return 3; - else if (kind == GOMP_MAP_POINTER) - return 2; + switch (kind0) + { + case GOMP_MAP_TO: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALLOC: + case GOMP_MAP_RELEASE: + { + unsigned char kind1 = kinds[pos + 1] & 0xff; + if (kind1 == GOMP_MAP_POINTER + || kind1 == GOMP_MAP_ALWAYS_POINTER + || kind1 == GOMP_MAP_ATTACH + || kind1 == GOMP_MAP_DETACH) + return 2; + else if (kind1 == GOMP_MAP_TO_PSET) + return 3; + } + default: + /* empty. */; + } return 0; } @@ -355,14 +372,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *), devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) - { - if (tgt->list[i].key != NULL) - devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start - + tgt->list[i].key->tgt_offset - + tgt->list[i].offset); - else - devaddrs[i] = NULL; - } + devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); if (aq == NULL) { @@ -382,7 +392,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *), &api_info); } /* If running synchronously, unmap immediately. */ - gomp_unmap_vars (tgt, true); + gomp_unmap_vars (tgt, true, false); if (profiling_dispatch_p) { prof_info.event_type = acc_ev_exit_data_end; @@ -400,7 +410,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *), else acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, tgt, aq); - goacc_async_copyout_unmap_vars (tgt, aq); + goacc_async_copyout_unmap_vars (tgt, aq, false); } out: @@ -637,7 +647,7 @@ GOACC_data_end (void) gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); thr->mapped_data = tgt->prev; - gomp_unmap_vars (tgt, true); + gomp_unmap_vars (tgt, true, false); gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); if (profiling_dispatch_p) @@ -668,6 +678,10 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (mapnum > 0) { unsigned char kind = kinds[0] & 0xff; + + if (kind == GOMP_MAP_STRUCT || kind == GOMP_MAP_FORCE_PRESENT) + kind = kinds[1] & 0xff; + if (kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FORCE_FROM) finalize = true; @@ -678,11 +692,14 @@ GOACC_enter_exit_data (int device, size_t mapnum, { unsigned char kind = kinds[i] & 0xff; - if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) + if (kind == GOMP_MAP_POINTER + || kind == GOMP_MAP_TO_PSET + || kind == GOMP_MAP_STRUCT + || kind == GOMP_MAP_FORCE_PRESENT) continue; if (kind == GOMP_MAP_FORCE_ALLOC - || kind == GOMP_MAP_FORCE_PRESENT + || kind == GOMP_MAP_ATTACH || kind == GOMP_MAP_FORCE_TO || kind == GOMP_MAP_TO || kind == GOMP_MAP_ALLOC @@ -694,6 +711,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (kind == GOMP_MAP_RELEASE || kind == GOMP_MAP_DELETE + || kind == GOMP_MAP_DETACH + || kind == GOMP_MAP_FORCE_DETACH || kind == GOMP_MAP_FROM || kind == GOMP_MAP_FORCE_FROM || kind == GOMP_MAP_DECLARE_DEALLOCATE) @@ -809,6 +828,9 @@ GOACC_enter_exit_data (int device, size_t mapnum, case GOMP_MAP_ALLOC: acc_present_or_create (hostaddrs[i], sizes[i]); break; + case GOMP_MAP_ATTACH: + case GOMP_MAP_FORCE_PRESENT: + break; case GOMP_MAP_FORCE_ALLOC: acc_create (hostaddrs[i], sizes[i]); break; @@ -818,6 +840,27 @@ GOACC_enter_exit_data (int device, size_t mapnum, case GOMP_MAP_FORCE_TO: acc_copyin (hostaddrs[i], sizes[i]); break; + case GOMP_MAP_STRUCT: + { + int elems = sizes[i]; + struct splay_tree_key_s k; + splay_tree_key str; + k.host_start = (uintptr_t) hostaddrs[i]; + k.host_end = k.host_start + 1; + gomp_mutex_lock (&acc_dev->lock); + str = splay_tree_lookup (&acc_dev->mem_map, &k); + gomp_mutex_unlock (&acc_dev->lock); + /* We increment the dynamic reference count for the struct + itself by the number of struct elements that we + mapped. */ + if (str->refcount != REFCOUNT_INFINITY) + { + str->refcount += elems; + str->dynamic_refcount += elems; + } + i += elems; + } + break; default: gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", kind); @@ -839,16 +882,57 @@ GOACC_enter_exit_data (int device, size_t mapnum, i += pointer - 1; } } + + /* This loop only handles explicit "attach" clauses that are not an + implicit part of a copy{,in,out}, etc. mapping. */ + for (i = 0; i < mapnum; i++) + { + unsigned char kind = kinds[i] & 0xff; + + /* Scan for pointers and PSETs. */ + int pointer = find_pointer (i, mapnum, kinds); + + if (!pointer) + { + if (kind == GOMP_MAP_ATTACH) + acc_attach (hostaddrs[i]); + else if (kind == GOMP_MAP_STRUCT) + i += sizes[i]; + } + else + i += pointer - 1; + } } else - for (i = 0; i < mapnum; ++i) - { - unsigned char kind = kinds[i] & 0xff; + { + /* This loop only handles explicit "detach" clauses that are not an + implicit part of a copy{,in,out}, etc. mapping. */ + for (i = 0; i < mapnum; i++) + { + unsigned char kind = kinds[i] & 0xff; - int pointer = find_pointer (i, mapnum, kinds); + int pointer = find_pointer (i, mapnum, kinds); - if (!pointer) - { + if (!pointer) + { + if (kind == GOMP_MAP_DETACH) + acc_detach (hostaddrs[i]); + else if (kind == GOMP_MAP_FORCE_DETACH) + acc_detach_finalize (hostaddrs[i]); + else if (kind == GOMP_MAP_STRUCT) + i += sizes[i]; + } + else + i += pointer - 1; + } + + for (i = 0; i < mapnum; ++i) + { + unsigned char kind = kinds[i] & 0xff; + + int pointer = find_pointer (i, mapnum, kinds); + + if (!pointer) switch (kind) { case GOMP_MAP_RELEASE: @@ -861,6 +945,10 @@ GOACC_enter_exit_data (int device, size_t mapnum, acc_delete_async (hostaddrs[i], sizes[i], async); } break; + case GOMP_MAP_DETACH: + case GOMP_MAP_FORCE_DETACH: + case GOMP_MAP_FORCE_PRESENT: + break; case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: @@ -869,28 +957,48 @@ GOACC_enter_exit_data (int device, size_t mapnum, else acc_copyout_async (hostaddrs[i], sizes[i], async); break; + case GOMP_MAP_STRUCT: + { + int elems = sizes[i]; + struct splay_tree_key_s k; + splay_tree_key str; + k.host_start = (uintptr_t) hostaddrs[i]; + k.host_end = k.host_start + 1; + gomp_mutex_lock (&acc_dev->lock); + str = splay_tree_lookup (&acc_dev->mem_map, &k); + gomp_mutex_unlock (&acc_dev->lock); + /* Decrement dynamic reference count for the struct by the + number of elements that we are unmapping. */ + if (str->dynamic_refcount >= elems) + { + str->dynamic_refcount -= elems; + str->refcount -= elems; + } + i += elems; + } + break; default: gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", kind); break; } - } - else - { - if (kind == GOMP_MAP_DECLARE_DEALLOCATE) - gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], - &sizes[i], &kinds[i]); - else - { - bool copyfrom = (kind == GOMP_MAP_FORCE_FROM - || kind == GOMP_MAP_FROM); - gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, - finalize, pointer); - /* See the above comment. */ - } - i += pointer - 1; - } - } + else + { + if (kind == GOMP_MAP_DECLARE_DEALLOCATE) + gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else + { + bool copyfrom = (kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_FROM); + gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, + async, finalize, pointer); + /* See the above comment. */ + } + i += pointer - 1; + } + } + } out: if (profiling_dispatch_p) diff --git a/libgomp/openacc.h b/libgomp/openacc.h index 261636c..41dd514 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -113,6 +113,10 @@ void *acc_hostptr (void *) __GOACC_NOTHROW; int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_attach (void **) __GOACC_NOTHROW; +void acc_attach_async (void **, int) __GOACC_NOTHROW; +void acc_detach (void **) __GOACC_NOTHROW; +void acc_detach_async (void **, int) __GOACC_NOTHROW; /* Async functions, specified in OpenACC 2.5. */ void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; @@ -129,6 +133,8 @@ void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW; void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW; void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW; void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; +void acc_detach_finalize (void **) __GOACC_NOTHROW; +void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW; /* CUDA-specific routines. */ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; diff --git a/libgomp/target.c b/libgomp/target.c index 7220ac6..d9d42eb 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef PLUGIN_SUPPORT #include @@ -373,6 +374,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, tgt_var->key = oldn; tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); + tgt_var->do_detach = false; tgt_var->offset = newn->host_start - oldn->host_start; tgt_var->length = newn->host_end - newn->host_start; @@ -539,7 +541,128 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, (void *) cur_node.host_end); } -static inline uintptr_t +void +gomp_attach_pointer (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, splay_tree mem_map, + splay_tree_key n, uintptr_t attach_to, size_t bias, + struct gomp_coalesce_buf *cbufp) +{ + struct splay_tree_key_s s; + size_t size, idx; + + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("enclosing struct not mapped for attach"); + } + + size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *); + /* We might have a pointer in a packed struct: however we cannot have more + than one such pointer in each pointer-sized portion of the struct, so + this is safe. */ + idx = (attach_to - n->host_start) / sizeof (void *); + + if (!n->attach_count) + n->attach_count = gomp_malloc_cleared (sizeof (*n->attach_count) * size); + + if (n->attach_count[idx] < USHRT_MAX) + n->attach_count[idx]++; + else + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("attach count overflow"); + } + + if (n->attach_count[idx] == 1) + { + uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to + - n->host_start; + uintptr_t target = (uintptr_t) *(void **) attach_to; + splay_tree_key tn; + uintptr_t data; + + if ((void *) target == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("attempt to attach null pointer"); + } + + s.host_start = target + bias; + s.host_end = s.host_start + 1; + tn = splay_tree_lookup (mem_map, &s); + + if (!tn) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("pointer target not mapped for attach"); + } + + data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; + + gomp_debug (1, + "%s: attaching host %p, target %p (struct base %p) to %p\n", + __FUNCTION__, (void *) attach_to, (void *) devptr, + (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); + + gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, + sizeof (void *), cbufp); + } + else + gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, + (void *) attach_to, n->attach_count[idx]); +} + +void +gomp_detach_pointer (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, splay_tree_key n, + uintptr_t detach_from, bool finalize, + struct gomp_coalesce_buf *cbufp) +{ + size_t idx; + + if (n == NULL) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("enclosing struct not mapped for detach"); + } + + idx = (detach_from - n->host_start) / sizeof (void *); + + if (!n->attach_count) + gomp_fatal ("no attachment counters for struct"); + + if (finalize) + n->attach_count[idx] = 1; + + if (n->attach_count[idx] == 0) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("attach count underflow"); + } + else + n->attach_count[idx]--; + + if (n->attach_count[idx] == 0) + { + uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from + - n->host_start; + uintptr_t target = (uintptr_t) *(void **) detach_from; + + gomp_debug (1, + "%s: detaching host %p, target %p (struct base %p) to %p\n", + __FUNCTION__, (void *) detach_from, (void *) devptr, + (void *) (n->tgt->tgt_start + n->tgt_offset), + (void *) target); + + gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target, + sizeof (void *), cbufp); + } + else + gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, + (void *) detach_from, n->attach_count[idx]); +} + +uintptr_t gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) { if (tgt->list[i].key != NULL) @@ -883,7 +1006,12 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, da->map_index = i; continue; } - + else if ((kind & typemask) == GOMP_MAP_ATTACH) + { + tgt->list[i].key = NULL; + has_firstprivate = true; + continue; + } cur_node.host_start = (uintptr_t) hostaddrs[i]; if (!GOMP_MAP_POINTER_P (kind & typemask)) cur_node.host_end = cur_node.host_start + sizes[i]; @@ -1141,6 +1269,30 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start; continue; + case GOMP_MAP_ATTACH: + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizeof (void *); + splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); + if (n != NULL) + { + tgt->list[i].key = n; + tgt->list[i].offset = cur_node.host_start - n->host_start; + tgt->list[i].length = n->host_end - n->host_start; + tgt->list[i].copy_from = false; + tgt->list[i].always_copy_from = false; + tgt->list[i].do_detach = true; + } + else + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("outer struct not mapped for attach"); + } + gomp_attach_pointer (devicep, aq, mem_map, n, + (uintptr_t) hostaddrs[i], sizes[i], + cbufp); + continue; + } default: break; } @@ -1194,10 +1346,12 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); tgt->list[i].always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); + tgt->list[i].do_detach = false; tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; k->dynamic_refcount = 0; + k->attach_count = NULL; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1482,6 +1636,8 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) is_tgt_unmapped = true; gomp_unmap_tgt (k->tgt); } + if (k->attach_count) + free (k->attach_count); return is_tgt_unmapped; } @@ -1490,14 +1646,14 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) has been done already. */ attribute_hidden void -gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) +gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, bool finalize) { - gomp_unmap_vars_async (tgt, do_copyfrom, NULL); + gomp_unmap_vars_async (tgt, do_copyfrom, NULL, finalize); } attribute_hidden void gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, - struct goacc_asyncqueue *aq) + struct goacc_asyncqueue *aq, bool finalize) { struct gomp_device_descr *devicep = tgt->device_descr; @@ -1517,10 +1673,23 @@ gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, } size_t i; + + /* We must perform detachments before any copies back to the host. */ for (i = 0; i < tgt->list_count; i++) { splay_tree_key k = tgt->list[i].key; - if (k == NULL) + + if (k != NULL && tgt->list[i].do_detach) + gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start + + tgt->list[i].offset, finalize, + NULL); + } + + for (i = 0; i < tgt->list_count; i++) + { + splay_tree_key k = tgt->list[i].key; + + if (k == NULL || tgt->list[i].do_detach) continue; bool do_unmap = false; @@ -2139,7 +2308,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, GOMP_MAP_VARS_TARGET); devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, NULL); - gomp_unmap_vars (tgt_vars, true); + gomp_unmap_vars (tgt_vars, true, false); } /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, @@ -2283,7 +2452,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, args); if (tgt_vars) - gomp_unmap_vars (tgt_vars, true); + gomp_unmap_vars (tgt_vars, true, false); } /* Host fallback for GOMP_target_data{,_ext} routines. */ @@ -2352,7 +2521,7 @@ GOMP_target_end_data (void) { struct target_mem_desc *tgt = icv->target_data; icv->target_data = tgt->prev; - gomp_unmap_vars (tgt, true); + gomp_unmap_vars (tgt, true, false); } } @@ -2587,7 +2756,7 @@ gomp_target_task_fn (void *data) if (ttask->state == GOMP_TARGET_TASK_FINISHED) { if (ttask->tgt) - gomp_unmap_vars (ttask->tgt, true); + gomp_unmap_vars (ttask->tgt, true, false); return false; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c new file mode 100644 index 0000000..d8d7067 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c @@ -0,0 +1,24 @@ +#include +#include + +struct dc +{ + int a; + int *b; +}; + +int +main () +{ + int n = 100, i; + struct dc v = { .a = 3, .b = (int *) malloc (sizeof (int) * n) }; + +#pragma acc parallel loop copy(v.a, v.b[:n]) + for (i = 0; i < n; i++) + v.b[i] = v.a; + + for (i = 0; i < 10; i++) + assert (v.b[i] == v.a); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c new file mode 100644 index 0000000..7e26e9a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c @@ -0,0 +1,29 @@ +#include +#include + +int +main(int argc, char* argv[]) +{ + struct foo { + int *a, *b, c, d, *e; + } s; + + s.a = (int *) malloc (16 * sizeof (int)); + s.b = (int *) malloc (16 * sizeof (int)); + s.e = (int *) malloc (16 * sizeof (int)); + + #pragma acc data copy(s) + { + #pragma acc data copy(s.a[0:10]) + { + #pragma acc parallel loop attach(s.a) + for (int i = 0; i < 10; i++) + s.a[i] = i; + } + } + + for (int i = 0; i < 10; i++) + assert (s.a[i] == i); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c new file mode 100644 index 0000000..cec764b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c @@ -0,0 +1,34 @@ +#include +#include +#include + +int +main () +{ + int n = 100, i; + int *a = (int *) malloc (sizeof (int) * n); + int *b; + + for (i = 0; i < n; i++) + a[i] = i+1; + +#pragma acc enter data copyin(a[:n]) create(b) + + b = a; + acc_attach ((void **)&b); + +#pragma acc parallel loop present (b[:n]) + for (i = 0; i < n; i++) + b[i] = i+1; + + acc_detach ((void **)&b); + +#pragma acc exit data copyout(a[:n], b) + + for (i = 0; i < 10; i++) + assert (a[i] == b[i]); + + free (a); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c new file mode 100644 index 0000000..8874ca0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c @@ -0,0 +1,87 @@ +#include +#include + +#define LIST_LENGTH 10 + +struct node +{ + struct node *next; + int val; +}; + +int +sum_nodes (struct node *head) +{ + int i = 0, sum = 0; + +#pragma acc parallel reduction(+:sum) present(head[:1]) + { + for (; head != NULL; head = head->next) + sum += head->val; + } + + return sum; +} + +void +insert (struct node *head, int val) +{ + struct node *n = (struct node *) malloc (sizeof (struct node)); + + if (head->next) + { +#pragma acc exit data detach(head->next) + } + + n->val = val; + n->next = head->next; + head->next = n; + +#pragma acc enter data copyin(n[:1]) +#pragma acc enter data attach(head->next) + if (n->next) + { +#pragma acc enter data attach(n->next) + } +} + +void +destroy (struct node *head) +{ + while (head->next != NULL) + { +#pragma acc exit data detach(head->next) + struct node * n = head->next; + head->next = n->next; + if (n->next) + { +#pragma acc exit data detach(n->next) + } +#pragma acc exit data delete (n[:1]) + if (head->next) + { +#pragma acc enter data attach(head->next) + } + free (n); + } +} + +int +main () +{ + struct node list = { .next = NULL, .val = 0 }; + int i; + +#pragma acc enter data copyin(list) + + for (i = 0; i < LIST_LENGTH; i++) + insert (&list, i + 1); + + assert (sum_nodes (&list) == (LIST_LENGTH * LIST_LENGTH + LIST_LENGTH) / 2); + + destroy (&list); + +#pragma acc exit data delete(list) + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c new file mode 100644 index 0000000..89cafbb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c @@ -0,0 +1,81 @@ +#include +#include +#include + +struct node +{ + struct node *next; + int val; +}; + +int +sum_nodes (struct node *head) +{ + int i = 0, sum = 0; + +#pragma acc parallel reduction(+:sum) present(head[:1]) + { + for (; head != NULL; head = head->next) + sum += head->val; + } + + return sum; +} + +void +insert (struct node *head, int val) +{ + struct node *n = (struct node *) malloc (sizeof (struct node)); + + if (head->next) + acc_detach ((void **) &head->next); + + n->val = val; + n->next = head->next; + head->next = n; + + acc_copyin (n, sizeof (struct node)); + acc_attach((void **) &head->next); + + if (n->next) + acc_attach ((void **) &n->next); +} + +void +destroy (struct node *head) +{ + while (head->next != NULL) + { + acc_detach ((void **) &head->next); + struct node * n = head->next; + head->next = n->next; + if (n->next) + acc_detach ((void **) &n->next); + + acc_delete (n, sizeof (struct node)); + if (head->next) + acc_attach((void **) &head->next); + + free (n); + } +} + +int +main () +{ + struct node list = { .next = NULL, .val = 0 }; + int i; + + acc_copyin (&list, sizeof (struct node)); + + for (i = 0; i < 10; i++) + insert (&list, 2); + + assert (sum_nodes (&list) == 10 * 2); + + destroy (&list); + + acc_delete (&list, sizeof (struct node)); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90 new file mode 100644 index 0000000..c4cea11 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +! Test of attach/detach with "acc data". + +program dtype + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + +!$acc data copy(var) +!$acc data copy(var%a) + +!$acc parallel loop + do i = 1,n + var%a(i) = i + end do +!$acc end parallel loop + +!$acc end data +!$acc end data + + do i = 1,n + if (i .ne. var%a(i)) stop 1 + end do + + deallocate(var%a) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 new file mode 100644 index 0000000..3593661 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 @@ -0,0 +1,33 @@ +! { dg-do run } + +! Test of attach/detach with "acc data", two clauses at once. + +program dtype + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + +!$acc data copy(var) copy(var%a) + +!$acc parallel loop + do i = 1,n + var%a(i) = i + end do +!$acc end parallel loop + +!$acc end data + + do i = 1,n + if (i .ne. var%a(i)) stop 1 + end do + + deallocate(var%a) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 new file mode 100644 index 0000000..667d944 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 @@ -0,0 +1,34 @@ +! { dg-do run } + +! Test of attach/detach with "acc parallel". + +program dtype + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + integer, allocatable :: b(:) + end type mytype + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + allocate(var%b(1:n)) + +!$acc parallel loop copy(var) copy(var%a(1:n)) copy(var%b(1:n)) + do i = 1,n + var%a(i) = i + var%b(i) = i + end do +!$acc end parallel loop + + do i = 1,n + if (i .ne. var%a(i)) stop 1 + if (i .ne. var%b(i)) stop 2 + end do + + deallocate(var%a) + deallocate(var%b) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90 new file mode 100644 index 0000000..6949e12 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90 @@ -0,0 +1,49 @@ +! { dg-do run } + +! Test of attach/detach with "acc enter/exit data". + +program dtype + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + integer, allocatable :: b(:) + end type mytype + integer, allocatable :: r(:) + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + allocate(var%b(1:n)) + allocate(r(1:n)) + +!$acc enter data copyin(var) + +!$acc enter data copyin(var%a, var%b, r) + +!$acc parallel loop + do i = 1,n + var%a(i) = i + var%b(i) = i * 2 + r(i) = i * 3 + end do +!$acc end parallel loop + +!$acc exit data copyout(var%a) +!$acc exit data copyout(var%b) +!$acc exit data copyout(r) + + do i = 1,n + if (i .ne. var%a(i)) stop 1 + if (i * 2 .ne. var%b(i)) stop 2 + if (i * 3 .ne. r(i)) stop 3 + end do + +!$acc exit data delete(var) + + deallocate(var%a) + deallocate(var%b) + deallocate(r) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90 new file mode 100644 index 0000000..6843cf1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90 @@ -0,0 +1,57 @@ +! { dg-do run } + +! Test of attach/detach, "enter data" inside "data", and subarray. + +program dtype + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + integer, allocatable :: b(:) + end type mytype + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + allocate(var%b(1:n)) + +!$acc data copy(var) + + do i = 1, n + var%a(i) = 0 + var%b(i) = 0 + end do + +!$acc enter data copyin(var%a(5:n - 5), var%b(5:n - 5)) + +!$acc parallel loop + do i = 5,n - 5 + var%a(i) = i + var%b(i) = i * 2 + end do +!$acc end parallel loop + +!$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) + +!$acc end data + + do i = 1,4 + if (var%a(i) .ne. 0) stop 1 + if (var%b(i) .ne. 0) stop 2 + end do + + do i = 5,n - 5 + if (i .ne. var%a(i)) stop 3 + if (i * 2 .ne. var%b(i)) stop 4 + end do + + do i = n - 4,n + if (var%a(i) .ne. 0) stop 5 + if (var%b(i) .ne. 0) stop 6 + end do + + deallocate(var%a) + deallocate(var%b) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 new file mode 100644 index 0000000..12910d0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 @@ -0,0 +1,61 @@ +! { dg-do run } + +! Test of attachment counters and finalize. + +program dtype + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + integer, allocatable :: b(:) + end type mytype + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + allocate(var%b(1:n)) + +!$acc data copy(var) + + do i = 1, n + var%a(i) = 0 + var%b(i) = 0 + end do + +!$acc enter data copyin(var%a(5:n - 5), var%b(5:n - 5)) + + do i = 1,20 + !$acc enter data attach(var%a) + end do + +!$acc parallel loop + do i = 5,n - 5 + var%a(i) = i + var%b(i) = i * 2 + end do +!$acc end parallel loop + +!$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) finalize + +!$acc end data + + do i = 1,4 + if (var%a(i) .ne. 0) stop 1 + if (var%b(i) .ne. 0) stop 2 + end do + + do i = 5,n - 5 + if (i .ne. var%a(i)) stop 3 + if (i * 2 .ne. var%b(i)) stop 4 + end do + + do i = n - 4,n + if (var%a(i) .ne. 0) stop 5 + if (var%b(i) .ne. 0) stop 6 + end do + + deallocate(var%a) + deallocate(var%b) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90 new file mode 100644 index 0000000..ab44f0a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90 @@ -0,0 +1,89 @@ +! { dg-do run } + +! Test of attach/detach with scalar elements and nested derived types. + +program dtype + implicit none + integer, parameter :: n = 512 + type subtype + integer :: g, h + integer, allocatable :: q(:) + end type subtype + type mytype + integer, allocatable :: a(:) + integer, allocatable :: c, d + integer, allocatable :: b(:) + integer :: f + type(subtype) :: s + end type mytype + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + allocate(var%b(1:n)) + allocate(var%c) + allocate(var%d) + allocate(var%s%q(1:n)) + + var%c = 16 + var%d = 20 + var%f = 7 + var%s%g = 21 + var%s%h = 38 + +!$acc enter data copyin(var) + + do i = 1, n + var%a(i) = 0 + var%b(i) = 0 + var%s%q(i) = 0 + end do + +!$acc data copy(var%a(5:n - 5), var%b(5:n - 5), var%c, var%d) & +!$acc & copy(var%s%q) + +!$acc parallel loop default(none) present(var) + do i = 5,n - 5 + var%a(i) = i + var%b(i) = i * 2 + var%s%q(i) = i * 3 + var%s%g = 100 + var%s%h = 101 + end do +!$acc end parallel loop + +!$acc end data + +!$acc exit data copyout(var) + + do i = 1,4 + if (var%a(i) .ne. 0) stop 1 + if (var%b(i) .ne. 0) stop 2 + if (var%s%q(i) .ne. 0) stop 3 + end do + + do i = 5,n - 5 + if (i .ne. var%a(i)) stop 4 + if (i * 2 .ne. var%b(i)) stop 5 + if (i * 3 .ne. var%s%q(i)) stop 6 + end do + + do i = n - 4,n + if (var%a(i) .ne. 0) stop 7 + if (var%b(i) .ne. 0) stop 8 + if (var%s%q(i) .ne. 0) stop 9 + end do + + if (var%c .ne. 16) stop 10 + if (var%d .ne. 20) stop 11 + if (var%s%g .ne. 100 .or. var%s%h .ne. 101) stop 12 + if (var%f .ne. 7) stop 13 + + deallocate(var%a) + deallocate(var%b) + deallocate(var%c) + deallocate(var%d) + deallocate(var%s%q) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90 new file mode 100644 index 0000000..d142763 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90 @@ -0,0 +1,41 @@ +! { dg-do run } + +! Test of explicit attach/detach clauses and attachment counters. There are no +! acc_attach/acc_detach API routines in Fortran. + +program dtype + use openacc + implicit none + integer, parameter :: n = 512 + type mytype + integer, allocatable :: a(:) + end type mytype + integer i + + type(mytype) :: var + + allocate(var%a(1:n)) + + call acc_copyin(var) + call acc_copyin(var%a) + + !$acc enter data attach(var%a) + +!$acc parallel loop attach(var%a) + do i = 1,n + var%a(i) = i + end do +!$acc end parallel loop + + !$acc exit data detach(var%a) + + call acc_copyout(var%a) + call acc_copyout(var) + + do i = 1,n + if (i .ne. var%a(i)) stop 1 + end do + + deallocate(var%a) + +end program dtype diff --git a/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90 index 1ec4784..eb7812d 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90 @@ -20,9 +20,9 @@ !$acc end data do i = 1, n - if (d(i)%a /= i) call abort - if (d(i)%b /= i-1) call abort - if (d(i)%c /= i+1) call abort + if (d(i)%a /= i) stop 1 + if (d(i)%b /= i-1) stop 2 + if (d(i)%c /= i+1) stop 3 end do end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90 index a37d526..c3c8a07 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90 @@ -37,7 +37,7 @@ program derived_acc !$acc update host(var%a) - if (var%a /= var%b) call abort + if (var%a /= var%b) stop 1 var%b = 100 @@ -51,7 +51,7 @@ program derived_acc !$acc update host(var%a) - if (var%a /= var%b) call abort + if (var%a /= var%b) stop 2 !$acc parallel loop present (var) do i = 1, n @@ -64,7 +64,7 @@ program derived_acc var%a = -1 do i = 1, n - if (var%c(i) /= i) call abort + if (var%c(i) /= i) stop 3 var%c(i) = var%a end do @@ -78,7 +78,7 @@ program derived_acc if (var%c(i) /= var%a) res = res + 1 end do - if (res /= 0) call abort + if (res /= 0) stop 4 var%c(:) = 0 @@ -93,8 +93,8 @@ program derived_acc !$acc update host(var%c(5)) do i = 1, n - if (i /= 5 .and. var%c(i) /= 0) call abort - if (i == 5 .and. var%c(i) /= 1) call abort + if (i /= 5 .and. var%c(i) /= 0) stop 5 + if (i == 5 .and. var%c(i) /= 1) stop 6 end do !$acc parallel loop present(var) @@ -106,7 +106,7 @@ program derived_acc !$acc update host(var%in%d) do i = 1, n - if (var%in%d(i) /= var%a) call abort + if (var%in%d(i) /= var%a) stop 7 end do var%c(:) = 0 @@ -124,8 +124,8 @@ program derived_acc !$acc update host(var%c(n/2:n)) do i = 1,n - if (i < n/2 .and. var%c(i) /= -1) call abort - if (i >= n/2 .and. var%c(i) /= i) call abort + if (i < n/2 .and. var%c(i) /= -1) stop 8 + if (i >= n/2 .and. var%c(i) /= i) stop 9 end do var%in%d(:) = 0 @@ -140,8 +140,8 @@ program derived_acc !$acc update host(var%in%d(5)) do i = 1, n - if (i /= 5 .and. var%in%d(i) /= 0) call abort - if (i == 5 .and. var%in%d(i) /= 1) call abort + if (i /= 5 .and. var%in%d(i) /= 0) stop 10 + if (i == 5 .and. var%in%d(i) /= 1) stop 11 end do !$acc exit data delete(var) @@ -173,7 +173,7 @@ subroutine derived_acc_subroutine(var) !$acc update host(var%a) - if (var%a /= var%b) call abort + if (var%a /= var%b) stop 12 var%b = 100 @@ -187,7 +187,7 @@ subroutine derived_acc_subroutine(var) !$acc update host(var%a) - if (var%a /= var%b) call abort + if (var%a /= var%b) stop 13 !$acc parallel loop present (var) do i = 1, n @@ -200,7 +200,7 @@ subroutine derived_acc_subroutine(var) var%a = -1 do i = 1, n - if (var%c(i) /= i) call abort + if (var%c(i) /= i) stop 14 var%c(i) = var%a end do @@ -214,7 +214,7 @@ subroutine derived_acc_subroutine(var) if (var%c(i) /= var%a) res = res + 1 end do - if (res /= 0) call abort + if (res /= 0) stop 15 var%c(:) = 0 @@ -229,8 +229,8 @@ subroutine derived_acc_subroutine(var) !$acc update host(var%c(5)) do i = 1, n - if (i /= 5 .and. var%c(i) /= 0) call abort - if (i == 5 .and. var%c(i) /= 1) call abort + if (i /= 5 .and. var%c(i) /= 0) stop 16 + if (i == 5 .and. var%c(i) /= 1) stop 17 end do !$acc parallel loop present(var) @@ -242,7 +242,7 @@ subroutine derived_acc_subroutine(var) !$acc update host(var%in%d) do i = 1, n - if (var%in%d(i) /= var%a) call abort + if (var%in%d(i) /= var%a) stop 18 end do var%c(:) = 0 @@ -260,8 +260,8 @@ subroutine derived_acc_subroutine(var) !$acc update host(var%c(n/2:n)) do i = 1,n - if (i < n/2 .and. var%c(i) /= -1) call abort - if (i >= n/2 .and. var%c(i) /= i) call abort + if (i < n/2 .and. var%c(i) /= -1) stop 19 + if (i >= n/2 .and. var%c(i) /= i) stop 20 end do var%in%d(:) = 0 @@ -276,8 +276,8 @@ subroutine derived_acc_subroutine(var) !$acc update host(var%in%d(5)) do i = 1, n - if (i /= 5 .and. var%in%d(i) /= 0) call abort - if (i == 5 .and. var%in%d(i) /= 1) call abort + if (i /= 5 .and. var%in%d(i) /= 0) stop 21 + if (i == 5 .and. var%in%d(i) /= 1) stop 22 end do !$acc exit data delete(var) From patchwork Tue Nov 20 21:54:50 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Julian Brown X-Patchwork-Id: 1000755 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-490568-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="FVfVY4RH"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 42zzzb4VCKz9s3q for ; Wed, 21 Nov 2018 08:55:55 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; q=dns; s=default; b=S8nU/iqbyIMN/dik RZuDME7lB/RpoikVwIPanej2z4fSt+I2ursyFGnVtlTTBYEma3jOcoT78zZWae+8 siVPMtUKE7TbDB45qGcmKOyoSx8kOEFda+W2nqS+iEwOSqVnlijTHxDbTm7zbAQZ GTmDw4mO65SwCx+Mmx7BXpXAQZg= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; s=default; bh=LSI2kWiY57qh+JeqN5QUis Db/6c=; b=FVfVY4RHU81+8pBlWdvc+Pi9HB2YUQywto2ErnprXZ4KIBg4/GLjyK vxAb9ob+gOYoSzIbwqPanBYe645Fh9DNU3C4wRTyE5hrxvIRkyufHFg5NXCCXEW0 42Y2Lx8VZdA4+T8WjPzevOlgo5KR24oI2rupPts1JiGc6R7e5C/I4= Received: (qmail 42436 invoked by alias); 20 Nov 2018 21:55:23 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 42254 invoked by uid 89); 20 Nov 2018 21:55:21 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.3 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=va, neatly X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 20 Nov 2018 21:55:19 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-MBX-04.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1gPDzJ-0002wh-Vf from Julian_Brown@mentor.com ; Tue, 20 Nov 2018 13:55:18 -0800 Received: from localhost.localdomain (147.34.91.1) by SVR-IES-MBX-04.mgc.mentorg.com (139.181.222.4) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Tue, 20 Nov 2018 21:55:13 +0000 From: Julian Brown To: CC: , , Subject: [PATCH 4/6] [og8] Interaction of dynamic/multidimensional arrays with attach/detach. Date: Tue, 20 Nov 2018 13:54:50 -0800 Message-ID: <1335fb57035f2e356ecba4f877a82da47d2152fe.1542748807.git.julian@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-IsSubscribed: yes OpenACC multidimensional (or "dynamic") arrays do not seem to fit very neatly into the attach/detach mechanism described for OpenACC 2.6, that is if the user tries to use a multidimensional array as a field in a struct. This patch disallows that combination, for now at least. Multidimensional array support in general has been submitted upstream here but not yet accepted: https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00937.html gcc/ * omp-low.c (scan_sharing_clauses): Disallow dynamic (multidimensional) arrays within structs. gcc/testsuite/ * c-c++-common/goacc/deep-copy-multidim.c: Add test. libgomp/ * target.c (gomp_map_vars_async, gomp_load_image_to_device): Zero-initialise do_detach, dynamic_refcount and attach_count in more places. --- gcc/omp-low.c | 10 +++++- .../c-c++-common/goacc/deep-copy-multidim.c | 32 ++++++++++++++++++++ libgomp/target.c | 6 ++++ 3 files changed, 47 insertions(+), 1 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c diff --git a/gcc/omp-low.c b/gcc/omp-low.c index e559211..1726451 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1481,7 +1481,15 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, t = TREE_TYPE (t); } - install_var_field (da_decl, by_ref, 3, ctx); + if (DECL_P (decl)) + install_var_field (da_decl, by_ref, 3, ctx); + else + { + error_at (OMP_CLAUSE_LOCATION (c), + "dynamic arrays cannot be used within structs"); + break; + } + tree new_var = install_var_local (da_decl, ctx); bool existed = ctx->dynamic_arrays->put (new_var, da_dimensions); diff --git a/gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c b/gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c new file mode 100644 index 0000000..1696f0c --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ + +#include +#include + +struct dc +{ + int a; + int **b; +}; + +int +main () +{ + int n = 100, i, j; + struct dc v = { .a = 3 }; + + v.b = (int **) malloc (sizeof (int *) * n); + for (i = 0; i < n; i++) + v.b[i] = (int *) malloc (sizeof (int) * n); + +#pragma acc parallel loop copy(v.a, v.b[:n][:n]) /* { dg-error "dynamic arrays cannot be used within structs" } */ + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + v.b[i][j] = v.a + i + j; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + assert (v.b[i][j] == v.a + i + j); + + return 0; +} diff --git a/libgomp/target.c b/libgomp/target.c index d9d42eb..da51291 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1484,6 +1484,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, set to false here. */ tgt->list[i].copy_from = false; tgt->list[i].always_copy_from = false; + tgt->list[i].do_detach = false; size_t align = (size_t) 1 << (kind >> rshift); tgt_size = (tgt_size + align - 1) & ~(align - 1); @@ -1521,6 +1522,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, k->tgt = tgt; k->refcount = 1; + k->dynamic_refcount = 0; + k->attach_count = NULL; k->link_key = NULL; tgt_size = (tgt_size + align - 1) & ~(align - 1); target_row_addr = tgt->tgt_start + tgt_size; @@ -1532,6 +1535,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, = GOMP_MAP_COPY_FROM_P (kind & typemask); row_desc->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); + row_desc->do_detach = false; row_desc->offset = 0; row_desc->length = da->data_row_size; @@ -1839,6 +1843,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_table[i].start; k->refcount = REFCOUNT_INFINITY; + k->attach_count = NULL; k->link_key = NULL; tgt->list[i].key = k; tgt->refcount++; @@ -1873,6 +1878,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_var->start; k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY; + k->attach_count = NULL; k->link_key = NULL; tgt->list[i].key = k; tgt->refcount++; From patchwork Tue Nov 20 21:56:41 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Julian Brown X-Patchwork-Id: 1000757 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-490570-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="ttMwRusY"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 43000y156Hz9s9h for ; Wed, 21 Nov 2018 08:57:05 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; q=dns; s=default; b=GwwT2R3/gtRDHbbT 9xN+sU8jCRQxm+hE5xcHis+vnicR9zw4EaVLiF22lARA1UE73SIZuzojJ0CxLWu9 W6869z25xrFDFAKp+saowtGmsI92QKfgeVWzQePOi4DyO9PeuW4Rf4qDsSeZT0fp o38fmjBmi9KsTUbq0gKNrq/xNQY= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; s=default; bh=zowzxac4CcybVQyXVL/GDZ MrYkU=; b=ttMwRusYNzDbxZfX6LXJ+NMQy3m/HSQcJrf+SgvMWy1EuNNprlGkGO R1NmKKs6lRtdLH5ULIRJ2ua5tPPaZepqfqG6/NCXFxZOS5OMDo9cvN7PrhODnAiP 7vSCZP2doO59/8XrDf+IpHIO3aWsUbpa4eF6E0KqEbT+zKh1zmlQE= Received: (qmail 48984 invoked by alias); 20 Nov 2018 21:56:58 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 48973 invoked by uid 89); 20 Nov 2018 21:56:58 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.4 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=clique X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 20 Nov 2018 21:56:56 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-MBX-04.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1gPE0s-000323-9m from Julian_Brown@mentor.com ; Tue, 20 Nov 2018 13:56:54 -0800 Received: from localhost.localdomain (147.34.91.1) by SVR-IES-MBX-04.mgc.mentorg.com (139.181.222.4) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Tue, 20 Nov 2018 21:56:49 +0000 From: Julian Brown To: CC: , , Subject: [PATCH 5/6] [og8] Backport parts of upstream declare-allocate patch Date: Tue, 20 Nov 2018 13:56:41 -0800 Message-ID: <8899e93bb7b0b0c6160bcb67125eed0c4be26ff5.1542748807.git.julian@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-IsSubscribed: yes This patch adjusts mappings used for some special cases in Fortran (e.g. allocatable scalars) on og8 to match code that is already upstream, or that has been submitted but not yet reviewed. Parts taken from https://gcc.gnu.org/ml/gcc-patches/2018-09/msg01205.html and parts reverted from https://gcc.gnu.org/ml/gcc-patches/2017-01/msg02188.html. gcc/fortran/ * trans-openmp.c (gfc_omp_finish_clause): Don't use GOMP_MAP_FIRSTPRIVATE_POINTER. (gfc_trans_omp_clauses_1): Adjust handling of allocatable scalars. gcc/ * gimplify.c (demote_firstprivate_pointer): Remove. (gimplify_scan_omp_clauses): Remove special handling for OpenACC. Don't call demote_firstprivate_pointer. (gimplify_adjust_omp_clauses): Adjust promotion of reduction clauses. * omp-low.c (lower_omp_target): Remove special handling for Fortran. gcc/testsuite/ * gfortran.dg/goacc/kernels-alias-3.f95: Revert comment changes and XFAIL. libgomp/ * testsuite/libgomp.oacc-fortran/non-scalar-data.f90: Remove XFAIL for -O2 and -O3 and explanatory comment. --- gcc/fortran/trans-openmp.c | 22 ++++----- gcc/gimplify.c | 49 ++----------------- gcc/omp-low.c | 3 +- .../gfortran.dg/goacc/kernels-alias-3.f95 | 4 +- .../libgomp.oacc-fortran/non-scalar-data.f90 | 6 +-- 5 files changed, 20 insertions(+), 64 deletions(-) diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 98f40d1..71a3ebb 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -1084,7 +1084,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p) return; tree orig_decl = decl; c4 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_FIRSTPRIVATE_POINTER); + OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER); OMP_CLAUSE_DECL (c4) = decl; OMP_CLAUSE_SIZE (c4) = size_int (0); decl = build_fold_indirect_ref (decl); @@ -1100,10 +1100,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p) OMP_CLAUSE_SIZE (c3) = size_int (0); decl = build_fold_indirect_ref (decl); OMP_CLAUSE_DECL (c) = decl; - OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER); } - if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) - OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER); } if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) { @@ -2168,11 +2165,15 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, (TREE_TYPE (TREE_TYPE (field))))) { tree orig_decl = decl; - enum gomp_map_kind gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; - if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) - && (n->sym->attr.oacc_declare_create) - && clauses->update_allocatable) - gmk = ptr_map_kind; + enum gomp_map_kind gmk = GOMP_MAP_POINTER; + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (field) + && n->sym->attr.oacc_declare_create) + { + if (clauses->update_allocatable) + gmk = GOMP_MAP_ALWAYS_POINTER; + else + gmk = GOMP_MAP_FIRSTPRIVATE_POINTER; + } node4 = build_omp_clause (input_location, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node4, gmk); @@ -2189,10 +2190,7 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_DECL (node3) = decl; OMP_CLAUSE_SIZE (node3) = size_int (0); decl = build_fold_indirect_ref (decl); - OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); } - if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) - OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER); } if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)) && n->u.map_op != OMP_MAP_ATTACH diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 40bf586..7f55cfd 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -7634,37 +7634,6 @@ find_decl_expr (tree *tp, int *walk_subtrees, void *data) return NULL_TREE; } -static void -demote_firstprivate_pointer (tree decl, gimplify_omp_ctx *ctx) -{ - if (!lang_GNU_Fortran ()) - return; - - while (ctx) - { - if (ctx->region_type == ORT_ACC_PARALLEL - || ctx->region_type == ORT_ACC_KERNELS) - break; - ctx = ctx->outer_context; - } - - if (ctx == NULL) - return; - - tree clauses = ctx->clauses; - - for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) - { - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER - && OMP_CLAUSE_DECL (c) == decl) - { - OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_POINTER); - return; - } - } -} - /* Insert a GOMP_MAP_ALLOC or GOMP_MAP_RELEASE node following a GOMP_MAP_STRUCT mapping. C is an always_pointer mapping. STRUCT_NODE is the struct node to insert the new mapping after (when the struct node is @@ -7843,7 +7812,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, outer_ctx = ctx->outer_context; if (code == OMP_TARGET) { - if (!lang_GNU_Fortran () || (region_type & ORT_ACC)) + if (!lang_GNU_Fortran ()) ctx->target_map_pointers_as_0len_arrays = true; ctx->target_map_scalars_firstprivate = true; } @@ -7971,7 +7940,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, if (!(region_type & ORT_ACC)) check_non_private = "reduction"; decl = OMP_CLAUSE_DECL (c); - demote_firstprivate_pointer (decl, ctx->outer_context); if (TREE_CODE (decl) == MEM_REF) { tree type = TREE_TYPE (decl); @@ -9491,16 +9459,11 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, && kind != GOMP_MAP_FORCE_PRESENT && kind != GOMP_MAP_POINTER) { - if (lang_hooks.decls.omp_privatize_by_reference (decl)) - OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_POINTER); - else - { - warning_at (OMP_CLAUSE_LOCATION (c), 0, - "incompatible data clause with reduction " - "on %qE; promoting to present_or_copy", - DECL_NAME (t)); - OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM); - } + warning_at (OMP_CLAUSE_LOCATION (c), 0, + "incompatible data clause with reduction " + "on %qE; promoting to present_or_copy", + DECL_NAME (t)); + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM); } } } diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 1726451..a5fc2b1 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -9112,8 +9112,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else is_ref = omp_is_reference (var); - if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE - || (lang_GNU_Fortran () && TREE_CODE (var) == PARM_DECL)) + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE) is_ref = false; bool ref_to_array = false; if (is_ref) diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95 index 09f0264..36b06d3 100644 --- a/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95 @@ -17,6 +17,4 @@ end program main ! Only the omp_data_i related loads should be annotated with cliques. ! { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } } -! The following FAILs since/needs to be updated for the "Partially enable -! GOMP_MAP_FIRSTPRIVATE_POINTER in gfortran" changes. -! { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" { xfail *-*-* } } } +! { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90 b/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90 index 7562571..99bd692 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90 @@ -3,11 +3,9 @@ ! present. ! { dg-do run } -! TODO, for "-Os" see , and for the others, this -! regressed with the "Partially enable GOMP_MAP_FIRSTPRIVATE_POINTER in -! gfortran" changes. +! TODO, . ! warning: OpenACC kernels construct will be executed sequentially; will by default avoid offloading to prevent data copy penalty -! { dg-xfail-if "TODO" { openacc_nvidia_accel_selected } { "-Os" "-O2" "-O3" } { "" } } +! { dg-xfail-if "TODO" { openacc_nvidia_accel_selected } { "-Os" } { "" } } program main implicit none From patchwork Tue Nov 20 21:56:42 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Julian Brown X-Patchwork-Id: 1000758 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=gcc.gnu.org (client-ip=209.132.180.131; helo=sourceware.org; envelope-from=gcc-patches-return-490571-incoming=patchwork.ozlabs.org@gcc.gnu.org; receiver=) Authentication-Results: ozlabs.org; dmarc=none (p=none dis=none) header.from=codesourcery.com Authentication-Results: ozlabs.org; dkim=pass (1024-bit key; unprotected) header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b="el+uEItm"; dkim-atps=neutral Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 43001C3f9Kz9s9m for ; Wed, 21 Nov 2018 08:57:19 +1100 (AEDT) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; q=dns; s=default; b=dariAEQt7+BU/VEx n9+mat9W98+XUp5szD9rqeQpYuwqrNuf/oJXGc1YoWFH1zR87oaSB7Vf/v8l7dm7 FU6Qk5jhFQRwQRVO+/INu6h1paiFWlyS++pYIW5qqyC8EhCm2TtiSyRpKNImRxJ5 JaAsRXtrqGkW/V2Zd6iA0KaNiWA= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-type; s=default; bh=UyMIq1ZWV+PsYgePtJbnym /6+Y8=; b=el+uEItmO2R4ye0BgrEw76l1GAuQ5YzxM1lsnpb9FQHwNG+5GwqN8P jI0w3oMVj3eCBqPKSG5IqJMVLH6NXl/5/bQs/K3k52P73UzyaHXtBD94VNRDzyaw lsfCg9bU7f08/CxW2FDqRSsa6y2cUbX/f+6WXYWyvhsFbDFFWLX10= Received: (qmail 50058 invoked by alias); 20 Nov 2018 21:57:07 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 49927 invoked by uid 89); 20 Nov 2018 21:57:06 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.5 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=vc X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 20 Nov 2018 21:57:00 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-MBX-04.mgc.mentorg.com) by relay1.mentorg.com with esmtps (TLSv1.2:ECDHE-RSA-AES256-SHA384:256) id 1gPE0w-00032X-4W from Julian_Brown@mentor.com ; Tue, 20 Nov 2018 13:56:58 -0800 Received: from localhost.localdomain (147.34.91.1) by SVR-IES-MBX-04.mgc.mentorg.com (139.181.222.4) with Microsoft SMTP Server (TLS) id 15.0.1320.4; Tue, 20 Nov 2018 21:56:53 +0000 From: Julian Brown To: CC: , , Subject: [PATCH 6/6] [og8] OpenACC refcounting refresh Date: Tue, 20 Nov 2018 13:56:42 -0800 Message-ID: <32a5b94840558034f99b260bf5f02e9c5a083020.1542748807.git.julian@codesourcery.com> In-Reply-To: References: MIME-Version: 1.0 X-IsSubscribed: yes This patch represents a mild overhaul of reference counting for OpenACC in libgomp. It's been partly automatically checked (using code not yet quite finished nor submitted upstream), but it's already more precise than the pre-patch implementation (as demonstrated by adjustments to previously-erroneous tests, included). I have a few more changes planned, but those are still tbd. libgomp/ * libgomp.h (gomp_device_descr): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA. (gomp_acc_remove_pointer): Update prototype. (gomp_acc_data_env_remove_tgt): Add prototype. (gomp_unmap_vars, gomp_map_vars_async): Update prototype. * oacc-int.h (goacc_async_copyout_unmap_vars): Update prototype. * oacc-async.c (goacc_async_copyout_unmap_vars): Remove finalize parameter. * oacc-init.c (acc_shutdown_1): Remove finalize argument to gomp_unmap_vars call. * oacc-mem.c (lookup_dev_1): New helper function. (lookup_dev): Rewrite in terms of above. (acc_free): Update calls to lookup_dev. (acc_map_data): Likewise. Don't add data mapped this way to OpenACC data environment list. (gomp_acc_data_env_remove, gomp_acc_data_env_remove_tgt): New functions. (acc_unmap_data): Rewrite using splay tree functions directly. Don't call gomp_unmap_vars. Fix refcount handling. (present_create_copy): Use GOMP_MAP_VARS_OPENACC_ENTER_DATA in gomp_map_vars_async call. Adjust refcount handling. (delete_copyout): Remove dubious handling of target_mem_desc refcount. (gomp_acc_insert_pointer): Use GOMP_MAP_VARS_OPENACC_ENTER_DATA in gomp_map_vars_async call. Update refcount handling. (gomp_acc_remove_pointer): Reimplement. Fix detach and refcount handling. * oacc-parallel.c (find_pointer): Handle more mapping types. Update calls to gomp_unmap_vars and goacc_async_copyout_unmap_vars. (GOACC_enter_exit_data): Update refcount handling. libgomp/ * target.c (gomp_detach_pointer): Unlock device on error path. (gomp_map_vars_async): Support GOMP_MAP_VARS_OPENACC_ENTER_DATA and mapping size fix GOMP_MAP_ATTACH. (gomp_unmap_tgt): Call gomp_acc_data_env_remove_tgt. (gomp_unmap_vars): Remove finalize parameter. (gomp_unmap_vars_async): Likewise. Adjust detach handling. (GOMP_target, GOMP_target_ext, GOMP_target_end_data) (gomp_target_task_fn): Update calls to gomp_unmap_vars. * testsuite/libgomp.oacc-c-c++-common/context-2.c: Use correct API to unmap data. * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c: New test. * testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c: New test. * testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c: New test. * testsuite/libgomp.oacc-fortran/data-2.f90: Fix for unmap semantics. --- libgomp/libgomp.h | 10 +- libgomp/oacc-async.c | 4 +- libgomp/oacc-init.c | 2 +- libgomp/oacc-int.h | 2 +- libgomp/oacc-mem.c | 387 ++++++++++---------- libgomp/oacc-parallel.c | 76 +++-- libgomp/target.c | 35 ++- .../libgomp.oacc-c-c++-common/context-2.c | 6 +- .../libgomp.oacc-c-c++-common/context-4.c | 6 +- .../libgomp.oacc-c-c++-common/deep-copy-6.c | 59 +++ .../libgomp.oacc-c-c++-common/deep-copy-7.c | 42 +++ .../libgomp.oacc-c-c++-common/deep-copy-8.c | 53 +++ libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 | 20 +- 13 files changed, 445 insertions(+), 257 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 17fe0d3..568e260 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1002,6 +1002,7 @@ struct gomp_device_descr enum gomp_map_vars_kind { GOMP_MAP_VARS_OPENACC, + GOMP_MAP_VARS_OPENACC_ENTER_DATA, GOMP_MAP_VARS_TARGET, GOMP_MAP_VARS_DATA, GOMP_MAP_VARS_ENTER_DATA @@ -1010,7 +1011,8 @@ enum gomp_map_vars_kind struct gomp_coalesce_buf; extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int); -extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int); +extern void gomp_acc_remove_pointer (void **, size_t *, unsigned short *, + int, void *, bool, int); extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, unsigned short *); struct gomp_coalesce_buf; @@ -1039,10 +1041,12 @@ extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *, size_t, void **, void **, size_t *, void *, bool, enum gomp_map_vars_kind); +extern void gomp_acc_data_env_remove_tgt (struct target_mem_desc **, + struct target_mem_desc *); extern void gomp_unmap_tgt (struct target_mem_desc *); -extern void gomp_unmap_vars (struct target_mem_desc *, bool, bool); +extern void gomp_unmap_vars (struct target_mem_desc *, bool); extern void gomp_unmap_vars_async (struct target_mem_desc *, bool, - struct goacc_asyncqueue *, bool); + struct goacc_asyncqueue *); extern void gomp_init_device (struct gomp_device_descr *); extern bool gomp_fini_device (struct gomp_device_descr *); extern void gomp_unload_device (struct gomp_device_descr *); diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c index 6c12c82..bb00279 100644 --- a/libgomp/oacc-async.c +++ b/libgomp/oacc-async.c @@ -373,14 +373,14 @@ goacc_async_unmap_tgt (void *ptr) attribute_hidden void goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt, - struct goacc_asyncqueue *aq, bool finalize) + struct goacc_asyncqueue *aq) { struct gomp_device_descr *devicep = tgt->device_descr; /* Increment reference to delay freeing of device memory until callback has triggered. */ tgt->refcount++; - gomp_unmap_vars_async (tgt, true, aq, finalize); + gomp_unmap_vars_async (tgt, true, aq); devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt, (void *) tgt); } diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index e1938c5..48c9646 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -391,7 +391,7 @@ acc_shutdown_1 (acc_device_t d) { struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt; - gomp_unmap_vars (tgt, false, false); + gomp_unmap_vars (tgt, false); } walk->dev = NULL; diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h index 878f0f4..1f6c62c 100644 --- a/libgomp/oacc-int.h +++ b/libgomp/oacc-int.h @@ -112,7 +112,7 @@ void goacc_host_init (void); void goacc_init_asyncqueues (struct gomp_device_descr *); bool goacc_fini_asyncqueues (struct gomp_device_descr *); void goacc_async_copyout_unmap_vars (struct target_mem_desc *, - struct goacc_asyncqueue *, bool); + struct goacc_asyncqueue *); void goacc_async_free (struct gomp_device_descr *, struct goacc_asyncqueue *, void *); struct goacc_asyncqueue *get_goacc_asyncqueue (int); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 76ba914..3202f06 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -52,6 +52,25 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s) return key; } +/* Helper for lookup_dev. Iterate over splay tree. */ + +static splay_tree_key +lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s) +{ + splay_tree_key k = &node->key; + struct target_mem_desc *t = k->tgt; + + if (d >= t->tgt_start && d + s <= t->tgt_end) + return k; + + if (node->left) + return lookup_dev_1 (node->left, d, s); + if (node->right) + return lookup_dev_1 (node->right, d, s); + + return NULL; +} + /* Return block containing [D->S), or NULL if not contained. The list isn't ordered by device address, so we have to iterate over the whole array. This is not expected to be a common @@ -59,35 +78,12 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s) remains locked on exit. */ static splay_tree_key -lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) +lookup_dev (splay_tree mem_map, void *d, size_t s) { - int i; - struct target_mem_desc *t; - - if (!tgt) - return NULL; - - for (t = tgt; t != NULL; t = t->prev) - { - if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s) - break; - } - - if (!t) + if (!mem_map || !mem_map->root) return NULL; - for (i = 0; i < t->list_count; i++) - { - void * offset; - - splay_tree_key k = &t->array[i].key; - offset = d - t->tgt_start + k->tgt_offset; - - if (k->host_start + offset <= (void *) k->host_end) - return k; - } - - return NULL; + return lookup_dev_1 (mem_map->root, (uintptr_t) d, s); } /* OpenACC is silent on how memory exhaustion is indicated. We return @@ -165,7 +161,7 @@ acc_free (void *d) /* We don't have to call lazy open here, as the ptr value must have been returned by acc_malloc. It's not permitted to pass NULL in (unless you got that null from acc_malloc). */ - if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1))) + if ((k = lookup_dev (&acc_dev->mem_map, d, 1))) { void *offset; @@ -325,7 +321,7 @@ acc_hostptr (void *d) gomp_mutex_lock (&acc_dev->lock); - n = lookup_dev (acc_dev->openacc.data_environ, d, 1); + n = lookup_dev (&acc_dev->mem_map, d, 1); if (!n) { @@ -422,7 +418,7 @@ acc_map_data (void *h, void *d, size_t s) (int)s); } - if (lookup_dev (thr->dev->openacc.data_environ, d, s)) + if (lookup_dev (&thr->dev->mem_map, d, s)) { gomp_mutex_unlock (&acc_dev->lock); gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, @@ -436,11 +432,6 @@ acc_map_data (void *h, void *d, size_t s) tgt->list[0].key->refcount = REFCOUNT_INFINITY; } - gomp_mutex_lock (&acc_dev->lock); - tgt->prev = acc_dev->openacc.data_environ; - acc_dev->openacc.data_environ = tgt; - gomp_mutex_unlock (&acc_dev->lock); - if (profiling_setup_p) { thr->prof_info = NULL; @@ -448,11 +439,83 @@ acc_map_data (void *h, void *d, size_t s) } } +/* Remove the target_mem_desc holding the mapping for MAPNUM HOSTADDRS from + the OpenACC data environment pointed to by DATA_ENV. The device lock + should be held before calling, and remains locked on exit. */ + +static void +gomp_acc_data_env_remove (struct gomp_device_descr *acc_dev, + struct target_mem_desc **data_env, void **hostaddrs, + int mapnum) +{ + struct target_mem_desc *t, *tp; + + for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev) + { + bool all_match = true; + + /* We must locate the target descriptor by "value", matching each + hostaddr that it describes. */ + if (t->list_count != mapnum) + continue; + + for (int i = 0; i < t->list_count; i++) + if (t->list[i].key + && (t->list[i].key->host_start + t->list[i].offset + != (uintptr_t) hostaddrs[i])) + { + all_match = false; + break; + } + + if (all_match) + { + if (t->refcount > 1) + t->refcount--; + else + { + if (tp) + tp->prev = t->prev; + else + *data_env = t->prev; + } + return; + } + } + + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("cannot find data mapping to remove in data environment"); +} + +/* Similar, but removes target_mem_desc REMOVE from the DATA_ENV, in case its + reference count drops to zero resulting in it being unmapped (in + target.c:gomp_unmap_tgt). Unlike the above function it is not an error if + REMOVE is not present in the environment. The device lock should be held + before calling, and remains locked on exit. */ + +attribute_hidden void +gomp_acc_data_env_remove_tgt (struct target_mem_desc **data_env, + struct target_mem_desc *remove) +{ + struct target_mem_desc *t, *tp; + + for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev) + if (t == remove) + { + if (tp) + tp->prev = t->prev; + else + *data_env = t->prev; + return; + } +} + void acc_unmap_data (void *h) { struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + struct splay_tree_key_s cur_node; /* No need to call lazy open, as the address must have been mapped. */ @@ -466,12 +529,11 @@ acc_unmap_data (void *h) = __builtin_expect (goacc_profiling_setup_p (thr, &prof_info, &api_info), false); - size_t host_size; - gomp_mutex_lock (&acc_dev->lock); - splay_tree_key n = lookup_host (acc_dev, h, 1); - struct target_mem_desc *t; + cur_node.host_start = (uintptr_t) h; + cur_node.host_end = cur_node.host_start + 1; + splay_tree_key n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); if (!n) { @@ -479,47 +541,28 @@ acc_unmap_data (void *h) gomp_fatal ("%p is not a mapped block", (void *)h); } - host_size = n->host_end - n->host_start; - if (n->host_start != (uintptr_t) h) { + size_t host_size = n->host_end - n->host_start; gomp_mutex_unlock (&acc_dev->lock); gomp_fatal ("[%p,%d] surrounds %p", (void *) n->host_start, (int) host_size, (void *) h); } - /* Mark for removal. */ - n->refcount = 1; + splay_tree_remove (&acc_dev->mem_map, n); - t = n->tgt; + struct target_mem_desc *tgt = n->tgt; - if (t->refcount == 2) + if (tgt->refcount > 0) + tgt->refcount--; + else { - struct target_mem_desc *tp; - - /* This is the last reference, so pull the descriptor off the - chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from - freeing the device memory. */ - t->tgt_end = 0; - t->to_free = 0; - - for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; - tp = t, t = t->prev) - if (n->tgt == t) - { - if (tp) - tp->prev = t->prev; - else - acc_dev->openacc.data_environ = t->prev; - - break; - } + free (tgt->array); + free (tgt); } gomp_mutex_unlock (&acc_dev->lock); - gomp_unmap_vars (t, true, false); - if (profiling_setup_p) { thr->prof_info = NULL; @@ -585,6 +628,24 @@ present_create_copy (unsigned f, void *h, size_t s, int async) n->refcount++; n->dynamic_refcount++; } + + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + + sizeof (tgt->list[0])); + tgt->refcount = 1; + tgt->tgt_start = 0; + tgt->tgt_end = 0; + tgt->to_free = NULL; + tgt->prev = acc_dev->openacc.data_environ; + tgt->list_count = 1; + tgt->device_descr = acc_dev; + tgt->list[0].key = n; + tgt->list[0].copy_from = false; + tgt->list[0].always_copy_from = false; + tgt->list[0].do_detach = false; + tgt->list[0].offset = (uintptr_t) h - n->host_start; + tgt->list[0].length = 0; + acc_dev->openacc.data_environ = tgt; + gomp_mutex_unlock (&acc_dev->lock); } else if (!(f & FLAG_CREATE)) @@ -609,18 +670,19 @@ present_create_copy (unsigned f, void *h, size_t s, int async) goacc_aq aq = get_goacc_asyncqueue (async); tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, - &kinds, true, GOMP_MAP_VARS_OPENACC); - /* Initialize dynamic refcount. */ - tgt->list[0].key->dynamic_refcount = 1; - tgt->list[0].key->attach_count = NULL; + &kinds, true, + GOMP_MAP_VARS_OPENACC_ENTER_DATA); - gomp_mutex_lock (&acc_dev->lock); + for (int i = 0; i < tgt->list_count; i++) + if (tgt->list[i].key) + tgt->list[i].key->dynamic_refcount++; - d = tgt->to_free; + gomp_mutex_lock (&acc_dev->lock); tgt->prev = acc_dev->openacc.data_environ; acc_dev->openacc.data_environ = tgt; - gomp_mutex_unlock (&acc_dev->lock); + + d = tgt->to_free; } if (profiling_setup_p) @@ -753,11 +815,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) n->dynamic_refcount = 0; n->attach_count = NULL; } - if (n->refcount < n->dynamic_refcount) - { - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("Dynamic reference counting assert fail\n"); - } if (f & FLAG_FINALIZE) { @@ -772,21 +829,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) if (n->refcount == 0) { - if (n->tgt->refcount == 2) - { - struct target_mem_desc *tp, *t; - for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; - tp = t, t = t->prev) - if (n->tgt == t) - { - if (tp) - tp->prev = t->prev; - else - acc_dev->openacc.data_environ = t->prev; - break; - } - } - if (f & FLAG_COPYOUT) { goacc_aq aq = get_goacc_asyncqueue (async); @@ -968,38 +1010,16 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; - if (acc_is_present (*hostaddrs, *sizes)) - { - splay_tree_key n; - gomp_mutex_lock (&acc_dev->lock); - n = lookup_host (acc_dev, *hostaddrs, *sizes); - gomp_mutex_unlock (&acc_dev->lock); - - tgt = n->tgt; - for (size_t i = 0; i < tgt->list_count; i++) - if (tgt->list[i].key == n) - { - for (size_t j = 0; j < mapnum; j++) - if (i + j < tgt->list_count && tgt->list[i + j].key) - { - tgt->list[i + j].key->refcount++; - tgt->list[i + j].key->dynamic_refcount++; - } - return; - } - /* Should not reach here. */ - gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset"); - } - gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); goacc_aq aq = get_goacc_asyncqueue (async); tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, - NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); + NULL, sizes, kinds, true, + GOMP_MAP_VARS_OPENACC_ENTER_DATA); gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); - /* Initialize dynamic refcount. */ - tgt->list[0].key->dynamic_refcount = 1; - tgt->list[0].key->attach_count = NULL; + for (size_t i = 0; i < tgt->list_count; i++) + if (tgt->list[i].key) + tgt->list[i].key->dynamic_refcount++; gomp_mutex_lock (&acc_dev->lock); tgt->prev = acc_dev->openacc.data_environ; @@ -1008,96 +1028,83 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, } void -gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, - int finalize, int mapnum) +gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds, + int async, void *detach_from, bool finalize, + int mapnum) { struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + struct splay_tree_key_s cur_node; splay_tree_key n; - struct target_mem_desc *t; - int minrefs = (mapnum == 1) ? 2 : 3; - - if (!acc_is_present (h, s)) - return; gomp_mutex_lock (&acc_dev->lock); - n = lookup_host (acc_dev, h, 1); - - if (!n) + if (detach_from) { - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("%p is not a mapped block", (void *)h); + splay_tree_key n2 = lookup_host (acc_dev, detach_from, 1); + goacc_aq aq = get_goacc_asyncqueue (async); + gomp_detach_pointer (acc_dev, aq, n2, (uintptr_t) detach_from, finalize, + NULL); } - gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); - - t = n->tgt; + gomp_acc_data_env_remove (acc_dev, &acc_dev->openacc.data_environ, hostaddrs, + mapnum); - if (n->refcount < n->dynamic_refcount) + for (int i = 0; i < mapnum; i++) { - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("Dynamic reference counting assert fail\n"); - } - - if (finalize) - { - n->refcount -= n->dynamic_refcount; - n->dynamic_refcount = 0; - } - else if (n->dynamic_refcount) - { - n->dynamic_refcount--; - n->refcount--; - } + int kind = kinds[i] & 0xff; + bool copyfrom = false; - gomp_mutex_unlock (&acc_dev->lock); - - if (n->refcount == 0) - { - if (t->refcount == minrefs) - { - /* This is the last reference, so pull the descriptor off the - chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from - freeing the device memory. */ - struct target_mem_desc *tp; - for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; - tp = t, t = t->prev) + switch (kind) + { + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALWAYS_FROM: + copyfrom = true; + /* Fallthrough. */ + case GOMP_MAP_TO_PSET: + case GOMP_MAP_POINTER: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + case GOMP_MAP_DETACH: + case GOMP_MAP_FORCE_DETACH: + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + + ((kind == GOMP_MAP_DETACH + || kind == GOMP_MAP_FORCE_DETACH + || kind == GOMP_MAP_POINTER) + ? sizeof (void *) : sizes[i]); + n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); + if (n == NULL) + continue; + if (finalize) { - if (n->tgt == t) - { - if (tp) - tp->prev = t->prev; - else - acc_dev->openacc.data_environ = t->prev; - break; - } + n->refcount -= n->dynamic_refcount; + n->dynamic_refcount = 0; } - } - - /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */ - n->refcount = 1; - t->refcount = minrefs; - for (size_t i = 0; i < t->list_count; i++) - if (t->list[i].key == n) - { - t->list[i].copy_from = force_copyfrom ? 1 : 0; - break; - } - - /* If running synchronously, unmap immediately. */ - if (async < acc_async_noval) - gomp_unmap_vars (t, true, finalize); - else - { - goacc_aq aq = get_goacc_asyncqueue (async); - goacc_async_copyout_unmap_vars (t, aq, finalize); + else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) + { + n->refcount--; + n->dynamic_refcount--; + } + if (copyfrom) + gomp_copy_dev2host (acc_dev, NULL, (void *) cur_node.host_start, + (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start + - n->host_start), + cur_node.host_end - cur_node.host_start); + if (n->refcount == 0) + gomp_remove_var (acc_dev, n); + break; + default: + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("gomp_acc_remove_pointer unhandled kind 0x%.2x", + kind); } } - gomp_mutex_unlock (&acc_dev->lock); - gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); + gomp_mutex_unlock (&acc_dev->lock); } diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index f6c9114..8a3c65b 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -58,8 +58,12 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds) case GOMP_MAP_FORCE_TO: case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_TOFROM: + case GOMP_MAP_FORCE_TOFROM: case GOMP_MAP_ALLOC: case GOMP_MAP_RELEASE: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: { unsigned char kind1 = kinds[pos + 1] & 0xff; if (kind1 == GOMP_MAP_POINTER @@ -392,7 +396,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *), &api_info); } /* If running synchronously, unmap immediately. */ - gomp_unmap_vars (tgt, true, false); + gomp_unmap_vars (tgt, true); if (profiling_dispatch_p) { prof_info.event_type = acc_ev_exit_data_end; @@ -410,7 +414,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *), else acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, tgt, aq); - goacc_async_copyout_unmap_vars (tgt, aq, false); + goacc_async_copyout_unmap_vars (tgt, aq); } out: @@ -647,7 +651,7 @@ GOACC_data_end (void) gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); thr->mapped_data = tgt->prev; - gomp_unmap_vars (tgt, true, false); + gomp_unmap_vars (tgt, true); gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); if (profiling_dispatch_p) @@ -845,18 +849,39 @@ GOACC_enter_exit_data (int device, size_t mapnum, int elems = sizes[i]; struct splay_tree_key_s k; splay_tree_key str; - k.host_start = (uintptr_t) hostaddrs[i]; - k.host_end = k.host_start + 1; + uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1]; + uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems] + + sizes[i + elems]; + k.host_start = elems_lo; + k.host_end = elems_hi; gomp_mutex_lock (&acc_dev->lock); str = splay_tree_lookup (&acc_dev->mem_map, &k); gomp_mutex_unlock (&acc_dev->lock); - /* We increment the dynamic reference count for the struct - itself by the number of struct elements that we - mapped. */ - if (str->refcount != REFCOUNT_INFINITY) + if (str == NULL) { - str->refcount += elems; - str->dynamic_refcount += elems; + size_t mapsize = elems_hi - elems_lo; + goacc_aq aq = get_goacc_asyncqueue (async); + struct target_mem_desc *tgt; + unsigned short thiskind = GOMP_MAP_ALLOC; + int j; + for (j = 0; j < elems; j++) + if ((kinds[i + j] & 0xff) != GOMP_MAP_ALLOC) + { + thiskind = GOMP_MAP_TO; + break; + } + tgt = gomp_map_vars_async (acc_dev, aq, 1, + &hostaddrs[i + 1], NULL, &mapsize, &thiskind, + true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); + + for (j = 0; j < tgt->list_count; j++) + if (tgt->list[j].key) + tgt->list[j].key->dynamic_refcount++; + + gomp_mutex_lock (&acc_dev->lock); + tgt->prev = acc_dev->openacc.data_environ; + acc_dev->openacc.data_environ = tgt; + gomp_mutex_unlock (&acc_dev->lock); } i += elems; } @@ -962,18 +987,17 @@ GOACC_enter_exit_data (int device, size_t mapnum, int elems = sizes[i]; struct splay_tree_key_s k; splay_tree_key str; - k.host_start = (uintptr_t) hostaddrs[i]; - k.host_end = k.host_start + 1; + uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1]; + uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems] + + sizes[i + elems]; + k.host_start = elems_lo; + k.host_end = elems_hi; gomp_mutex_lock (&acc_dev->lock); str = splay_tree_lookup (&acc_dev->mem_map, &k); gomp_mutex_unlock (&acc_dev->lock); - /* Decrement dynamic reference count for the struct by the - number of elements that we are unmapping. */ - if (str->dynamic_refcount >= elems) - { - str->dynamic_refcount -= elems; - str->refcount -= elems; - } + if (str == NULL) + gomp_fatal ("[%p,%ld] is not mapped", (void *) elems_lo, + (unsigned long) (elems_hi - elems_lo)); i += elems; } break; @@ -989,10 +1013,14 @@ GOACC_enter_exit_data (int device, size_t mapnum, &sizes[i], &kinds[i]); else { - bool copyfrom = (kind == GOMP_MAP_FORCE_FROM - || kind == GOMP_MAP_FROM); - gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, - async, finalize, pointer); + unsigned short ptrkind = kinds[i + pointer - 1] & 0xff; + bool detach = (ptrkind == GOMP_MAP_DETACH + || ptrkind == GOMP_MAP_FORCE_DETACH); + void *detach_from = detach ? hostaddrs[i + pointer - 1] + : NULL; + gomp_acc_remove_pointer (&hostaddrs[i], &sizes[i], &kinds[i], + async, detach_from, finalize, + pointer); /* See the above comment. */ } i += pointer - 1; diff --git a/libgomp/target.c b/libgomp/target.c index da51291..bb5e1e9 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -629,7 +629,10 @@ gomp_detach_pointer (struct gomp_device_descr *devicep, idx = (detach_from - n->host_start) / sizeof (void *); if (!n->attach_count) - gomp_fatal ("no attachment counters for struct"); + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("no attachment counters for struct"); + } if (finalize) n->attach_count[idx] = 1; @@ -1013,7 +1016,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, continue; } cur_node.host_start = (uintptr_t) hostaddrs[i]; - if (!GOMP_MAP_POINTER_P (kind & typemask)) + if (!GOMP_MAP_POINTER_P (kind & typemask) + && (kind & typemask) != GOMP_MAP_ATTACH) cur_node.host_end = cur_node.host_start + sizes[i]; else cur_node.host_end = cur_node.host_start + sizeof (void *); @@ -1281,7 +1285,9 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, tgt->list[i].length = n->host_end - n->host_start; tgt->list[i].copy_from = false; tgt->list[i].always_copy_from = false; - tgt->list[i].do_detach = true; + tgt->list[i].do_detach + = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA); + n->refcount++; } else { @@ -1622,6 +1628,8 @@ gomp_unmap_tgt (struct target_mem_desc *tgt) if (tgt->tgt_end) gomp_free_device_memory (tgt->device_descr, tgt->to_free); + gomp_acc_data_env_remove_tgt (&tgt->device_descr->openacc.data_environ, tgt); + free (tgt->array); free (tgt); } @@ -1650,17 +1658,18 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) has been done already. */ attribute_hidden void -gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, bool finalize) +gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) { - gomp_unmap_vars_async (tgt, do_copyfrom, NULL, finalize); + gomp_unmap_vars_async (tgt, do_copyfrom, NULL); } attribute_hidden void gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, - struct goacc_asyncqueue *aq, bool finalize) + struct goacc_asyncqueue *aq) { struct gomp_device_descr *devicep = tgt->device_descr; + if (tgt->list_count == 0) { free (tgt); @@ -1685,15 +1694,15 @@ gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, if (k != NULL && tgt->list[i].do_detach) gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start - + tgt->list[i].offset, finalize, - NULL); + + tgt->list[i].offset, + k->refcount == 1, NULL); } for (i = 0; i < tgt->list_count; i++) { splay_tree_key k = tgt->list[i].key; - if (k == NULL || tgt->list[i].do_detach) + if (k == NULL) continue; bool do_unmap = false; @@ -2314,7 +2323,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, GOMP_MAP_VARS_TARGET); devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, NULL); - gomp_unmap_vars (tgt_vars, true, false); + gomp_unmap_vars (tgt_vars, true); } /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, @@ -2458,7 +2467,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, args); if (tgt_vars) - gomp_unmap_vars (tgt_vars, true, false); + gomp_unmap_vars (tgt_vars, true); } /* Host fallback for GOMP_target_data{,_ext} routines. */ @@ -2527,7 +2536,7 @@ GOMP_target_end_data (void) { struct target_mem_desc *tgt = icv->target_data; icv->target_data = tgt->prev; - gomp_unmap_vars (tgt, true, false); + gomp_unmap_vars (tgt, true); } } @@ -2762,7 +2771,7 @@ gomp_target_task_fn (void *data) if (ttask->state == GOMP_TARGET_TASK_FINISHED) { if (ttask->tgt) - gomp_unmap_vars (ttask->tgt, true, false); + gomp_unmap_vars (ttask->tgt, true); return false; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c index 6a52f74..6bdcfe7 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c @@ -182,13 +182,13 @@ main (int argc, char **argv) exit (EXIT_FAILURE); } + acc_delete (&h_X[0], N * sizeof (float)); + acc_delete (&h_Y1[0], N * sizeof (float)); + free (h_X); free (h_Y1); free (h_Y2); - acc_free (d_X); - acc_free (d_Y); - context_check (pctx); s = cublasDestroy (h); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c index 71365e8..b403a5c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c @@ -176,13 +176,13 @@ main (int argc, char **argv) exit (EXIT_FAILURE); } + acc_delete (&h_X[0], N * sizeof (float)); + acc_delete (&h_Y1[0], N * sizeof (float)); + free (h_X); free (h_Y1); free (h_Y2); - acc_free (d_X); - acc_free (d_Y); - context_check (pctx); s = cublasDestroy (h); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c new file mode 100644 index 0000000..81c1c5e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c @@ -0,0 +1,59 @@ +/* { dg-do run { target { ! openacc_host_selected } } } */ + +#include +#include +#include + +struct dc +{ + int a; + int **b; +}; + +int +main () +{ + int n = 100, i, j, k; + struct dc v = { .a = 3 }; + + v.b = (int **) malloc (sizeof (int *) * n); + for (i = 0; i < n; i++) + v.b[i] = (int *) malloc (sizeof (int) * n); + + for (k = 0; k < 16; k++) + { +#pragma acc data copy(v) + { +#pragma acc data copy(v.b[:n]) + { + for (i = 0; i < n; i++) + { + acc_copyin (v.b[i], sizeof (int) * n); + acc_attach ((void **) &v.b[i]); + } + +#pragma acc parallel loop + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + v.b[i][j] = v.a + i + j; + + for (i = 0; i < n; i++) + { + acc_detach ((void **) &v.b[i]); + acc_copyout (v.b[i], sizeof (int) * n); + } + } + } + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + assert (v.b[i][j] == v.a + i + j); + + assert (!acc_is_present (&v, sizeof (v))); + assert (!acc_is_present (v.b, sizeof (int *) * n)); + for (i = 0; i < n; i++) + assert (!acc_is_present (v.b[i], sizeof (int) * n)); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c new file mode 100644 index 0000000..3a970a0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c @@ -0,0 +1,42 @@ +/* { dg-do run { target { ! openacc_host_selected } } } */ + +#include +#include +#include + +struct dc +{ + int a; + int *b; +}; + +int +main () +{ + int n = 100, i, j, k; + struct dc v = { .a = 3 }; + + v.b = (int *) malloc (sizeof (int) * n); + + for (k = 0; k < 16; k++) + { +#pragma acc enter data copyin(v.a, v.b[0:n]) + +#pragma acc enter data pcopyin(v.b[0:n]) + +#pragma acc parallel loop attach(v.b) + for (i = 0; i < n; i++) + v.b[i] = v.a + i; + +#pragma acc exit data copyout(v.b[:n]) +#pragma acc exit data delete(v) finalize + + for (i = 0; i < n; i++) + assert (v.b[i] == v.a + i); + + assert (!acc_is_present (&v, sizeof (v))); + assert (!acc_is_present (v.b, sizeof (int *) * n)); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c new file mode 100644 index 0000000..54f553b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c @@ -0,0 +1,53 @@ +/* { dg-do run { target { ! openacc_host_selected } } } */ + +#include +#include +#include + +struct dc +{ + int a; + int *b; + int *c; + int *d; +}; + +int +main () +{ + int n = 100, i, j, k; + struct dc v = { .a = 3 }; + + v.b = (int *) malloc (sizeof (int) * n); + v.c = (int *) malloc (sizeof (int) * n); + v.d = (int *) malloc (sizeof (int) * n); + +#pragma acc enter data copyin(v) + + for (k = 0; k < 16; k++) + { +#pragma acc enter data copyin(v.a, v.b[:n], v.c[:n], v.d[:n]) + +#pragma acc parallel loop + for (i = 0; i < n; i++) + v.b[i] = v.a + i; + +#pragma acc exit data copyout(v.b[:n]) +#pragma acc exit data copyout(v.c[:n]) +#pragma acc exit data copyout(v.d[:n]) + + for (i = 0; i < n; i++) + assert (v.b[i] == v.a + i); + + assert (acc_is_present (&v, sizeof (v))); + assert (!acc_is_present (v.b, sizeof (int *) * n)); + assert (!acc_is_present (v.c, sizeof (int *) * n)); + assert (!acc_is_present (v.d, sizeof (int *) * n)); + } + +#pragma acc exit data copyout(v) + + assert (!acc_is_present (&v, sizeof (v))); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 index db80413..a58e465 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 @@ -92,10 +92,6 @@ program test if (acc_is_present (c) .eqv. .TRUE.) call abort - !$acc exit data delete (c(0:N)) - - if (acc_is_present (c) .eqv. .TRUE.) call abort - do i = 1, N if (c(i) .ne. 3.0) call abort end do @@ -113,11 +109,6 @@ program test if (acc_is_present (c) .eqv. .TRUE.) call abort if (acc_is_present (d) .eqv. .TRUE.) call abort - !$acc exit data delete (c(0:N), d(0:N)) - - if (acc_is_present (c) .eqv. .TRUE.) call abort - if (acc_is_present (d) .eqv. .TRUE.) call abort - do i = 1, N if (c(i) .ne. 5.0) call abort if (d(i) .ne. 9.0) call abort @@ -177,8 +168,8 @@ program test !$acc exit data delete (c(0:N), d(0:N)) - !if (acc_is_present (c) .eqv. .TRUE.) call abort - !if (acc_is_present (d) .eqv. .TRUE.) call abort + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort !$acc exit data delete (c(0:N), d(0:N)) @@ -190,12 +181,7 @@ program test if (acc_is_present (c) .eqv. .FALSE.) call abort if (acc_is_present (d) .eqv. .TRUE.) call abort - !$acc exit data delete (c(0:N), d(0:N)) - - if (acc_is_present (c) .eqv. .TRUE.) call abort - if (acc_is_present (d) .eqv. .TRUE.) call abort - - !$acc exit data delete (c(0:N), d(0:N)) + !$acc exit data delete (c(0:N)) if (acc_is_present (c) .eqv. .TRUE.) call abort if (acc_is_present (d) .eqv. .TRUE.) call abort