From patchwork Thu Aug 28 06:13:32 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tobias Burnus X-Patchwork-Id: 383704 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 2701314009B for ; Thu, 28 Aug 2014 16:13:58 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; q= dns; s=default; b=rCBN0VYq4ZHjstSW305P1Fby+USmrXKR72tyL/23m3Rr/U cMEu8vbo9C6sl2oDXiWYMs+M/q7qXz91EufPnoA6cO3bC9/1zuEQV7vgusAM+3Gk stZQlrtBNPknIu2O6xKLQmsbzQP78rYUnXPAr+4yOtavCxnikoTITblmLxSVQ= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; s= default; bh=u8Uw2BsN/wveqLIns/KZbLZCVP0=; b=vnqK5RuzJat96bSXJHpa vfnlpsYXjGHjqZ/XDlt/WfbgC/ND6uMBICsSd8TB6KoJgC5GCcX9n3yJKy2yYNrb z8YIF37Y2RIn7BHmPCP7r/6bYHXzFV4MRBp1HLczQWE2omQ4Fz6zhXOQFm7g9+ds EuXd1nq3a11WkLdOqsnerI4= Received: (qmail 31718 invoked by alias); 28 Aug 2014 06:13:41 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 31693 invoked by uid 89); 28 Aug 2014 06:13:40 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=1.6 required=5.0 tests=AWL, BAYES_50, RCVD_IN_DNSWL_NONE, SUBJ_OBFU_PUNCT_FEW, SUBJ_OBFU_PUNCT_MANY autolearn=no version=3.3.2 X-Spam-User: qpsmtpd, 2 recipients X-HELO: mx02.qsc.de Received: from mx02.qsc.de (HELO mx02.qsc.de) (213.148.130.14) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES256-SHA encrypted) ESMTPS; Thu, 28 Aug 2014 06:13:36 +0000 Received: from tux.net-b.de (port-92-194-32-4.dynamic.qsc.de [92.194.32.4]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mx02.qsc.de (Postfix) with ESMTPSA id 9F03327665; Thu, 28 Aug 2014 08:13:32 +0200 (CEST) Message-ID: <53FEC88C.2080204@net-b.de> Date: Thu, 28 Aug 2014 08:13:32 +0200 From: Tobias Burnus User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.7.0 MIME-Version: 1.0 To: gcc-patches , gfortran , opencoarrays@googlegroups.com Subject: [Patch, Fortran] CAF dep (3/3): coarrays - pass may_require_tmp informtion for CAF_get/send/sendget to the library This patch is based on 1/2 and 2/2 on the series. When the patch is approved, OpenCoarrays needs to be adapted; however, as surplus arguments of the callee are ignored, no immediate action is required. (And some delay avoids issues with compilers being older than the library.) The issue comes up in the context of having a coarray access on the same image, e.g. "a[this_image()] = a", where alias questions play a role. While one can leave the general handling to the library - such as switching to memmove in case of local memory access, this patch tries to help the library to decide whether it has to create a temporary variable or not. For that reason, it passes an may_require_temporary argument to the library. may_require_temporary is false if the source and target variables are disjunct, or if they are such overlapping that walking them in element order will not require a temporary (special case: identical). If the compiler cannot tell at compile time, the value is always one. Of course, if the memory access is for a different image than the current image (or for sendget: when the two image indexes are for different images), the library can ignore the argument "may_require_temporary" and use the normal remote memory access. Build and regtested on x86-64-gnu-linux. OK for the trunk? Tobias PS: I image code like the following in the library: if (image_index == this_image) { if (contiguous LHS and RHS): use memmove // With special case: LHS and RHS identical if (!may_require_temporary) for-loop assigning LHS = RHS in element order else { tmp = malloc() if (RHS contiguous or scalar) tmp = memcpy(RHS) else for loop assigning RHS to tmp if (LHS contiguous) LHS = memcpy(tmp) else for loop assigning tmp to LHS } } else { do normal remote-image assignment } 2014-08-28 Tobias Burnus gcc/fortran/ * trans-decl.c (gfc_build_builtin_function_decls): Add may_require_tmp dummy argument. * trans-intrinsic.c (gfc_conv_intrinsic_caf_get, conv_caf_send): Handle may_require_tmp argument. (gfc_conv_intrinsic_function): Update call. * gfortran.texi (_gfortran_caf_send, _gfortran_caf_get, _gfortran_caf_sendget): Update interface description. gcc/testsuite/ * gfortran.dg/coarray_lib_comm_1.f90: New. libgfortran/ * caf/libcaf.h (_gfortran_caf_send, _gfortran_caf_get, _gfortran_caf_sendget): Update prototype. * caf/single.c (_gfortran_caf_send, _gfortran_caf_get, _gfortran_caf_sendget): Handle may_require_tmp. diff --git a/gcc/testsuite/gfortran.dg/coarray_lib_comm_1.f90 b/gcc/testsuite/gfortran.dg/coarray_lib_comm_1.f90 new file mode 100644 index 0000000..1db40feb7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/coarray_lib_comm_1.f90 @@ -0,0 +1,46 @@ +! { dg-do run } +! { dg-options "-fdump-tree-original -fcoarray=lib -lcaf_single" } +! +! Some dependency-analysis check for coarray communication +! +integer, target, save :: A(10)[*] +integer, pointer :: P(:) +integer, save :: B(10)[*] + +A = [1,2,3,4,5,6,7,8,9,10] +B = [1,2,3,4,5,6,7,8,9,10] +A(10:2:-1) = A(9:1:-1)[1] ! 0 +B(10:2:-1) = B(9:1:-1) +if (any (A-B /= 0)) call abort + +A = [1,2,3,4,5,6,7,8,9,10] +B = [1,2,3,4,5,6,7,8,9,10] +A(9:1:-1) = A(10:2:-1)[1] ! 1 +B(9:1:-1) = B(10:2:-1) +if (any (A-B /= 0)) call abort + +A = [1,2,3,4,5,6,7,8,9,10] +B = [1,2,3,4,5,6,7,8,9,10] +allocate(P(10)) +P(:) = A(:)[1] ! 1 +if (any (A-B /= 0)) call abort + +A = [1,2,3,4,5,6,7,8,9,10] +B = [1,2,3,4,5,6,7,8,9,10] +allocate(P(10)) +P(:) = B(:)[1] ! 0 + +A = [1,2,3,4,5,6,7,8,9,10] +B = [1,2,3,4,5,6,7,8,9,10] +A(1:5)[1] = A(3:7)[1] ! 1 +B(1:5) = B(3:7) +if (any (A-B /= 0)) call abort +end + +! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, &parm.\[0-9\]+, 0B, &parm.\[0-9\]+, 4, 4, 0\\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, &parm.\[0-9\]+, 0B, &parm.\[0-9\]+, 4, 4, 1\\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, &parm.\[0-9\]+, 0B, &p, 4, 4, 1\\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "_gfortran_caf_get \\\(caf_token.1, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) b, 1, &parm.\[0-9\]+, 0B, &p, 4, 4, 0\\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "_gfortran_caf_sendget \\\(caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, &parm.\[0-9\]+, 0B, caf_token.0, \\\(integer\\\(kind=\[48\]\\\)\\\) parm.\[0-9\]+.data - \\\(integer\\\(kind=\[48\]\\\)\\\) a, 1, &parm.\[0-9\]+, 0B, 4, 4, 0\\\);" 1 "original" } } + +! { dg-final { cleanup-tree-dump "original" } } diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index 0ce7226..d02452c 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -3448,7 +3448,7 @@ to a remote image identified by the image_index. @item @emph{Syntax}: @code{void _gfortran_caf_send (caf_token_t token, size_t offset, int image_index, gfc_descriptor_t *dest, caf_vector_t *dst_vector, -gfc_descriptor_t *src, int dst_kind, int src_kind)} +gfc_descriptor_t *src, int dst_kind, int src_kind, bool may_require_tmp)} @item @emph{Arguments}: @multitable @columnfractions .15 .70 @@ -3466,15 +3466,26 @@ triplet of the dest argument. transferred to the remote image @item @var{dst_kind} @tab Kind of the destination argument @item @var{src_kind} @tab Kind of the source argument +@item @var{may_require_tmp} @tab The variable is false it is known at compile +time that the @var{dest} and @var{src} either cannot overlap or overlap (fully +or partially) such that walking @var{src} and @var{dest} in element wise +element order (honoring the stride value) will not lead to wrong results. +Otherwise, the value is true. @end multitable @item @emph{NOTES} It is permitted to have image_id equal the current image; the memory of the send-to and the send-from might (partially) overlap in that case. The -implementation has to take care that it handles this case. Note that the -assignment of a scalar to an array is permitted. In addition, the library has -to handle numeric-type conversion and for strings, padding and different -character kinds. +implementation has to take care that it handles this case, e.g. using +@code{memmove} which handles (partially) overlapping memory. If +@var{may_require_tmp} is true, the library might additionally create a +temporary variable, unless additional checks show that this is not required +(e.g. because walking backward is possible or because both arrays are +contiguous and @code{memmove} takes care of overlap issues). + +Note that the assignment of a scalar to an array is permitted. In addition, +the library has to handle numeric-type conversion and for strings, padding +and different character kinds. @end table @@ -3490,7 +3501,7 @@ image identified by the image_index. @item @emph{Syntax}: @code{void _gfortran_caf_get_desc (caf_token_t token, size_t offset, int image_index, gfc_descriptor_t *src, caf_vector_t *src_vector, -gfc_descriptor_t *dest, int src_kind, int dst_kind)} +gfc_descriptor_t *dest, int src_kind, int dst_kind, bool may_require_tmp)} @item @emph{Arguments}: @multitable @columnfractions .15 .70 @@ -3508,14 +3519,25 @@ subscript of the destination array; the values are relative to the dimension triplet of the dest argument. @item @var{dst_kind} @tab Kind of the destination argument @item @var{src_kind} @tab Kind of the source argument +@item @var{may_require_tmp} @tab The variable is false it is known at compile +time that the @var{dest} and @var{src} either cannot overlap or overlap (fully +or partially) such that walking @var{src} and @var{dest} in element wise +element order (honoring the stride value) will not lead to wrong results. +Otherwise, the value is true. @end multitable @item @emph{NOTES} It is permitted to have image_id equal the current image; the memory of the send-to and the send-from might (partially) overlap in that case. The -implementation has to take care that it handles this case. Note that the -library has to handle numeric-type conversion and for strings, padding -and different character kinds. +implementation has to take care that it handles this case, e.g. using +@code{memmove} which handles (partially) overlapping memory. If +@var{may_require_tmp} is true, the library might additionally create a +temporary variable, unless additional checks show that this is not required +(e.g. because walking backward is possible or because both arrays are +contiguous and @code{memmove} takes care of overlap issues). + +Note that the library has to handle numeric-type conversion and for strings, +padding and different character kinds. @end table @@ -3533,7 +3555,8 @@ dst_image_index. @code{void _gfortran_caf_sendget (caf_token_t dst_token, size_t dst_offset, int dst_image_index, gfc_descriptor_t *dest, caf_vector_t *dst_vector, caf_token_t src_token, size_t src_offset, int src_image_index, -gfc_descriptor_t *src, caf_vector_t *src_vector, int dst_kind, int src_kind)} +gfc_descriptor_t *src, caf_vector_t *src_vector, int dst_kind, int src_kind, +bool may_require_tmp)} @item @emph{Arguments}: @multitable @columnfractions .15 .70 @@ -3543,7 +3566,7 @@ destination coarray. shifted compared to the base address of the destination coarray. @item @var{dst_image_index} @tab The ID of the destination remote image; must be a positive number. -@item @var{dst_dest} @tab intent(in) Array descriptor for the destination +@item @var{dest} @tab intent(in) Array descriptor for the destination remote image for the bounds and the size. The base_addr shall not be accessed. @item @var{dst_vector} @tab intent(int) If not NULL, it contains the vector subscript of the destination array; the values are relative to the dimension @@ -3553,21 +3576,31 @@ triplet of the dest argument. compared to the base address of the source coarray. @item @var{src_image_index} @tab The ID of the source remote image; must be a positive number. -@item @var{src_dest} @tab intent(in) Array descriptor of the local array to be +@item @var{src} @tab intent(in) Array descriptor of the local array to be transferred to the remote image. @item @var{src_vector} @tab intent(in) Array descriptor of the local array to be transferred to the remote image @item @var{dst_kind} @tab Kind of the destination argument @item @var{src_kind} @tab Kind of the source argument +@item @var{may_require_tmp} @tab The variable is false it is known at compile +time that the @var{dest} and @var{src} either cannot overlap or overlap (fully +or partially) such that walking @var{src} and @var{dest} in element wise +element order (honoring the stride value) will not lead to wrong results. +Otherwise, the value is true. @end multitable @item @emph{NOTES} -It is permitted to have image_id equal the current image; the memory of the -send-to and the send-from might (partially) overlap in that case. The -implementation has to take care that it handles this case. Note that the -assignment of a scalar to an array is permitted. In addition, the library has -to handle numeric-type conversion and for strings, padding and different -character kinds. +It is permitted to have image_ids equal; the memory of the send-to and the +send-from might (partially) overlap in that case. The implementation has to +take care that it handles this case, e.g. using @code{memmove} which handles +(partially) overlapping memory. If @var{may_require_tmp} is true, the library +might additionally create a temporary variable, unless additional checks show +that this is not required (e.g. because walking backward is possible or because +both arrays are contiguous and @code{memmove} takes care of overlap issues). + +Note that the assignment of a scalar to an array is permitted. In addition, +the library has to handle numeric-type conversion and for strings, padding and +different character kinds. @end table diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 3063fea..6afa6f3 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -3353,20 +3353,23 @@ gfc_build_builtin_function_decls (void) ppvoid_type_node, pint_type, pchar_type_node, integer_type_node); gfor_fndecl_caf_get = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_get")), ".R.RRRW", void_type_node, 8, + get_identifier (PREFIX("caf_get")), ".R.RRRW", void_type_node, 9, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, - pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node); + pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, + boolean_type_node); gfor_fndecl_caf_send = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_send")), ".R.RRRR", void_type_node, 8, + get_identifier (PREFIX("caf_send")), ".R.RRRR", void_type_node, 9, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, - pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node); + pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, + boolean_type_node); gfor_fndecl_caf_sendget = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("caf_sendget")), ".R.RRRR.RRR", void_type_node, - 12, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, + 13, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, pvoid_type_node, size_type_node, integer_type_node, - pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node); + pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, + boolean_type_node); gfor_fndecl_caf_sync_all = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("caf_sync_all")), ".WW", void_type_node, diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index 3aa59c9..a13b113 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see #include "trans-const.h" #include "trans-types.h" #include "trans-array.h" +#include "dependency.h" /* For CAF array alias analysis. */ /* Only for gfc_trans_assign and gfc_trans_pointer_assign. */ #include "trans-stmt.h" #include "tree-nested.h" @@ -1086,7 +1087,8 @@ conv_caf_vector_subscript (stmtblock_t *block, tree desc, gfc_array_ref *ar) /* Get data from a remote coarray. */ static void -gfc_conv_intrinsic_caf_get (gfc_se *se, gfc_expr *expr, tree lhs, tree lhs_kind) +gfc_conv_intrinsic_caf_get (gfc_se *se, gfc_expr *expr, tree lhs, tree lhs_kind, + tree may_require_tmp) { gfc_expr *array_expr; gfc_se argse; @@ -1193,9 +1195,13 @@ gfc_conv_intrinsic_caf_get (gfc_se *se, gfc_expr *expr, tree lhs, tree lhs_kind) image_index = gfc_caf_get_image_index (&se->pre, array_expr, caf_decl); gfc_get_caf_token_offset (&token, &offset, caf_decl, argse.expr, array_expr); - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_get, 8, + /* No overlap possible as we have generated a temporary. */ + if (lhs == NULL_TREE) + may_require_tmp = boolean_false_node; + + tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_get, 9, token, offset, image_index, argse.expr, vec, - dst_var, kind, lhs_kind); + dst_var, kind, lhs_kind, may_require_tmp); gfc_add_expr_to_block (&se->pre, tmp); if (se->ss) @@ -1215,6 +1221,7 @@ conv_caf_send (gfc_code *code) { gfc_se lhs_se, rhs_se; stmtblock_t block; tree caf_decl, token, offset, image_index, tmp, lhs_kind, rhs_kind; + tree may_require_tmp; tree lhs_type = NULL_TREE; tree vec = null_pointer_node, rhs_vec = null_pointer_node; @@ -1222,6 +1229,8 @@ conv_caf_send (gfc_code *code) { lhs_expr = code->ext.actual->expr; rhs_expr = code->ext.actual->next->expr; + may_require_tmp = gfc_check_dependency (lhs_expr, rhs_expr, false) == 0 + ? boolean_false_node : boolean_true_node; gfc_init_block (&block); /* LHS. */ @@ -1275,7 +1284,8 @@ conv_caf_send (gfc_code *code) { { gcc_assert (gfc_is_coindexed (rhs_expr)); gfc_init_se (&rhs_se, NULL); - gfc_conv_intrinsic_caf_get (&rhs_se, rhs_expr, lhs_se.expr, lhs_kind); + gfc_conv_intrinsic_caf_get (&rhs_se, rhs_expr, lhs_se.expr, lhs_kind, + may_require_tmp); gfc_add_block_to_block (&block, &rhs_se.pre); gfc_add_block_to_block (&block, &rhs_se.post); gfc_add_block_to_block (&block, &lhs_se.post); @@ -1342,9 +1352,9 @@ conv_caf_send (gfc_code *code) { rhs_kind = build_int_cst (integer_type_node, rhs_expr->ts.kind); if (!gfc_is_coindexed (rhs_expr)) - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_send, 8, token, + tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_send, 9, token, offset, image_index, lhs_se.expr, vec, - rhs_se.expr, lhs_kind, rhs_kind); + rhs_se.expr, lhs_kind, rhs_kind, may_require_tmp); else { tree rhs_token, rhs_offset, rhs_image_index; @@ -1355,10 +1365,11 @@ conv_caf_send (gfc_code *code) { rhs_image_index = gfc_caf_get_image_index (&block, rhs_expr, caf_decl); gfc_get_caf_token_offset (&rhs_token, &rhs_offset, caf_decl, rhs_se.expr, rhs_expr); - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sendget, 12, + tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sendget, 13, token, offset, image_index, lhs_se.expr, vec, rhs_token, rhs_offset, rhs_image_index, - rhs_se.expr, rhs_vec, lhs_kind, rhs_kind); + rhs_se.expr, rhs_vec, lhs_kind, rhs_kind, + may_require_tmp); } gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&block, &lhs_se.post); @@ -7383,7 +7394,7 @@ gfc_conv_intrinsic_function (gfc_se * se, gfc_expr * expr) break; case GFC_ISYM_CAF_GET: - gfc_conv_intrinsic_caf_get (se, expr, NULL_TREE, NULL_TREE); + gfc_conv_intrinsic_caf_get (se, expr, NULL_TREE, NULL_TREE, NULL_TREE); break; case GFC_ISYM_CMPLX: diff --git a/libgfortran/caf/libcaf.h b/libgfortran/caf/libcaf.h index 85d6811..0f3398a 100644 --- a/libgfortran/caf/libcaf.h +++ b/libgfortran/caf/libcaf.h @@ -114,12 +114,12 @@ void _gfortran_caf_co_max (gfc_descriptor_t *, int, int *, char *, int, int); void _gfortran_caf_get (caf_token_t, size_t, int, gfc_descriptor_t *, - caf_vector_t *, gfc_descriptor_t *, int, int); + caf_vector_t *, gfc_descriptor_t *, int, int, bool); void _gfortran_caf_send (caf_token_t, size_t, int, gfc_descriptor_t *, - caf_vector_t *, gfc_descriptor_t *, int, int); + caf_vector_t *, gfc_descriptor_t *, int, int, bool); void _gfortran_caf_sendget (caf_token_t, size_t, int, gfc_descriptor_t *, caf_vector_t *, caf_token_t, size_t, int, - gfc_descriptor_t *, caf_vector_t *, int, int); + gfc_descriptor_t *, caf_vector_t *, int, int, bool); void _gfortran_caf_atomic_define (caf_token_t, size_t, int, void *, int *, int, int); diff --git a/libgfortran/caf/single.c b/libgfortran/caf/single.c index 990953a..773941b 100644 --- a/libgfortran/caf/single.c +++ b/libgfortran/caf/single.c @@ -533,7 +533,8 @@ _gfortran_caf_get (caf_token_t token, size_t offset, int image_index __attribute__ ((unused)), gfc_descriptor_t *src, caf_vector_t *src_vector __attribute__ ((unused)), - gfc_descriptor_t *dest, int src_kind, int dst_kind) + gfc_descriptor_t *dest, int src_kind, int dst_kind, + bool may_require_tmp) { /* FIXME: Handle vector subscripts. */ size_t i, k, size; @@ -584,6 +585,82 @@ _gfortran_caf_get (caf_token_t token, size_t offset, if (size == 0) return; + if (may_require_tmp) + { + ptrdiff_t array_offset_sr, array_offset_dst; + void *tmp = malloc (size*src_size); + + array_offset_dst = 0; + for (i = 0; i < size; i++) + { + ptrdiff_t array_offset_sr = 0; + ptrdiff_t stride = 1; + ptrdiff_t extent = 1; + for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++) + { + array_offset_sr += ((i / (extent*stride)) + % (src->dim[j]._ubound + - src->dim[j].lower_bound + 1)) + * src->dim[j]._stride; + extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1); + stride = src->dim[j]._stride; + } + array_offset_sr += (i / extent) * src->dim[rank-1]._stride; + void *sr = (void *)((char *) TOKEN (token) + offset + + array_offset_sr*GFC_DESCRIPTOR_SIZE (src)); + memcpy ((void *) ((char *) tmp + array_offset_dst), sr, src_size); + array_offset_dst += src_size; + } + + array_offset_sr = 0; + for (i = 0; i < size; i++) + { + ptrdiff_t array_offset_dst = 0; + ptrdiff_t stride = 1; + ptrdiff_t extent = 1; + for (j = 0; j < rank-1; j++) + { + array_offset_dst += ((i / (extent*stride)) + % (dest->dim[j]._ubound + - dest->dim[j].lower_bound + 1)) + * dest->dim[j]._stride; + extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1); + stride = dest->dim[j]._stride; + } + array_offset_dst += (i / extent) * dest->dim[rank-1]._stride; + void *dst = dest->base_addr + + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest); + void *sr = tmp + array_offset_sr; + + if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src) + && dst_kind == src_kind) + { + memmove (dst, sr, dst_size > src_size ? src_size : dst_size); + if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER + && dst_size > src_size) + { + if (dst_kind == 1) + memset ((void*)(char*) dst + src_size, ' ', + dst_size-src_size); + else /* dst_kind == 4. */ + for (k = src_size/4; k < dst_size/4; k++) + ((int32_t*) dst)[k] = (int32_t) ' '; + } + } + else if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_kind == 1) + assign_char1_from_char4 (dst_size, src_size, dst, sr); + else if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER) + assign_char4_from_char1 (dst_size, src_size, dst, sr); + else + convert_type (dst, GFC_DESCRIPTOR_TYPE (dest), dst_kind, + sr, GFC_DESCRIPTOR_TYPE (src), src_kind); + array_offset_sr += src_size; + } + + free (tmp); + return; + } + for (i = 0; i < size; i++) { ptrdiff_t array_offset_dst = 0; @@ -646,7 +723,8 @@ _gfortran_caf_send (caf_token_t token, size_t offset, int image_index __attribute__ ((unused)), gfc_descriptor_t *dest, caf_vector_t *dst_vector __attribute__ ((unused)), - gfc_descriptor_t *src, int dst_kind, int src_kind) + gfc_descriptor_t *src, int dst_kind, int src_kind, + bool may_require_tmp) { /* FIXME: Handle vector subscripts. */ size_t i, k, size; @@ -697,6 +775,91 @@ _gfortran_caf_send (caf_token_t token, size_t offset, if (size == 0) return; + if (may_require_tmp) + { + ptrdiff_t array_offset_sr, array_offset_dst; + void *tmp; + + if (GFC_DESCRIPTOR_RANK (src) == 0) + { + tmp = malloc (src_size); + memcpy (tmp, GFC_DESCRIPTOR_DATA (src), src_size); + } + else + { + tmp = malloc (size*src_size); + array_offset_dst = 0; + for (i = 0; i < size; i++) + { + ptrdiff_t array_offset_sr = 0; + ptrdiff_t stride = 1; + ptrdiff_t extent = 1; + for (j = 0; j < GFC_DESCRIPTOR_RANK (src)-1; j++) + { + array_offset_sr += ((i / (extent*stride)) + % (src->dim[j]._ubound + - src->dim[j].lower_bound + 1)) + * src->dim[j]._stride; + extent = (src->dim[j]._ubound - src->dim[j].lower_bound + 1); + stride = src->dim[j]._stride; + } + array_offset_sr += (i / extent) * src->dim[rank-1]._stride; + void *sr = (void *) ((char *) src->base_addr + + array_offset_sr*GFC_DESCRIPTOR_SIZE (src)); + memcpy ((void *) ((char *) tmp + array_offset_dst), sr, src_size); + array_offset_dst += src_size; + } + } + + array_offset_sr = 0; + for (i = 0; i < size; i++) + { + ptrdiff_t array_offset_dst = 0; + ptrdiff_t stride = 1; + ptrdiff_t extent = 1; + for (j = 0; j < rank-1; j++) + { + array_offset_dst += ((i / (extent*stride)) + % (dest->dim[j]._ubound + - dest->dim[j].lower_bound + 1)) + * dest->dim[j]._stride; + extent = (dest->dim[j]._ubound - dest->dim[j].lower_bound + 1); + stride = dest->dim[j]._stride; + } + array_offset_dst += (i / extent) * dest->dim[rank-1]._stride; + void *dst = (void *)((char *) TOKEN (token) + offset + + array_offset_dst*GFC_DESCRIPTOR_SIZE (dest)); + void *sr = tmp + array_offset_sr; + if (GFC_DESCRIPTOR_TYPE (dest) == GFC_DESCRIPTOR_TYPE (src) + && dst_kind == src_kind) + { + memmove (dst, sr, + dst_size > src_size ? src_size : dst_size); + if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER + && dst_size > src_size) + { + if (dst_kind == 1) + memset ((void*)(char*) dst + src_size, ' ', + dst_size-src_size); + else /* dst_kind == 4. */ + for (k = src_size/4; k < dst_size/4; k++) + ((int32_t*) dst)[k] = (int32_t) ' '; + } + } + else if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER && dst_kind == 1) + assign_char1_from_char4 (dst_size, src_size, dst, sr); + else if (GFC_DESCRIPTOR_TYPE (dest) == BT_CHARACTER) + assign_char4_from_char1 (dst_size, src_size, dst, sr); + else + convert_type (dst, GFC_DESCRIPTOR_TYPE (dest), dst_kind, + sr, GFC_DESCRIPTOR_TYPE (src), src_kind); + if (GFC_DESCRIPTOR_RANK (src)) + array_offset_sr += src_size; + } + free (tmp); + return; + } + for (i = 0; i < size; i++) { ptrdiff_t array_offset_dst = 0; @@ -769,7 +932,7 @@ _gfortran_caf_sendget (caf_token_t dst_token, size_t dst_offset, int src_image_index __attribute__ ((unused)), gfc_descriptor_t *src, caf_vector_t *src_vector __attribute__ ((unused)), - int dst_kind, int src_kind) + int dst_kind, int src_kind, bool may_require_tmp) { /* FIXME: Handle vector subscript of 'src_vector'. */ /* For a single image, src->base_addr should be the same as src_token + offset @@ -777,7 +940,7 @@ _gfortran_caf_sendget (caf_token_t dst_token, size_t dst_offset, void *src_base = GFC_DESCRIPTOR_DATA (src); GFC_DESCRIPTOR_DATA (src) = (void *) ((char *) TOKEN (src_token) + src_offset); _gfortran_caf_send (dst_token, dst_offset, dst_image_index, dest, dst_vector, - src, dst_kind, src_kind); + src, dst_kind, src_kind, may_require_tmp); GFC_DESCRIPTOR_DATA (src) = src_base; }