From patchwork Fri Aug 16 10:22:20 2024
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Mikael Morin <morin-mikael@orange.fr>
X-Patchwork-Id: 1973199
Return-Path: <gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@legolas.ozlabs.org
Authentication-Results: legolas.ozlabs.org;
	dkim=pass (2048-bit key;
 unprotected) header.d=orange.fr header.i=@orange.fr header.a=rsa-sha256
 header.s=t20230301 header.b=tKKxZ6Un;
	dkim-atps=neutral
Authentication-Results: legolas.ozlabs.org;
 spf=pass (sender SPF authorized) smtp.mailfrom=gcc.gnu.org
 (client-ip=2620:52:3:1:0:246e:9693:128c; helo=server2.sourceware.org;
 envelope-from=gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org;
 receiver=patchwork.ozlabs.org)
Received: from server2.sourceware.org (server2.sourceware.org
 [IPv6:2620:52:3:1:0:246e:9693:128c])
	(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
	 key-exchange X25519 server-signature ECDSA (secp384r1) server-digest SHA384)
	(No client certificate requested)
	by legolas.ozlabs.org (Postfix) with ESMTPS id 4WldTy3wb3z1yYl
	for <incoming@patchwork.ozlabs.org>; Fri, 16 Aug 2024 20:27:34 +1000 (AEST)
Received: from server2.sourceware.org (localhost [IPv6:::1])
	by sourceware.org (Postfix) with ESMTP id 65D12385DDEB
	for <incoming@patchwork.ozlabs.org>; Fri, 16 Aug 2024 10:27:32 +0000 (GMT)
X-Original-To: gcc-patches@gcc.gnu.org
Delivered-To: gcc-patches@gcc.gnu.org
Received: from msa.smtpout.orange.fr (out-65.smtpout.orange.fr
 [193.252.22.65])
 by sourceware.org (Postfix) with ESMTPS id ABAE6385DDC5;
 Fri, 16 Aug 2024 10:22:34 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org ABAE6385DDC5
Authentication-Results: sourceware.org; dmarc=pass (p=quarantine dis=none)
 header.from=orange.fr
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=orange.fr
ARC-Filter: OpenARC Filter v1.0.0 sourceware.org ABAE6385DDC5
Authentication-Results: server2.sourceware.org;
 arc=none smtp.remote-ip=193.252.22.65
ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1723803758; cv=none;
 b=kBxXBgoHpt78j5uvjurxLakqx070UiJlJLKOuj4RR9DAdOCcZRtit1ppxdliQBQg5my6WuhDzZ2tWBEl/OTXcR633iqf307ptwxotlvOAlos1qj8ZFKueQnsYNc7rFUtzb8lImSiDDNAHl77h0ZQBD2ACuUWD2nTJ1nEvy6bqAI=
ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key;
 t=1723803758; c=relaxed/simple;
 bh=4lrbOpvnz8JeFzqhta9DbviKO0Zg8IoEHVzvObiOElk=;
 h=DKIM-Signature:From:To:Subject:Date:Message-ID:MIME-Version;
 b=XMWhod4otGoCV5hCT3+0oSsJRKB+2K/n033yhlaUqvz1+FEEkr8+OIGNVrCH0aWe6ypigEhZ5jT3M9/hEFVU8AOuxXKtHyUMTyWMHa66KDdA8iHn4d23liwPCGd5C0LpihT8TfRpVvlpR/MaS+ffTerBcXYk02ryBsO0nX7j040=
ARC-Authentication-Results: i=1; server2.sourceware.org
Received: from cyrano.home ([86.215.161.51]) by smtp.orange.fr with ESMTPA
 id eu6FsXj565Qjaeu6Ls0CgJ; Fri, 16 Aug 2024 12:22:33 +0200
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=orange.fr;
 s=t20230301; t=1723803753;
 bh=ntzYmpXfc8qzka0D173tPv6cH7pCxzgEj6VdDNSPN7A=;
 h=From:To:Subject:Date:Message-ID:MIME-Version;
 b=tKKxZ6UnWbT5BPJMA4DzOXMBHpn7l3o85aS/eYN8gv9BYO8ThMH7Iu7taOfCaZY7T
 GCnlle1u2YtARBrJ+oCg5Q2HypT23PkwhoxnmQihubWck0OxPRbj9xlnLP3SRlNEoQ
 r7090359t+M6Hxc0jE3gdk8gBgEyBI2dWbzEX/nQpU2lJNFvGLQGGlbmy+0m4av/u2
 l/BDiTSWo3K5dqU1VY9tslbqz860eRfEIWRJC4NLWcVxH3yo59bX0rBPk+FIWGbutj
 7jbiymiBZhPqSYzP/3ZIh8/RwK/wQWxS3WigsC6ukxkns7J+EL/4TKO2r+2AaOGKbW
 95vScS/J78lNw==
X-ME-Helo: cyrano.home
X-ME-Auth: bW9yaW4tbWlrYWVsQG9yYW5nZS5mcg==
X-ME-Date: Fri, 16 Aug 2024 12:22:33 +0200
X-ME-IP: 86.215.161.51
From: Mikael Morin <morin-mikael@orange.fr>
To: gcc-patches@gcc.gnu.org,
	fortran@gcc.gnu.org
Subject: [PATCH v2 03/10] fortran: Inline MINLOC/MAXLOC with no DIM and ARRAY
 of rank 1 [PR90608]
Date: Fri, 16 Aug 2024 12:22:20 +0200
Message-ID: <20240816102227.189290-4-morin-mikael@orange.fr>
X-Mailer: git-send-email 2.43.0
In-Reply-To: <20240816102227.189290-1-morin-mikael@orange.fr>
References: <20240816102227.189290-1-morin-mikael@orange.fr>
MIME-Version: 1.0
X-Spam-Status: No, score=-10.2 required=5.0 tests=BAYES_00, DKIM_SIGNED,
 DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0,
 KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, RCVD_IN_MSPIKE_H4, RCVD_IN_MSPIKE_WL,
 SPF_HELO_PASS, SPF_PASS, TXREP,
 T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on
 server2.sourceware.org
X-BeenThere: gcc-patches@gcc.gnu.org
X-Mailman-Version: 2.1.30
Precedence: list
List-Id: Gcc-patches mailing list <gcc-patches.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-patches>,
 <mailto:gcc-patches-request@gcc.gnu.org?subject=subscribe>
Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org

From: Mikael Morin <mikael@gcc.gnu.org>

Regression-tested on x86_64-pc-linux-gnu.
OK for master?

-- >8 --

Enable inline code generation for the MINLOC and MAXLOC intrinsic, if the
DIM argument is not present and ARRAY has rank 1.  This case is similar to
the case where the result is scalar (DIM present and rank 1 ARRAY), which
already supports inline expansion of the intrinsic.  Both cases return
the same value, with the difference that the result is an array of size 1 if
DIM is absent, whereas it's a scalar if DIM  is present.  So all there is
to do for the new case to work is hook the inline expansion with the
scalarizer.

	PR fortran/90608

gcc/fortran/ChangeLog:

	* trans-array.cc (gfc_conv_ss_startstride): Set the scalarization
	rank based on the MINLOC/MAXLOC rank if needed.  Call the inline
	code generation and setup the scalarizer array descriptor info
	in the MINLOC and MAXLOC cases.
	* trans-intrinsic.cc (gfc_conv_intrinsic_minmaxloc): Return the
	result array element if the scalarizer is setup and we are inside
	the loops.  Restrict library function call dispatch to the case
	where inline expansion is not supported.  Declare an array result
	if the expression isn't scalar.  Initialize the array result single
	element and return the result variable if the expression isn't
	scalar.
	(walk_inline_intrinsic_minmaxloc): New function.
	(walk_inline_intrinsic_function): Add MINLOC and MAXLOC cases,
	dispatching to walk_inline_intrinsic_minmaxloc.
	(gfc_add_intrinsic_ss_code): Add MINLOC and MAXLOC cases.
	(gfc_inline_intrinsic_function_p): Return true if ARRAY has rank 1,
	regardless of DIM.
---
 gcc/fortran/trans-array.cc     |  25 ++++
 gcc/fortran/trans-intrinsic.cc | 224 +++++++++++++++++++++++----------
 2 files changed, 181 insertions(+), 68 deletions(-)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 9fb0b2b398d..46e2152d0f0 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -4851,6 +4851,8 @@ gfc_conv_ss_startstride (gfc_loopinfo * loop)
 	    case GFC_ISYM_UBOUND:
 	    case GFC_ISYM_LCOBOUND:
 	    case GFC_ISYM_UCOBOUND:
+	    case GFC_ISYM_MAXLOC:
+	    case GFC_ISYM_MINLOC:
 	    case GFC_ISYM_SHAPE:
 	    case GFC_ISYM_THIS_IMAGE:
 	      loop->dimen = ss->dimen;
@@ -4900,6 +4902,29 @@ done:
 	case GFC_SS_INTRINSIC:
 	  switch (expr->value.function.isym->id)
 	    {
+	    case GFC_ISYM_MINLOC:
+	    case GFC_ISYM_MAXLOC:
+	      {
+		gfc_se se;
+		gfc_init_se (&se, nullptr);
+		se.loop = loop;
+		se.ss = ss;
+		gfc_conv_intrinsic_function (&se, expr);
+		gfc_add_block_to_block (&outer_loop->pre, &se.pre);
+		gfc_add_block_to_block (&outer_loop->post, &se.post);
+
+		info->descriptor = se.expr;
+
+		info->data = gfc_conv_array_data (info->descriptor);
+		info->data = gfc_evaluate_now (info->data, &outer_loop->pre);
+
+		info->offset = gfc_index_zero_node;
+		info->start[0] = gfc_index_zero_node;
+		info->end[0] = gfc_index_zero_node;
+		info->stride[0] = gfc_index_one_node;
+		continue;
+	      }
+
 	    /* Fall through to supply start and stride.  */
 	    case GFC_ISYM_LBOUND:
 	    case GFC_ISYM_UBOUND:
diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc
index 2c8512060cc..9fcb57a9cc4 100644
--- a/gcc/fortran/trans-intrinsic.cc
+++ b/gcc/fortran/trans-intrinsic.cc
@@ -5273,66 +5273,95 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
    we need to handle.  For performance reasons we sometimes create two
    loops instead of one, where the second one is much simpler.
    Examples for minloc intrinsic:
-   1) Result is an array, a call is generated
-   2) Array mask is used and NaNs need to be supported:
-      limit = Infinity;
-      pos = 0;
-      S = from;
-      while (S <= to) {
-	if (mask[S]) {
-	  if (pos == 0) pos = S + (1 - from);
-	  if (a[S] <= limit) { limit = a[S]; pos = S + (1 - from); goto lab1; }
-	}
-	S++;
-      }
-      goto lab2;
-      lab1:;
-      while (S <= to) {
-	if (mask[S]) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
-	S++;
-      }
-      lab2:;
-   3) NaNs need to be supported, but it is known at compile time or cheaply
-      at runtime whether array is nonempty or not:
-      limit = Infinity;
-      pos = 0;
-      S = from;
-      while (S <= to) {
-	if (a[S] <= limit) { limit = a[S]; pos = S + (1 - from); goto lab1; }
-	S++;
-      }
-      if (from <= to) pos = 1;
-      goto lab2;
-      lab1:;
-      while (S <= to) {
-	if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
-	S++;
-      }
-      lab2:;
-   4) NaNs aren't supported, array mask is used:
-      limit = infinities_supported ? Infinity : huge (limit);
-      pos = 0;
-      S = from;
-      while (S <= to) {
-	if (mask[S]) { limit = a[S]; pos = S + (1 - from); goto lab1; }
-	S++;
-      }
-      goto lab2;
-      lab1:;
-      while (S <= to) {
-	if (mask[S]) if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
-	S++;
-      }
-      lab2:;
-   5) Same without array mask:
-      limit = infinities_supported ? Infinity : huge (limit);
-      pos = (from <= to) ? 1 : 0;
-      S = from;
-      while (S <= to) {
-	if (a[S] < limit) { limit = a[S]; pos = S + (1 - from); }
-	S++;
-      }
-   For 3) and 5), if mask is scalar, this all goes into a conditional,
+   A: Result is scalar.
+      1) Array mask is used and NaNs need to be supported:
+	 limit = Infinity;
+	 pos = 0;
+	 S = from;
+	 while (S <= to) {
+	   if (mask[S]) {
+	     if (pos == 0) pos = S + (1 - from);
+	     if (a[S] <= limit) {
+	       limit = a[S];
+	       pos = S + (1 - from);
+	       goto lab1;
+	     }
+	   }
+	   S++;
+	 }
+	 goto lab2;
+	 lab1:;
+	 while (S <= to) {
+	   if (mask[S])
+	     if (a[S] < limit) {
+	       limit = a[S];
+	       pos = S + (1 - from);
+	     }
+	   S++;
+	 }
+	 lab2:;
+      2) NaNs need to be supported, but it is known at compile time or cheaply
+	 at runtime whether array is nonempty or not:
+	 limit = Infinity;
+	 pos = 0;
+	 S = from;
+	 while (S <= to) {
+	   if (a[S] <= limit) {
+	     limit = a[S];
+	     pos = S + (1 - from);
+	     goto lab1;
+	   }
+	   S++;
+	 }
+	 if (from <= to) pos = 1;
+	 goto lab2;
+	 lab1:;
+	 while (S <= to) {
+	   if (a[S] < limit) {
+	     limit = a[S];
+	     pos = S + (1 - from);
+	   }
+	   S++;
+	 }
+	 lab2:;
+      3) NaNs aren't supported, array mask is used:
+	 limit = infinities_supported ? Infinity : huge (limit);
+	 pos = 0;
+	 S = from;
+	 while (S <= to) {
+	   if (mask[S]) {
+	     limit = a[S];
+	     pos = S + (1 - from);
+	     goto lab1;
+	   }
+	   S++;
+	 }
+	 goto lab2;
+	 lab1:;
+	 while (S <= to) {
+	   if (mask[S])
+	     if (a[S] < limit) {
+	       limit = a[S];
+	       pos = S + (1 - from);
+	     }
+	   S++;
+	 }
+	 lab2:;
+      4) Same without array mask:
+	 limit = infinities_supported ? Infinity : huge (limit);
+	 pos = (from <= to) ? 1 : 0;
+	 S = from;
+	 while (S <= to) {
+	   if (a[S] < limit) {
+	     limit = a[S];
+	     pos = S + (1 - from);
+	   }
+	   S++;
+	 }
+   B) ARRAY has rank 1, and DIM is absent.  Use the same code as the scalar
+      case and wrap the result in an array.
+   C) Otherwise, a call is generated
+   For 2) and 4), if mask is scalar, this all goes into a conditional,
    setting pos = 0; in the else branch.
 
    Since we now also support the BACK argument, instead of using
@@ -5346,7 +5375,7 @@ strip_kind_from_actual (gfc_actual_arglist * actual)
      ....
 
      The optimizer is smart enough to move the condition out of the loop.
-     The are now marked as unlikely to for further speedup.  */
+     They are now marked as unlikely too for further speedup.  */
 
 static void
 gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
@@ -5377,6 +5406,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
   gfc_expr *backexpr;
   gfc_se backse;
   tree pos;
+  tree result_var = NULL_TREE;
   int n;
   bool optional_mask;
 
@@ -5392,8 +5422,19 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
 
   if (se->ss)
     {
-      gfc_conv_intrinsic_funcall (se, expr);
-      return;
+      if (se->ss->info->useflags)
+	{
+	  /* The inline implementation of MINLOC/MAXLOC has been generated
+	     before, out of the scalarization loop; now we can just use the
+	     result.  */
+	  gfc_conv_tmp_array_ref (se);
+	  return;
+	}
+      else if (!gfc_inline_intrinsic_function_p (expr))
+	{
+	  gfc_conv_intrinsic_funcall (se, expr);
+	  return;
+	}
     }
 
   arrayexpr = actual->expr;
@@ -5419,10 +5460,29 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
       return;
     }
 
+  type = gfc_typenode_for_spec (&expr->ts);
+
+  if (expr->rank > 0)
+    {
+      gfc_array_spec as;
+      memset (&as, 0, sizeof (as));
+
+      as.rank = 1;
+      as.lower[0] = gfc_get_int_expr (gfc_index_integer_kind,
+				      &arrayexpr->where,
+				      HOST_WIDE_INT_1);
+      as.upper[0] = gfc_get_int_expr (gfc_index_integer_kind,
+				      &arrayexpr->where,
+				      HOST_WIDE_INT_1);
+
+      tree array = gfc_get_nodesc_array_type (type, &as, PACKED_STATIC, true);
+
+      result_var = gfc_create_var (array, "loc_result");
+    }
+
   /* Initialize the result.  */
   pos = gfc_create_var (gfc_array_index_type, "pos");
   offset = gfc_create_var (gfc_array_index_type, "offset");
-  type = gfc_typenode_for_spec (&expr->ts);
 
   /* Walk the arguments.  */
   arrayss = gfc_walk_expr (arrayexpr);
@@ -5828,7 +5888,18 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op)
     }
   gfc_cleanup_loop (&loop);
 
-  se->expr = convert (type, pos);
+  tree value = convert (type, pos);
+  if (expr->rank > 0)
+    {
+      tree res_arr_ref = gfc_build_array_ref (result_var, gfc_index_zero_node,
+					      NULL_TREE, true);
+
+      gfc_add_modify (&se->pre, res_arr_ref, value);
+
+      se->expr = result_var;
+    }
+  else
+    se->expr = value;
 }
 
 /* Emit code for findloc.  */
@@ -11537,6 +11608,19 @@ walk_inline_intrinsic_arith (gfc_ss *ss, gfc_expr *expr)
 }
 
 
+/* Create the gfc_ss list for the arguments to MINLOC or MAXLOC when the
+   function is to be inlined.  */
+
+static gfc_ss *
+walk_inline_intrinsic_minmaxloc (gfc_ss *ss, gfc_expr *expr ATTRIBUTE_UNUSED)
+{
+  if (expr->rank == 0)
+    return ss;
+
+  return gfc_get_array_ss (ss, expr, 1, GFC_SS_INTRINSIC);
+}
+
+
 static gfc_ss *
 walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr)
 {
@@ -11550,6 +11634,10 @@ walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr)
       case GFC_ISYM_TRANSPOSE:
 	return walk_inline_intrinsic_transpose (ss, expr);
 
+      case GFC_ISYM_MAXLOC:
+      case GFC_ISYM_MINLOC:
+	return walk_inline_intrinsic_minmaxloc (ss, expr);
+
       default:
 	gcc_unreachable ();
     }
@@ -11569,6 +11657,8 @@ gfc_add_intrinsic_ss_code (gfc_loopinfo * loop ATTRIBUTE_UNUSED, gfc_ss * ss)
     case GFC_ISYM_LBOUND:
     case GFC_ISYM_UCOBOUND:
     case GFC_ISYM_LCOBOUND:
+    case GFC_ISYM_MAXLOC:
+    case GFC_ISYM_MINLOC:
     case GFC_ISYM_THIS_IMAGE:
     case GFC_ISYM_SHAPE:
       break;
@@ -11660,16 +11750,14 @@ gfc_inline_intrinsic_function_p (gfc_expr *expr)
 	  return false;
 
 	gfc_actual_arglist *array_arg = expr->value.function.actual;
-	gfc_actual_arglist *dim_arg = array_arg->next;
 
 	gfc_expr *array = array_arg->expr;
-	gfc_expr *dim = dim_arg->expr;
 
 	if (!(array->ts.type == BT_INTEGER
 	      || array->ts.type == BT_REAL))
 	  return false;
 
-	if (array->rank == 1 && dim != nullptr)
+	if (array->rank == 1)
 	  return true;
 
 	return false;