diff mbox series

[openacc,og7,committed] Make reduction copy clauses 'private'

Message ID 4103cee4-003f-bfb1-a811-fd321a8669ad@mentor.com
State New
Headers show
Series [openacc,og7,committed] Make reduction copy clauses 'private' | expand

Commit Message

Chung-Lin Tang Sept. 5, 2017, 2:32 p.m. UTC
As we discussed, we are to support a behavior where within individual gangs,
worker/vector level reductions will correctly work with results immediately available.
This is on top of the implicit 'copy' clause added when we have loop reductions.

This patch adds a capability to mark map clauses additionally as 'private' (we may
be overloading this word a little too much :P), such that within offloaded regions
and wrt to our reduction lowering, the variable is (first)private, with additional
copy back appended at end of the offloaded region.

Care is taken to make sure this behavior is not applied when potential loop gang
reductions may happen (which this will not work).  In other cases, for gang-redundant
code, supposedly the multiple copy backs should all be the same, so the behavior
is same.

This is sort of a refinement of the implicit copy clause for reductions in PR70895.
A libgomp testcase is added to test the multiple worker-level reduction result case
across multiple gangs. Patch was tested and pushed to openacc-gcc-7-branch.

Chung-Lin
From 2dc21f336368889c1ebf031801a7613f65899ef1 Mon Sep 17 00:00:00 2001
From: Chung-Lin Tang <cltang@codesourcery.com>
Date: Tue, 5 Sep 2017 22:09:34 +0800
Subject: [PATCH] Add support for making maps 'private' inside offloaded
 regions.

2017-09-05  Chung-Lin Tang  <cltang@codesourcery.com>

	gcc/
	* tree.h (OMP_CLAUSE_MAP_PRIVATE): Define macro.
	* gimplify.c (enum gimplify_omp_var_data): Add GOVD_MAP_PRIVATE enum value.
	(omp_add_variable): Add GOVD_MAP_PRIVATE to reduction clause flags if
	not a gang-partitioned loop directive.
	(gimplify_adjust_omp_clauses_1): Set OMP_CLAUSE_MAP_PRIVATE of new map
	clause to 1 if GOVD_MAP_PRIVATE flag is present.
	* omp-low.c (lower_oacc_reductions): Handle map clauses with
	OMP_CLAUSE_MAP_PRIVATE set in same matter as firstprivate/private.
	(lower_omp_target): Likewise. Add copy back code for map clauses with
	OMP_CLAUSE_MAP_PRIVATE set.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/reduction-9.c: New test.
---
 gcc/ChangeLog.openacc                              | 14 ++++++++
 gcc/gimplify.c                                     | 34 ++++++++++++++++--
 gcc/omp-low.c                                      | 28 +++++++++++++--
 gcc/tree.h                                         |  3 ++
 libgomp/ChangeLog.openacc                          |  4 +++
 .../libgomp.oacc-c-c++-common/reduction-9.c        | 41 ++++++++++++++++++++++
 6 files changed, 119 insertions(+), 5 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c
diff mbox series

Patch

diff --git a/gcc/ChangeLog.openacc b/gcc/ChangeLog.openacc
index 4b1ce0b..23e19d9 100644
--- a/gcc/ChangeLog.openacc
+++ b/gcc/ChangeLog.openacc
@@ -1,3 +1,17 @@ 
+2017-09-05  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	* tree.h (OMP_CLAUSE_MAP_PRIVATE): Define macro.
+	* gimplify.c (enum gimplify_omp_var_data): Add GOVD_MAP_PRIVATE enum value.
+	(omp_add_variable): Add GOVD_MAP_PRIVATE to reduction clause flags if
+	not a gang-partitioned loop directive.
+	(gimplify_adjust_omp_clauses_1): Set OMP_CLAUSE_MAP_PRIVATE of new map
+	clause to 1 if GOVD_MAP_PRIVATE flag is present.
+	* omp-low.c (lower_oacc_reductions): Handle map clauses with
+	OMP_CLAUSE_MAP_PRIVATE set in same matter as firstprivate/private.
+	(lower_omp_target): Likewise. Add copy back code for map clauses with
+	OMP_CLAUSE_MAP_PRIVATE set.
+	* tree.h (OMP_CLAUSE_MAP_PRIVATE): Define macro.
+
 2017-08-11  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* config/nvptx/nvptx.c (PTX_GANG_DEFAULT): Delete define.
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index e481a72..2c10c64 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -102,6 +102,9 @@  enum gimplify_omp_var_data
   /* Flag for GOVD_MAP: must be present already.  */
   GOVD_MAP_FORCE_PRESENT = 524288,
 
+  /* Flag for GOVD_MAP, copy to/from private storage inside offloaded region.  */
+  GOVD_MAP_PRIVATE = 1048576,
+
   GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE
 			   | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR
 			   | GOVD_LOCAL)
@@ -6717,6 +6720,21 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
   if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
     {
       struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
+
+      bool gang = false, worker = false, vector = false;
+      for (tree c = ctx->clauses; c; c = OMP_CLAUSE_CHAIN (c))
+	{
+	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_GANG)
+	    gang = true;
+	  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WORKER)
+	    worker = true;
+	  else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR)
+	    vector = true;
+	}
+
+      /* Set new copy map as 'private' if sure we're not gang-partitioning.  */
+      bool map_private = !gang && (worker || vector);
+
       while (outer_ctx)
 	{
 	  n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
@@ -6738,12 +6756,21 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
 		  /* Remove firstprivate and make it a copy map.  */
 		  n->value &= ~GOVD_FIRSTPRIVATE;
 		  n->value |= GOVD_MAP;
+
+		  /* If not gang-partitioned, add MAP_PRIVATE on the map
+		     clause.  */
+		  if (map_private)
+		    n->value |= GOVD_MAP_PRIVATE;
 		}
 	    }
 	  else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
 	    {
-	      splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl,
-				 GOVD_MAP | GOVD_SEEN);
+	      unsigned f = GOVD_MAP | GOVD_SEEN;
+
+	      /* If not gang-partitioned, add MAP_PRIVATE on the map clause.  */
+	      if (map_private)
+		f |= GOVD_MAP_PRIVATE;
+	      splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl, f);
 	      break;
 	    }
 	  outer_ctx = outer_ctx->outer_context;
@@ -8867,6 +8894,9 @@  gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data)
 	  gcc_unreachable ();
 	}
       OMP_CLAUSE_SET_MAP_KIND (clause, kind);
+      if ((flags & GOVD_MAP_PRIVATE)
+	  && TREE_CODE (OMP_CLAUSE_DECL (clause)) == VAR_DECL)
+	OMP_CLAUSE_MAP_PRIVATE (clause) = 1;
       tree c2 = gomp_needs_data_present (decl);
       /* Handle OpenACC pointers that were declared inside acc data
 	 regions.  */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f45c5c3..e790f0f 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5220,7 +5220,9 @@  lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 		      goto has_outer_reduction;
 		    }
 		  else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE
-			    || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE)
+			    || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE
+			    || (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_MAP
+				&& OMP_CLAUSE_MAP_PRIVATE (cls)))
 			   && orig == OMP_CLAUSE_DECL (cls))
 		    {
 		      is_private = true;
@@ -8120,7 +8122,10 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		&& TREE_CODE (var_type) == ARRAY_TYPE
 		&& !oacc_firstprivate_int)
 	      x = build_simple_mem_ref (x);
-	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
+	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
+		|| (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+		    && (OMP_CLAUSE_MAP_KIND (c) & GOMP_MAP_TO)
+		    && OMP_CLAUSE_MAP_PRIVATE (c)))
 	      {
 		gcc_assert (is_gimple_omp_oacc (ctx->stmt));
 		if (oacc_firstprivate_int)
@@ -9054,7 +9059,24 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       gimple_seq_add_seq (&new_body, join_seq);
 
       if (offloaded)
-	new_body = maybe_catch_exception (new_body);
+	{
+	  /* For OMP_CLAUSE_MAP_PRIVATE maps, add a copy back from private
+	     storage to receiver ref, for copying back to host.  */
+	  for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
+	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+		&& (OMP_CLAUSE_MAP_KIND (c) & GOMP_MAP_FROM)
+		&& OMP_CLAUSE_MAP_PRIVATE (c))
+	      {
+		tree var = OMP_CLAUSE_DECL (c);
+		tree new_var = lookup_decl (var, ctx);
+		tree x = build_receiver_ref (var, true, ctx);
+		gimple_seq seq = NULL;
+		gimplify_assign (x, new_var, &seq);
+		gimple_seq_add_seq (&new_body, seq);
+	      }
+
+	  new_body = maybe_catch_exception (new_body);
+	}
 
       gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
       gimple_omp_set_body (stmt, new_body);
diff --git a/gcc/tree.h b/gcc/tree.h
index a92ea11..cfe0ee2 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1554,6 +1554,9 @@  extern void protected_set_expr_location (tree, location_t);
 /* Nonzero if this map clause is for an ACC parallel reduction variable.  */
 #define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \
   TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
+/* Nozero if this map is loaded to private storage inside offloaded region.  */
+#define OMP_CLAUSE_MAP_PRIVATE(NODE) \
+  TREE_NOTHROW (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP))
 
 #define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)
diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc
index 74681f2..cd1f3ab 100644
--- a/libgomp/ChangeLog.openacc
+++ b/libgomp/ChangeLog.openacc
@@ -1,3 +1,7 @@ 
+2017-09-05  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	* testsuite/libgomp.oacc-c-c++-common/reduction-9.c: New test.
+
 2017-08-11  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* plugin/plugin-nvptx.c (nvptx_exec): Dynamically allocate
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c
new file mode 100644
index 0000000..d6e02fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c
@@ -0,0 +1,41 @@ 
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *argv[])
+{
+#define N 100
+  int n = N;
+  int i, j, tmp;
+  int input[N*N], output[N], houtput[N];
+
+  for (i = 0; i < n * n; i++)
+    input[i] = i;
+
+  for (i = 0; i < n; i++)
+    {
+      tmp = 0;
+      for (j = 0; j < n; j++)
+	tmp += input[i * n + j];
+      houtput[i] = tmp;
+    }
+  
+  #pragma acc parallel loop gang
+  for (i = 0; i < n; i++)
+    {
+      tmp = 0;
+
+      #pragma acc loop worker reduction(+:tmp)
+      for (j = 0; j < n; j++)
+	tmp += input[i * n + j];
+
+      output[i] = tmp;
+    }
+
+  /* Test if every worker-level reduction had correct private result.  */
+  for (i = 0; i < n; i++)
+    if (houtput[i] != output[i])
+      abort ();
+
+  return 0;
+}