diff mbox

[3/5] Handle original loop tree in expand_omp_for_generic

Message ID 87si5el9s9.fsf@kepler.schwinge.homeip.net
State New
Headers show

Commit Message

Thomas Schwinge Oct. 13, 2015, 9:48 p.m. UTC
Hi Tom!

On Mon, 12 Oct 2015 18:56:29 +0200, Tom de Vries <Tom_deVries@mentor.com> wrote:
> Handle original loop tree in expand_omp_for_generic
> 
> 2015-09-12  Tom de Vries  <tom@codesourcery.com>
> 
> 	PR tree-optimization/67476
> 	* omp-low.c (expand_omp_for_generic): Handle original loop tree.

Working on a merge from trunk into gomp-4_0-branch, I'm seeing your
change (trunk r228754) conflict with code Chung-Lin changed
(gomp-4_0-branch r224505).  So, would you two please cherry-pick/merge
trunk r228754 into gomp-4_0-branch?  Thanks!  (I'm assuming you can
easily tell what needs to be done here; it's been a long time since
Chung-Lin touched this code, so CCing him just in case.)  Thanks!


Chung-Lin's gomp-4_0-branch r224505:

commit 5f9849b7f0723d06fcd18a18e0880d4df75da92a
Author: cltang <cltang@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue Jun 16 08:59:01 2015 +0000

    2015-06-16  Chung-Lin Tang  <cltang@codesourcery.com>
    
    	* omp-low.c (struct omp_region): Add inside_kernels_p field.
    	(expand_omp_for_generic): Adjust to generate a 'sequential' loop
    	when GOMP builtin arguments are BUILT_IN_NONE.
    	(expand_omp_for): Use expand_omp_for_generic() to generate a
    	non-parallelized loop for OMP_FORs inside OpenACC kernels regions.
    	(expand_omp): Mark inside_kernels_p field true for regions
    	nested inside OpenACC kernels constructs.
    
    
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@224505 138bc75d-0d04-0410-961f-82ee72b054a4



The merge conflict looks as follows:

      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
			       recompute_dominator (CDI_DOMINATORS, l1_bb));

<<<<<<< HEAD
      struct loop *outer_loop;
      if (seq_loop)
	outer_loop = l0_bb->loop_father;
      else
	{
	  outer_loop = alloc_loop ();
	  outer_loop->header = l0_bb;
	  outer_loop->latch = l2_bb;
	  add_loop (outer_loop, l0_bb->loop_father);
	}
=======
      /* We enter expand_omp_for_generic with a loop.  This original loop may
	 have its own loop struct, or it may be part of an outer loop struct
	 (which may be the fake loop).  */
      struct loop *outer_loop = entry_bb->loop_father;
      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
>>>>>>> e2c514f0507fb1864c4eed5d691e47156be57b5b

      add_bb_to_loop (l2_bb, outer_loop);

      /* We've added a new loop around the original loop.  Allocate the
	 corresponding loop struct.  */
      struct loop *new_loop = alloc_loop ();
      new_loop->header = l0_bb;
      new_loop->latch = l2_bb;
      add_loop (new_loop, outer_loop);

      /* Allocate a loop structure for the original loop unless we already
	 had one.  */
      if (!orig_loop_has_loop_struct
	  && !gimple_omp_for_combined_p (fd->for_stmt))
	{
	  struct loop *orig_loop = alloc_loop ();
	  orig_loop->header = l1_bb;
	  /* The loop may have multiple latches.  */
	  add_loop (orig_loop, new_loop);
	}
    }
}


Grüße,
 Thomas
diff mbox

Patch

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index be09b0f..6fa08da 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,13 @@ 
+2015-06-16  Chung-Lin Tang  <cltang@codesourcery.com>
+
+	* omp-low.c (struct omp_region): Add inside_kernels_p field.
+	(expand_omp_for_generic): Adjust to generate a 'sequential' loop
+	when GOMP builtin arguments are BUILT_IN_NONE.
+	(expand_omp_for): Use expand_omp_for_generic() to generate a
+	non-parallelized loop for OMP_FORs inside OpenACC kernels regions.
+	(expand_omp): Mark inside_kernels_p field true for regions
+	nested inside OpenACC kernels constructs.
+
 2015-06-15  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* omp-low.c (expand_omp_for_static_nochunk): Update entry_bb after
diff --git gcc/omp-low.c gcc/omp-low.c
index c7451c9..a3dab12 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -161,6 +161,9 @@  struct omp_region
   /* True if this is a combined parallel+workshare region.  */
   bool is_combined_parallel;
 
+  /* True if this is nested inside an OpenACC kernels construct.  */
+  bool inside_kernels_p;
+
   /* For an OpenACC loop, the level of parallelism requested.  */
   int gwv_this;
 
@@ -6862,6 +6865,7 @@  expand_omp_for_generic (struct omp_region *region,
   gassign *assign_stmt;
   bool in_combined_parallel = is_combined_parallel (region);
   bool broken_loop = region->cont == NULL;
+  bool seq_loop = (!start_fn || !next_fn);
   edge e, ne;
   tree *counts = NULL;
   int i;
@@ -6949,7 +6953,20 @@  expand_omp_for_generic (struct omp_region *region,
 							    zero_iter_bb));
 	}
     }
-  if (in_combined_parallel)
+  if (seq_loop)
+    {
+      tree n1 = fold_convert (fd->iter_type, fd->loop.n1);
+      tree n2 = fold_convert (fd->iter_type, fd->loop.n2);
+
+      assign_stmt = gimple_build_assign (istart0, n1);
+      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+      assign_stmt = gimple_build_assign (iend0, n2);
+      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+
+      t = fold_build2 (NE_EXPR, boolean_type_node, istart0, iend0);
+    }
+  else if (in_combined_parallel)
     {
       /* In a combined parallel loop, emit a call to
 	 GOMP_loop_foo_next.  */
@@ -7135,32 +7152,38 @@  expand_omp_for_generic (struct omp_region *region,
 	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
 
       /* Emit code to get the next parallel iteration in L2_BB.  */
-      gsi = gsi_start_bb (l2_bb);
+      if (!seq_loop)
+	{
+	  gsi = gsi_start_bb (l2_bb);
 
-      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
-			   build_fold_addr_expr (istart0),
-			   build_fold_addr_expr (iend0));
-      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-				    false, GSI_CONTINUE_LINKING);
-      if (TREE_TYPE (t) != boolean_type_node)
-	t = fold_build2 (NE_EXPR, boolean_type_node,
-			 t, build_int_cst (TREE_TYPE (t), 0));
-      gcond *cond_stmt = gimple_build_cond_empty (t);
-      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	  t = build_call_expr (builtin_decl_explicit (next_fn), 2,
+			       build_fold_addr_expr (istart0),
+			       build_fold_addr_expr (iend0));
+	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+					false, GSI_CONTINUE_LINKING);
+	  if (TREE_TYPE (t) != boolean_type_node)
+	    t = fold_build2 (NE_EXPR, boolean_type_node,
+			     t, build_int_cst (TREE_TYPE (t), 0));
+	  gcond *cond_stmt = gimple_build_cond_empty (t);
+	  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
+	}
     }
 
   /* Add the loop cleanup function.  */
   gsi = gsi_last_bb (exit_bb);
-  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
-    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
-  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
-    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
-  else
-    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
-  gcall *call_stmt = gimple_build_call (t, 0);
-  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
-    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
-  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
+  if (!seq_loop)
+    {
+      if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
+	t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
+      else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+	t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
+      else
+	t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
+      gcall *call_stmt = gimple_build_call (t, 0);
+      if (gimple_omp_return_lhs (gsi_stmt (gsi)))
+	gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
+      gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
+    }
   gsi_remove (&gsi, true);
 
   /* Connect the new blocks.  */
@@ -7172,7 +7195,7 @@  expand_omp_for_generic (struct omp_region *region,
       gimple_seq phis;
 
       e = find_edge (cont_bb, l3_bb);
-      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
+      ne = make_edge (l2_bb, l3_bb, seq_loop ? EDGE_FALLTHRU : EDGE_FALSE_VALUE);
 
       phis = phi_nodes (l3_bb);
       for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -7208,7 +7231,8 @@  expand_omp_for_generic (struct omp_region *region,
 	  e = find_edge (cont_bb, l2_bb);
 	  e->flags = EDGE_FALLTHRU;
 	}
-      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
+      if (!seq_loop)
+	make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
 
       set_immediate_dominator (CDI_DOMINATORS, l2_bb,
 			       recompute_dominator (CDI_DOMINATORS, l2_bb));
@@ -7219,10 +7243,16 @@  expand_omp_for_generic (struct omp_region *region,
       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
 
-      struct loop *outer_loop = alloc_loop ();
-      outer_loop->header = l0_bb;
-      outer_loop->latch = l2_bb;
-      add_loop (outer_loop, l0_bb->loop_father);
+      struct loop *outer_loop;
+      if (seq_loop)
+	outer_loop = l0_bb->loop_father;
+      else
+	{
+	  outer_loop = alloc_loop ();
+	  outer_loop->header = l0_bb;
+	  outer_loop->latch = l2_bb;
+	  add_loop (outer_loop, l0_bb->loop_father);
+	}
 
       if (!gimple_omp_for_combined_p (fd->for_stmt))
 	{
@@ -8704,7 +8734,10 @@  expand_omp_for (struct omp_region *region, gimple inner_stmt)
        original loops from being detected.  Fix that up.  */
     loops_state_set (LOOPS_NEED_FIXUP);
 
-  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
+  if (region->inside_kernels_p)
+    expand_omp_for_generic (region, &fd, BUILT_IN_NONE, BUILT_IN_NONE,
+			    inner_stmt);
+  else if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
     expand_omp_simd (region, &fd);
   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
     expand_cilk_for (region, &fd);
@@ -10296,6 +10329,14 @@  expand_omp (struct omp_region *region)
       if (region->type == GIMPLE_OMP_PARALLEL)
 	determine_parallel_type (region);
 
+      if (region->type == GIMPLE_OMP_TARGET && region->inner)
+	{
+	  gomp_target *entry = as_a <gomp_target *> (last_stmt (region->entry));
+	  if (region->inside_kernels_p
+	      || gimple_omp_target_kind (entry) == GF_OMP_TARGET_KIND_OACC_KERNELS)
+	    region->inner->inside_kernels_p = true;
+	}
+
       if (region->type == GIMPLE_OMP_FOR
 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
 	inner_stmt = last_stmt (region->inner->entry);


Tom's trunk r228754:

commit 1c6a437bd44020c37452b7fb4f565f7e7f94d56b
Author: vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Tue Oct 13 10:08:40 2015 +0000

    Handle original loop tree in expand_omp_for_generic
    
    2015-10-13  Tom de Vries  <tom@codesourcery.com>
    
    	PR tree-optimization/67476
    	* omp-low.c (expand_omp_for_generic): Handle original loop tree.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228754 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git gcc/ChangeLog gcc/ChangeLog
index e5ede0b..4632387 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,3 +1,8 @@ 
+2015-10-13  Tom de Vries  <tom@codesourcery.com>
+
+	PR tree-optimization/67476
+	* omp-low.c (expand_omp_for_generic): Handle original loop tree.
+
 2015-10-13  Richard Biener  <rguenther@suse.de>
 
 	* tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Allocate
diff --git gcc/omp-low.c gcc/omp-low.c
index b2a93b9..7e894e4 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -6439,7 +6439,6 @@  expand_omp_for_generic (struct omp_region *region,
       remove_edge (e);
 
       make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
-      add_bb_to_loop (l2_bb, cont_bb->loop_father);
       e = find_edge (cont_bb, l1_bb);
       if (e == NULL)
 	{
@@ -6516,17 +6515,30 @@  expand_omp_for_generic (struct omp_region *region,
       set_immediate_dominator (CDI_DOMINATORS, l1_bb,
 			       recompute_dominator (CDI_DOMINATORS, l1_bb));
 
-      struct loop *outer_loop = alloc_loop ();
-      outer_loop->header = l0_bb;
-      outer_loop->latch = l2_bb;
-      add_loop (outer_loop, l0_bb->loop_father);
+      /* We enter expand_omp_for_generic with a loop.  This original loop may
+	 have its own loop struct, or it may be part of an outer loop struct
+	 (which may be the fake loop).  */
+      struct loop *outer_loop = entry_bb->loop_father;
+      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
 
-      if (!gimple_omp_for_combined_p (fd->for_stmt))
+      add_bb_to_loop (l2_bb, outer_loop);
+
+      /* We've added a new loop around the original loop.  Allocate the
+	 corresponding loop struct.  */
+      struct loop *new_loop = alloc_loop ();
+      new_loop->header = l0_bb;
+      new_loop->latch = l2_bb;
+      add_loop (new_loop, outer_loop);
+
+      /* Allocate a loop structure for the original loop unless we already
+	 had one.  */
+      if (!orig_loop_has_loop_struct
+	  && !gimple_omp_for_combined_p (fd->for_stmt))
 	{
-	  struct loop *loop = alloc_loop ();
-	  loop->header = l1_bb;
+	  struct loop *orig_loop = alloc_loop ();
+	  orig_loop->header = l1_bb;
 	  /* The loop may have multiple latches.  */
-	  add_loop (loop, outer_loop);
+	  add_loop (orig_loop, new_loop);
 	}
     }
 }