diff mbox

[PR65637,3/3] Handle 2 preds for fin_bb in expand_omp_for_static_chunk

Message ID 552E6643.8090002@mentor.com
State New
Headers show

Commit Message

Tom de Vries April 15, 2015, 1:23 p.m. UTC
On 15-04-15 15:10, Tom de Vries wrote:
> Hi,
>
> This patch series fixes PR65637.
>
> Currently, ssa-handling code in expand_omp_for_static_chunk is dead and not
> exercised by testing.
>
> Ssa-handling code in omp-low.c is only triggered by pass_parallelize_loops, and
> that pass doesn't specify a chunk size on the GIMPLE_OMP_FOR it constructs, so
> that only exercises the expand_omp_for_static_nochunk path.
>
> Using the attached trigger patch, we exercise the ssa-handling code in
> expand_omp_for_static_chunk. The following patch series fixes the problems in
> the ssa-handling code that we encounter.
>
> 1. Fix gcc_assert in expand_omp_for_static_chunk
> 2. Fix inner loop phi in expand_omp_for_static_chunk
> 3. Handle 2 preds for fin_bb in expand_omp_for_static_chunk
>
> The patch series has been bootstrapped and reg-tested on x86_64 together with
> attached trigger patch.
>
> I'll post the patches from the patch series individually, in response to this
> email.
>

This patch fixes compilation of autopar/reduc-3.c in
expand_omp_for_static_chunk.

We encounter two situations in expand_omp_for_static_chunk:
1. single_pred_p (fin_bb)
    This situation happens, for instance, for autopar-1.c, which uses a
    compile-time constant loop bound.
2. !single_pred_p (fin_bb)
    This situation happens for autopar/reduc-3.c, which uses a loop bound
    that is unknown at compile time.

The two situations are represented as control flow graphs here:
...
1.
   x
   |
   |
   *
entry_bb
   |
   |
   *
iter_part_bb --* seq_start_bb
   |        *         |
   |         \       ...
   *          \       |
fin_bb        \      *
   |            -- trip_update_bb
   |
   *
   x

2.
   x
   |
   |
   *
region.entry --* entry_bb
   |                |
   |                |
   *                *
fin_bb   *--   iter_part_bb --* seq_start_bb
   |                       *        |
   |                        \      ...
   *                         \      |
   x                          \     *
			      -- trip_update_bb
...

This patch handles the !single_pred_p (fin_bb) scenario, while keeping the
single_pred_p (fin_bb) scenario undisturbed.

With the patch, the resulting split-off function looks like this:
...
main1._loopfn.0 (voidD.41 * .paral_data_paramD.2498)
{
;;   basic block 2, loop depth 0, count 0, freq 79, maybe hot
;;    prev block 0, next block 3, flags: (NEW, REACHABLE)
;;    pred:       ENTRY (FALLTHRU)
   .paral_data_param_2 = .paral_data_param_1(D);
   .paral_data_load.12_3 = (struct  *) .paral_data_param_2;
   # VUSE <.MEM_33(D)>
   _4 = .paral_data_load.12_3->D.2490;
   # VUSE <.MEM_33(D)>
   ub_5 = .paral_data_load.12_3->ubD.2491;
   # VUSE <.MEM_33(D)>
   uc_6 = .paral_data_load.12_3->ucD.2492;
   if (0 < _4)
     goto <bb 4>;
   else
     goto <bb 3>;
;;    succ:       4 [100.0%]  (TRUE_VALUE)
;;                3 [0.0%]  (FALSE_VALUE)

;;   basic block 3, loop depth 0, count 0, freq 0, maybe hot
;;    prev block 2, next block 4, flags: (NEW, REACHABLE)
;;    pred:       2 [0.0%]  (FALSE_VALUE)
;;                5 (FALSE_VALUE)
   # udiff.8_7 = PHI <0(2), udiff.8_8(5)>
   _9 = &.paral_data_load.12_3->udiff.8D.2493;
   # .MEM_34 = VDEF <.MEM_33(D)>
   # USE = anything
   # CLB = anything
   __atomic_fetch_add_4D.1247 (_9, udiff.8_7, 0);
   # VUSE <.MEM_34>
   return;
;;    succ:       EXIT

;;   basic block 4, loop depth 0, count 0, freq 79, maybe hot
;;    prev block 3, next block 5, flags: (NEW, REACHABLE)
;;    pred:       2 [100.0%]  (TRUE_VALUE)
   _10 = omp_get_num_threadsD.1287 ();
   _11 = (unsigned int) _10;
   _12 = omp_get_thread_numD.1286 ();
   _13 = (unsigned int) _12;
   .trip.13_14 = 0;
;;    succ:       5 [100.0%]  (FALLTHRU)

;;   basic block 5, loop depth 1, count 0, freq 79, maybe hot
;;    prev block 4, next block 6, flags: (NEW, REACHABLE)
;;    pred:       4 [100.0%]  (FALLTHRU)
;;                8 [100.0%]  (FALLTHRU)
   # udiff.8_8 = PHI <0(4), udiff.8_15(8)>
   # .trip.13_16 = PHI <.trip.13_14(4), .trip.13_17(8)>
   _18 = _11 * .trip.13_16;
   _19 = _13 + _18;
   _20 = _19 + 1;
   _21 = MIN_EXPR <_4, _20>;
   if (_19 < _4)
     goto <bb 6>;
   else
     goto <bb 3>;
;;    succ:       6 [100.0%]  (TRUE_VALUE)
;;                3 (FALSE_VALUE)

;;   basic block 6, loop depth 1, count 0, freq 79, maybe hot
;;    prev block 5, next block 7, flags: (NEW, REACHABLE)
;;    pred:       5 [100.0%]  (TRUE_VALUE)
   ivtmp_22 = _19;
;;    succ:       7 [100.0%]  (FALLTHRU)

;;   basic block 7, loop depth 2, count 0, freq 7920, maybe hot
;;    prev block 6, next block 8, flags: (NEW, REACHABLE)
;;    pred:       6 [100.0%]  (FALLTHRU)
;;                7 [100.0%]  (TRUE_VALUE)
   # udiff.8_23 = PHI <udiff.8_8(6), udiff.8_15(7)>
   # ivtmp_24 = PHI <ivtmp_22(6), ivtmp_25(7)>
   i.9_28 = (intD.6) ivtmp_24;
   # VUSE <.MEM_33(D)>
   _29 = *ub_5[i.9_28];
   # VUSE <.MEM_33(D)>
   _30 = *uc_6[i.9_28];
   _31 = _29 - _30;
   udiff.8_15 = udiff.8_23 + _31;
   i.9_32 = i.9_28 + 1;
   ivtmp_25 = ivtmp_24 + 1;
   if (ivtmp_25 < _21)
     goto <bb 7>;
   else
     goto <bb 8>;
;;    succ:       7 [100.0%]  (TRUE_VALUE)
;;                8 (FALSE_VALUE)

;;   basic block 8, loop depth 1, count 0, freq 0, maybe hot
;;    prev block 7, next block 1, flags: (NEW, REACHABLE)
;;    pred:       7 (FALSE_VALUE)
   .trip.13_17 = .trip.13_16 + 1;
   goto <bb 5>;
;;    succ:       5 [100.0%]  (FALLTHRU)

}
...

OK for trunk?

Thanks,
- Tom
diff mbox

Patch

Handle 2 preds for fin_bb in expand_omp_for_static_chunk

2015-04-15  Tom de Vries  <tom@codesourcery.com>

	PR tree-optimization/65637
	* omp-low.c (expand_omp_for_static_chunk): Handle case that fin_bb has 2
	predecessors.

---
 gcc/omp-low.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 62cbed0..6d7d82d 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6991,7 +6991,7 @@  expand_omp_for_static_chunk (struct omp_region *region,
       se->probability = REG_BR_PROB_BASE / 2000 - 1;
       if (gimple_in_ssa_p (cfun))
 	{
-	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
+	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
 	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
 	       !gsi_end_p (gpi); gsi_next (&gpi))
 	    {
@@ -7262,7 +7262,7 @@  expand_omp_for_static_chunk (struct omp_region *region,
       /* When we redirect the edge from trip_update_bb to iter_part_bb, we
 	 remove arguments of the phi nodes in fin_bb.  We need to create
 	 appropriate phi nodes in iter_part_bb instead.  */
-      se = single_pred_edge (fin_bb);
+      se = find_edge (iter_part_bb, fin_bb);
       re = single_succ_edge (trip_update_bb);
       vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
       ene = single_succ_edge (entry_bb);
@@ -7277,6 +7277,10 @@  expand_omp_for_static_chunk (struct omp_region *region,
 	  phi = psi.phi ();
 	  t = gimple_phi_result (phi);
 	  gcc_assert (t == redirect_edge_var_map_result (vm));
+
+	  if (!single_pred_p (fin_bb))
+	    t = copy_ssa_name (t, phi);
+
 	  nphi = create_phi_node (t, iter_part_bb);
 
 	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
@@ -7296,16 +7300,20 @@  expand_omp_for_static_chunk (struct omp_region *region,
 	  gcc_assert (inner_loop_phi != NULL);
 	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
 		       find_edge (seq_start_bb, body_bb), locus);
+
+	  if (!single_pred_p (fin_bb))
+	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
 	}
       gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
       redirect_edge_var_map_clear (re);
-      while (1)
-	{
-	  psi = gsi_start_phis (fin_bb);
-	  if (gsi_end_p (psi))
-	    break;
-	  remove_phi_node (&psi, false);
-	}
+      if (single_pred_p (fin_bb))
+	while (1)
+	  {
+	    psi = gsi_start_phis (fin_bb);
+	    if (gsi_end_p (psi))
+	      break;
+	    remove_phi_node (&psi, false);
+	  }
 
       /* Make phi node for trip.  */
       phi = create_phi_node (trip_main, iter_part_bb);
-- 
1.9.1