@@ -39,4 +39,4 @@ int main1 ()
which is folded by vectorizer. Both outgoing edges must have probability
100% so the resulting profile match after folding. */
/* { dg-final { scan-tree-dump-times "Invalid sum of outgoing probabilities 200.0" 1 "ifcvt" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 2 "ifcvt" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum of incoming counts" 1 "ifcvt" } } */
new file mode 100644
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized" } */
+void foo ();
+void test(int v, int q)
+{
+ for (int i = 0; i < 10 && v/q; i++)
+ foo ();
+}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized"} } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized" } */
+void foo ();
+void test()
+{
+ for (int i = 0; i < 10; i++)
+ foo ();
+}
+/* We should figure out that after header dulication loop iterates 9 times. */
+/* { dg-final { scan-tree-dump "90.00" "ch2"} } */
+/* { dg-final { scan-tree-dump "10.00" "ch2"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized"} } */
@@ -6658,13 +6658,17 @@ add_phi_args_after_copy (basic_block *region_copy, unsigned n_region,
blocks are stored to REGION_COPY in the same order as they had in REGION,
provided that REGION_COPY is not NULL.
The function returns false if it is unable to copy the region,
- true otherwise. */
+ true otherwise.
+
+ ELIMINATED_EDGE is an edge that is known to be removed in the dupicated
+ region. */
bool
gimple_duplicate_sese_region (edge entry, edge exit,
- basic_block *region, unsigned n_region,
- basic_block *region_copy,
- bool update_dominance)
+ basic_block *region, unsigned n_region,
+ basic_block *region_copy,
+ bool update_dominance,
+ edge eliminated_edge)
{
unsigned i;
bool free_region_copy = false, copying_header = false;
@@ -6743,11 +6747,92 @@ gimple_duplicate_sese_region (edge entry, edge exit,
split_edge_bb_loc (entry), update_dominance);
if (total_count.initialized_p () && entry_count.initialized_p ())
{
- scale_bbs_frequencies_profile_count (region, n_region,
- total_count - entry_count,
- total_count);
- scale_bbs_frequencies_profile_count (region_copy, n_region, entry_count,
- total_count);
+ if (!eliminated_edge)
+ {
+ scale_bbs_frequencies_profile_count (region, n_region,
+ total_count - entry_count,
+ total_count);
+ scale_bbs_frequencies_profile_count (region_copy, n_region,
+ entry_count, total_count);
+ }
+ else
+ {
+ /* We only support only case where eliminated_edge is one and it
+ exists first BB. We also assume that the duplicated region is
+ acyclic. So we expect the following:
+
+ // region_copy_start entry will be scaled to entry_count
+ if (cond1) <- this condition will become false
+ and we update probabilities
+ goto loop_exit;
+ if (cond2)
+ goto loop_exit;
+ goto loop_header <- this will be redirected to loop.
+ // region_copy_end
+ loop:
+ <body>
+ // region start
+ loop_header:
+ if (cond1) <- we need to update probabbility here
+ goto loop_exit;
+ if (cond2) <- and determine scaling factor here.
+ goto loop_exit;
+ else
+ goto loop;
+ // region end
+
+ Adding support for more exits can be done similarly,
+ but only consumer so far is tree-ssa-loop-ch and it uses only this
+ to handle the common case of peeling headers which have
+ conditionals known to be always true upon entry. */
+ gcc_assert (eliminated_edge->src == region[0]
+ && EDGE_COUNT (region[0]->succs) == 2
+ && copying_header);
+
+ edge e, e_copy, eliminated_edge_copy;
+ if (EDGE_SUCC (region[0], 0) == eliminated_edge)
+ {
+ e = EDGE_SUCC (region[0], 1);
+ e_copy = EDGE_SUCC (region_copy[0], 1);
+ eliminated_edge_copy = EDGE_SUCC (region_copy[0], 0);
+ }
+ else
+ {
+ e = EDGE_SUCC (region[0], 0);
+ e_copy = EDGE_SUCC (region_copy[0], 0);
+ eliminated_edge_copy = EDGE_SUCC (region_copy[0], 1);
+ }
+ gcc_checking_assert (e != e_copy
+ && eliminated_edge_copy != eliminated_edge
+ && eliminated_edge_copy->dest
+ == eliminated_edge->dest);
+
+
+ /* Handle first basic block in duplicated region as in the
+ non-eliminating case. */
+ scale_bbs_frequencies_profile_count (region_copy, n_region,
+ entry_count, total_count);
+ /* Now update redirecting eliminated edge to the other edge.
+ Actual CFG update is done by caller. */
+ e_copy->probability = profile_probability::always ();
+ eliminated_edge_copy->probability = profile_probability::never ();
+ /* Header copying is a special case of jump threading, so use
+ common code to update loop body exit condition. */
+ update_bb_profile_for_threading (region[0], e_copy->count (), e);
+ /* If we duplicated more conditionals first scale the profile of
+ rest of the preheader. Then work out the probability of
+ entering the loop and scale rest of the loop. */
+ if (n_region > 1)
+ {
+ scale_bbs_frequencies_profile_count (region_copy + 1,
+ n_region - 1,
+ e_copy->count (),
+ region_copy[1]->count);
+ scale_bbs_frequencies_profile_count (region + 1, n_region - 1,
+ e->count (),
+ region[1]->count);
+ }
+ }
}
if (copying_header)
@@ -70,7 +70,7 @@ extern void add_phi_args_after_copy_bb (basic_block);
extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
extern basic_block split_edge_bb_loc (edge);
extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned,
- basic_block *, bool);
+ basic_block *, bool, edge);
extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
basic_block *);
extern void gather_blocks_in_sese_region (basic_block entry, basic_block exit,
@@ -59,17 +59,18 @@ edge_range_query (irange &r, edge e, gcond *cond, gimple_ranger &ranger)
r.set_varying (boolean_type_node);
}
-/* Return true if the condition on the first iteration of the loop can
- be statically determined. */
+/* Return edge that is true in the first iteration of the loop
+ and NULL otherwise. */
-static bool
-entry_loop_condition_is_static (class loop *l, gimple_ranger *ranger)
+static edge
+static_loop_exit (class loop *l, gimple_ranger *ranger)
{
edge e = loop_preheader_edge (l);
gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (e->dest));
+ edge ret_e;
if (!last)
- return false;
+ return NULL;
edge true_e, false_e;
extract_true_false_edges_from_block (e->dest, &true_e, &false_e);
@@ -77,17 +78,23 @@ entry_loop_condition_is_static (class loop *l, gimple_ranger *ranger)
/* If neither edge is the exit edge, this is not a case we'd like to
special-case. */
if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
- return false;
+ return NULL;
int_range<1> desired_static_range;
if (loop_exit_edge_p (l, true_e))
- desired_static_range = range_false ();
+ {
+ desired_static_range = range_false ();
+ ret_e = true_e;
+ }
else
+ {
desired_static_range = range_true ();
+ ret_e = false_e;
+ }
int_range<2> r;
edge_range_query (r, e, last, *ranger);
- return r == desired_static_range;
+ return r == desired_static_range ? ret_e : NULL;
}
/* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
@@ -394,7 +401,8 @@ ch_base::copy_headers (function *fun)
copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
bbs_size = n_basic_blocks_for_fn (fun);
- auto_vec<loop_p> candidates;
+ struct candidate {class loop *loop; edge static_exit;};
+ auto_vec<struct candidate> candidates;
auto_vec<std::pair<edge, loop_p> > copied;
auto_vec<class loop *> loops_to_unloop;
auto_vec<int> loops_to_unloop_nunroll;
@@ -427,12 +435,14 @@ ch_base::copy_headers (function *fun)
|| !process_loop_p (loop))
continue;
+ edge static_exit = static_loop_exit (loop, ranger);
+
/* Avoid loop header copying when optimizing for size unless we can
determine that the loop condition is static in the first
iteration. */
if (optimize_loop_for_size_p (loop)
&& !loop->force_vectorize
- && !entry_loop_condition_is_static (loop, ranger))
+ && !static_exit)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
@@ -442,13 +452,14 @@ ch_base::copy_headers (function *fun)
}
if (should_duplicate_loop_header_p (header, loop, &remaining_limit))
- candidates.safe_push (loop);
+ candidates.safe_push ({loop, static_exit});
}
/* Do not use ranger after we change the IL and not have updated SSA. */
delete ranger;
- for (auto loop : candidates)
+ for (auto candidate : candidates)
{
+ class loop *loop = candidate.loop;
int initial_limit = param_max_loop_header_insns;
int remaining_limit = initial_limit;
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -501,11 +512,17 @@ ch_base::copy_headers (function *fun)
continue;
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "Duplicating header of the loop %d up to edge %d->%d,"
- " %i insns.\n",
- loop->num, exit->src->index, exit->dest->index,
- initial_limit - remaining_limit);
+ {
+ fprintf (dump_file,
+ "Duplicating header of the loop %d up to edge %d->%d,"
+ " %i insns.\n",
+ loop->num, exit->src->index, exit->dest->index,
+ initial_limit - remaining_limit);
+ if (candidate.static_exit)
+ fprintf (dump_file,
+ " Will eliminate peeled conditional in bb %d.\n",
+ candidate.static_exit->src->index);
+ }
/* Ensure that the header will have just the latch as a predecessor
inside the loop. */
@@ -519,7 +536,7 @@ ch_base::copy_headers (function *fun)
propagate_threaded_block_debug_into (exit->dest, entry->dest);
if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs,
- true))
+ true, candidate.static_exit))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Duplication failed.\n");