@@ -499,6 +499,32 @@ scale_loop_frequencies (class loop *loop, profile_probability p)
free (bbs);
}
+/* Scales the frequencies of all basic blocks in LOOP that are strictly
+ dominated by BB by NUM/DEN. */
+
+void
+scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
+ profile_count num, profile_count den)
+{
+ basic_block son;
+
+ if (!den.nonzero_p () && !(num == profile_count::zero ()))
+ return;
+ auto_vec <basic_block, 8> worklist;
+ worklist.safe_push (bb);
+
+ while (!worklist.is_empty ())
+ for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
+ son;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ {
+ if (!flow_bb_inside_loop_p (loop, son))
+ continue;
+ son->count = son->count.apply_scale (num, den);
+ worklist.safe_push (son);
+ }
+}
+
/* Scale profile in LOOP by P.
If ITERATION_BOUND is not -1, scale even further if loop is predicted
to iterate too many times.
@@ -649,19 +675,9 @@ scale_loop_profile (class loop *loop, profile_probability p,
if (other_edge && other_edge->dest == loop->latch)
loop->latch->count -= new_exit_count - old_exit_count;
else
- {
- basic_block *body = get_loop_body (loop);
- profile_count new_count = exit_edge->src->count - new_exit_count;
- profile_count old_count = exit_edge->src->count - old_exit_count;
-
- for (unsigned int i = 0; i < loop->num_nodes; i++)
- if (body[i] != exit_edge->src
- && dominated_by_p (CDI_DOMINATORS, body[i], exit_edge->src))
- body[i]->count = body[i]->count.apply_scale (new_count,
- old_count);
-
- free (body);
- }
+ scale_dominated_blocks_in_loop (loop, exit_edge->src,
+ exit_edge->src->count - new_exit_count,
+ exit_edge->src->count - old_exit_count);
}
else if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1237,6 +1253,7 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e,
should've managed the flags so all except for original loop
has won't exist set. */
scale_act = wanted_count.probability_in (count_in);
+
/* Now simulate the duplication adjustments and compute header
frequency of the last copy. */
for (i = 0; i < ndupl; i++)
@@ -1252,16 +1269,21 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e,
profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
? prob_pass_wont_exit
: prob_pass_thru;
- profile_probability p = prob_pass_main;
- profile_count scale_main_den = count_in;
- for (i = 0; i < ndupl; i++)
+ if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
{
- scale_main_den += count_in.apply_probability (p);
- p = p * scale_step[i];
+ profile_probability p = prob_pass_main;
+ profile_count scale_main_den = count_in;
+ for (i = 0; i < ndupl; i++)
+ {
+ scale_main_den += count_in.apply_probability (p);
+ p = p * scale_step[i];
+ }
+ /* If original loop is executed COUNT_IN times, the unrolled
+ loop will account SCALE_MAIN_DEN times. */
+ scale_main = count_in.probability_in (scale_main_den);
}
- /* If original loop is executed COUNT_IN times, the unrolled
- loop will account SCALE_MAIN_DEN times. */
- scale_main = count_in.probability_in (scale_main_den);
+ else
+ scale_main = profile_probability::always ();
scale_act = scale_main * prob_pass_main;
}
else
@@ -32,6 +32,8 @@ enum
field of newly create BB. */
#define DLTHE_FLAG_COMPLETTE_PEEL 4 /* Update frequencies expecting
a complete peeling. */
+#define DLTHE_FLAG_FLAT_PROFILE 8 /* Profile is flat; do not reduce
+ count by unroll factor. */
extern edge mfb_kj_edge;
extern bool remove_path (edge, bool * = NULL, bitmap = NULL);
@@ -64,5 +66,7 @@ class loop * loop_version (class loop *, void *,
profile_probability, profile_probability,
profile_probability, profile_probability, bool);
void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise);
+void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
+ profile_count num, profile_count den);
#endif /* GCC_CFGLOOPMANIP_H */
@@ -790,7 +790,7 @@ dump_prediction (FILE *file, enum br_predictor predictor, int probability,
{
fprintf (file, " exec ");
bb->count.dump (file);
- if (e)
+ if (e && e->count ().initialized_p () && bb->count.to_gcov_type ())
{
fprintf (file, " hit ");
e->count ().dump (file);
@@ -4634,43 +4634,6 @@ force_edge_cold (edge e, bool impossible)
}
}
-/* Change E's probability to NEW_E_PROB, redistributing the probabilities
- of other outgoing edges proportionally.
-
- Note that this function does not change the profile counts of any
- basic blocks. The caller must do that instead, using whatever
- information it has about the region that needs updating. */
-
-void
-change_edge_frequency (edge e, profile_probability new_e_prob)
-{
- profile_probability old_e_prob = e->probability;
- profile_probability old_other_prob = old_e_prob.invert ();
- profile_probability new_other_prob = new_e_prob.invert ();
-
- e->probability = new_e_prob;
- profile_probability cumulative_prob = new_e_prob;
-
- unsigned int num_other = EDGE_COUNT (e->src->succs) - 1;
- edge other_e;
- edge_iterator ei;
- FOR_EACH_EDGE (other_e, ei, e->src->succs)
- if (other_e != e)
- {
- num_other -= 1;
- if (num_other == 0)
- /* Ensure that the probabilities add up to 1 without
- rounding error. */
- other_e->probability = cumulative_prob.invert ();
- else
- {
- other_e->probability /= old_other_prob;
- other_e->probability *= new_other_prob;
- cumulative_prob += other_e->probability;
- }
- }
-}
-
#if CHECKING_P
namespace selftest {
@@ -100,7 +100,6 @@ extern void rebuild_frequencies (void);
extern void report_predictor_hitrates (void);
extern void force_edge_cold (edge, bool);
extern void propagate_unlikely_bbs_forward (void);
-extern void change_edge_frequency (edge, profile_probability);
extern void add_reg_br_prob_note (rtx_insn *, profile_probability);
@@ -1,4 +1,4 @@
-/* { dg-options "-Wall -Wextra -O2 -fno-toplevel-reorder -fno-tree-ch -fno-tree-dce -fno-tree-dominator-opts -fno-tree-dse -fno-tree-loop-ivcanon -fpredictive-commoning" } */
+/* { dg-options "-Wall -Wextra -O2 -fno-toplevel-reorder -fno-tree-ch -fno-tree-dce -fno-tree-dominator-opts -fno-tree-dse -fno-tree-loop-ivcanon -fpredictive-commoning -fdump-tree-pcom-details-blocks -fdump-tree-lim-details-blocks" } */
short a, b;
int c[9];
@@ -12,3 +12,5 @@ void e() {
}
}
int main() {return 0;}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "lim2" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fpredictive-commoning -fno-tree-loop-im" } */
+/* { dg-options "-O1 -fpredictive-commoning -fno-tree-loop-im -fdump-tree-pcom-details-blocks" } */
int bl;
@@ -17,3 +17,4 @@ ie (void)
ie ();
}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-do run } */
-/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
void abort (void);
@@ -47,3 +47,4 @@ int main(void)
/* Also check that we undid the transformation previously made by PRE. */
/* { dg-final { scan-tree-dump-times "looparound ref" 1 "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details -fno-tree-pre" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks -fno-tree-pre" } */
/* { dg-additional-options "-fno-tree-vectorize" { target amdgcn-*-* } } */
void abort (void);
@@ -44,3 +44,4 @@ int main(void)
/* Verify that both loops were transformed and unrolled. */
/* { dg-final { scan-tree-dump-times "Unrolling 2 times." 2 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details -fno-tree-pre -fno-tree-loop-vectorize" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks -fno-tree-pre -fno-tree-loop-vectorize" } */
int a[1000], b[1000];
@@ -13,3 +13,4 @@ void test(void)
/* Verify that we used 3 temporary variables for the loop. */
/* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
/* Test for predictive commoning of expressions, without reassociation. */
@@ -26,3 +26,4 @@ int main(void)
/* { dg-final { scan-tree-dump-times "Combination" 1 "pcom"} } */
/* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
/* Test for predictive commoning of expressions, with reassociation. */
@@ -26,3 +26,4 @@ int main(void)
/* { dg-final { scan-tree-dump-times "Combination" 2 "pcom"} } */
/* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O3 -fdump-tree-pcom-details" } */
+/* { dg-options "-O3 -fdump-tree-pcom-details-blocks" } */
int b, f, d[5][2];
unsigned int c;
@@ -15,3 +15,7 @@ main ()
}
/* { dg-final { scan-tree-dump "Executing predictive commoning" "pcom" } } */
+/* dom pass introduces one mismatch after simplfying mispredicted conditional
+ on c being non-zero on first iteration. This happens since c is global variable
+ and needs alias analysis. */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 1 "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-pcom-details" } */
+/* { dg-options "-O3 -fdump-tree-pcom-details-blocks" } */
int is_sorted(int *a, int n)
{
@@ -10,3 +10,4 @@ int is_sorted(int *a, int n)
}
/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11};
@@ -60,3 +60,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11};
@@ -41,4 +41,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-not "Store-stores chain" "pcom"} } */
-
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11};
int x[105] = {2, 3, 5, 7, 11};
@@ -48,4 +48,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
/* { dg-final { scan-tree-dump "Store-loads chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11};
@@ -65,3 +65,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11};
@@ -60,3 +60,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-tree-vectorize -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr1[105] = {2, 3, 5, 7, 11, 13, 0};
int arr2[105] = {2, 3, 5, 7, 11, 13, 0};
@@ -106,3 +106,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "Store-stores chain" 4 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11};
@@ -59,3 +59,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11};
@@ -61,3 +61,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -63,3 +63,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -61,3 +61,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -58,3 +58,4 @@ int main (void)
return 0;
}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
int arr1[105] = {2, 3, 5, 7, 11, 13, 17, 19};
int arr2[105] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -88,3 +88,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
@@ -16,5 +16,4 @@ int foo(unsigned n)
/* We used to make the probability that the body of the loop (unrolled
to enable prefetching) is entered 0, which is not correct. */
-/* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch" { xfail *-*-* }} } */
/* { dg-final { scan-tree-dump-not "SUCC: 7 .100.0%" "aprefetch"} } */
@@ -1040,71 +1040,6 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
*exit_bound = bound;
}
-/* Scales the frequencies of all basic blocks in LOOP that are strictly
- dominated by BB by NUM/DEN. */
-
-static void
-scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
- profile_count num, profile_count den)
-{
- basic_block son;
-
- if (!den.nonzero_p () && !(num == profile_count::zero ()))
- return;
-
- for (son = first_dom_son (CDI_DOMINATORS, bb);
- son;
- son = next_dom_son (CDI_DOMINATORS, son))
- {
- if (!flow_bb_inside_loop_p (loop, son))
- continue;
- scale_bbs_frequencies_profile_count (&son, 1, num, den);
- scale_dominated_blocks_in_loop (loop, son, num, den);
- }
-}
-
-/* Return estimated niter for LOOP after unrolling by FACTOR times. */
-
-gcov_type
-niter_for_unrolled_loop (class loop *loop, unsigned factor)
-{
- gcc_assert (factor != 0);
- bool profile_p = false;
- gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
- /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
- "+ 1" converts latch iterations to loop iterations and the "- 1"
- converts back. */
- gcov_type new_est_niter = est_niter / factor;
-
- if (est_niter == -1)
- return -1;
-
- /* Without profile feedback, loops for which we do not know a better estimate
- are assumed to roll 10 times. When we unroll such loop, it appears to
- roll too little, and it may even seem to be cold. To avoid this, we
- ensure that the created loop appears to roll at least 5 times (but at
- most as many times as before unrolling). Don't do adjustment if profile
- feedback is present. */
- if (new_est_niter < 5 && !profile_p)
- {
- if (est_niter < 5)
- new_est_niter = est_niter;
- else
- new_est_niter = 5;
- }
-
- if (loop->any_upper_bound)
- {
- /* As above, this is really CEIL (upper_bound + 1, factor) - 1. */
- widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
- factor);
- if (wi::ltu_p (bound, new_est_niter))
- new_est_niter = bound.to_uhwi ();
- }
-
- return new_est_niter;
-}
-
/* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
whose source block dominates the latch. DESC describes the number of
iterations of LOOP.
@@ -1169,47 +1104,39 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
transform_callback transform,
void *data)
{
- gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
enum tree_code exit_cmp;
tree enter_main_cond, exit_base, exit_step, exit_bound;
+ bool flat = maybe_flat_loop_profile (loop);
determine_exit_conditions (loop, desc, factor,
&enter_main_cond, &exit_base, &exit_step,
&exit_cmp, &exit_bound);
bool single_loop_p = !exit_base;
- /* Let us assume that the unrolled loop is quite likely to be entered. */
- profile_probability prob_entry;
- if (integer_nonzerop (enter_main_cond))
- prob_entry = profile_probability::always ();
- else
- prob_entry = profile_probability::guessed_always ()
- .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
-
gcond *exit_if = nullptr;
class loop *new_loop = nullptr;
edge new_exit;
if (!single_loop_p)
{
- edge exit = single_dom_exit (loop);
+ profile_count entry_count = loop_preheader_edge (loop)->src->count;
+ /* Let us assume that the unrolled loop is quite likely to be entered. */
+ profile_probability prob_entry;
+ if (integer_nonzerop (enter_main_cond))
+ prob_entry = profile_probability::always ();
+ else
+ prob_entry = profile_probability::guessed_always ()
+ .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
+
/* The values for scales should keep profile consistent, and somewhat
- close to correct.
-
- TODO: The current value of SCALE_REST makes it appear that the loop
- that is created by splitting the remaining iterations of the unrolled
- loop is executed the same number of times as the original loop, and
- with the same frequencies, which is obviously wrong. This does not
- appear to cause problems, so we do not bother with fixing it for now.
- To make the profile correct, we would need to change the probability
- of the exit edge of the loop, and recompute the distribution of
- frequencies in its body because of this change (scale the frequencies
- of blocks before and after the exit by appropriate factors). */
- profile_probability scale_unrolled = prob_entry;
+ close to correct. */
new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
- prob_entry.invert (), scale_unrolled,
- profile_probability::guessed_always (),
+ prob_entry.invert (),
+ prob_entry,
+ /* We will later redirect exit from vectorized
+ loop to new_loop. */
+ profile_probability::always (),
true);
gcc_assert (new_loop != NULL);
update_ssa (TODO_update_ssa_no_phi);
@@ -1220,18 +1147,16 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
edge precond_edge = single_pred_edge (rest);
split_edge (loop_latch_edge (loop));
basic_block exit_bb = single_pred (loop->latch);
+ edge exit = single_dom_exit (loop);
/* Since the exit edge will be removed, the frequency of all the blocks
- in the loop that are dominated by it must be scaled by
- 1 / (1 - exit->probability). */
+ in the loop that are dominated by it must be scaled. */
if (exit->probability.initialized_p ())
scale_dominated_blocks_in_loop (loop, exit->src,
/* We are scaling up here so
probability does not fit. */
- loop->header->count,
- loop->header->count
- - loop->header->count.apply_probability
- (exit->probability));
+ exit->src->count,
+ exit->src->count - exit->count ());
gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
@@ -1243,14 +1168,14 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
rescan_loop_exit (new_exit, true, false);
/* Set the probability of new exit to the same of the old one. Fix
- the frequency of the latch block, by scaling it back by
- 1 - exit->probability. */
+ the count of the latch block. */
new_exit->probability = exit->probability;
edge new_nonexit = single_pred_edge (loop->latch);
new_nonexit->probability = exit->probability.invert ();
new_nonexit->flags = EDGE_TRUE_VALUE;
- if (new_nonexit->probability.initialized_p ())
- scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability);
+ set_edge_probability_and_rescale_others
+ (exit, profile_probability::never ());
+ loop->latch->count = new_nonexit->count ();
edge old_entry = loop_preheader_edge (loop);
edge new_entry = loop_preheader_edge (new_loop);
@@ -1296,12 +1221,21 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
}
remove_path (exit);
+ /* We will later redirect exit from vectorized loop to new_loop. */
+ loop_preheader_edge (new_loop)->src->count = entry_count;
/* The epilog loop latch executes at most factor - 1 times.
Since the epilog is entered unconditionally it will need to handle
up to factor executions of its body. */
- new_loop->any_upper_bound = 1;
+ new_loop->any_upper_bound = true;
new_loop->nb_iterations_upper_bound = factor - 1;
+ /* We do not really know estimate on number of iterations, since we do not
+ track any estimates modulo unroll factor.
+ Drop estimate from loop_info and scale loop profile.
+ It may be more realistic to scale loop profile to factor / 2 - 1,
+ but vectorizer also uses factor - 1. */
+ new_loop->any_estimate = false;
+ scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
}
else
new_exit = single_dom_exit (loop);
@@ -1318,10 +1252,10 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
auto_vec<edge> to_remove;
bool ok
- = gimple_duplicate_loop_body_to_header_edge (loop, loop_latch_edge (loop),
- factor - 1, wont_exit,
- new_exit, &to_remove,
- DLTHE_FLAG_UPDATE_FREQ);
+ = gimple_duplicate_loop_body_to_header_edge
+ (loop, loop_latch_edge (loop), factor - 1, wont_exit,
+ new_exit, &to_remove,
+ DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
gcc_assert (ok);
for (edge e : to_remove)
@@ -1332,36 +1266,25 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
update_ssa (TODO_update_ssa);
new_exit = single_dom_exit (loop);
+
+ /* gimple_duplicate_loop_body_to_header_edge depending on
+ DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
+ or scales it down accordingly.
+ However exit edge probability is kept as original. Fix it if needed
+ and compensate. */
+ profile_probability new_prob
+ = loop_preheader_edge
+ (loop)->count ().probability_in (new_exit->src->count);
+ if (!(new_prob == new_exit->probability))
+ {
+ profile_count old_count = new_exit->src->count - new_exit->count ();
+ set_edge_probability_and_rescale_others (new_exit, new_prob);
+ profile_count new_count = new_exit->src->count - new_exit->count ();
+ scale_dominated_blocks_in_loop (loop, new_exit->src,
+ new_count, old_count);
+ }
if (!single_loop_p)
{
- /* Ensure that the frequencies in the loop match the new estimated
- number of iterations, and change the probability of the new
- exit edge. */
-
- profile_count freq_h = loop->header->count;
- profile_count freq_e = (loop_preheader_edge (loop))->count ();
- if (freq_h.nonzero_p ())
- {
- /* Avoid dropping loop body profile counter to 0 because of zero
- count in loop's preheader. */
- if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ()))
- freq_e = freq_e.force_nonzero ();
- scale_loop_frequencies (loop, freq_e.probability_in (freq_h));
- }
-
- basic_block rest = new_exit->dest;
- new_exit->probability
- = (profile_probability::always () / (new_est_niter + 1));
-
- rest->count += new_exit->count ();
-
- edge new_nonexit = single_pred_edge (loop->latch);
- profile_probability prob = new_nonexit->probability;
- new_nonexit->probability = new_exit->probability.invert ();
- prob = new_nonexit->probability / prob;
- if (prob.initialized_p ())
- scale_bbs_frequencies (&loop->latch, 1, prob);
-
/* Finally create the new counter for number of iterations and add
the new exit instruction. */
tree ctr_before, ctr_after;
@@ -1374,66 +1297,15 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
gimple_cond_set_rhs (exit_if, exit_bound);
update_stmt (exit_if);
}
- else
- {
- /* gimple_duplicate_loop_to_header_edge has adjusted the loop body's
- original profile counts in line with the unroll factor. However,
- the old counts might not have been consistent with the old
- iteration count.
-
- Therefore, if the iteration count is known exactly, make sure that the
- profile counts of the loop header (and any other blocks that might be
- executed in the final iteration) are consistent with the combination
- of (a) the incoming profile count and (b) the new iteration count. */
- profile_count in_count = loop_preheader_edge (loop)->count ();
- profile_count old_header_count = loop->header->count;
- if (in_count.nonzero_p ()
- && old_header_count.nonzero_p ()
- && TREE_CODE (desc->niter) == INTEGER_CST)
- {
- /* The + 1 converts latch counts to iteration counts. */
- profile_count new_header_count = in_count * (new_est_niter + 1);
- basic_block *body = get_loop_body (loop);
- scale_bbs_frequencies_profile_count (body, loop->num_nodes,
- new_header_count,
- old_header_count);
- free (body);
- }
-
- /* gimple_duplicate_loop_to_header_edge discarded FACTOR - 1
- exit edges and adjusted the loop body's profile counts for the
- new probabilities of the remaining non-exit edges. However,
- the remaining exit edge still has the same probability as it
- did before, even though it is now more likely.
-
- Therefore, all blocks executed after a failed exit test now have
- a profile count that is too high, and the sum of the profile counts
- for the header's incoming edges is greater than the profile count
- of the header itself.
-
- Adjust the profile counts of all code in the loop body after
- the exit test so that the sum of the counts on entry to the
- header agree. */
- profile_count old_latch_count = loop_latch_edge (loop)->count ();
- profile_count new_latch_count = loop->header->count - in_count;
- if (old_latch_count.nonzero_p () && new_latch_count.nonzero_p ())
- scale_dominated_blocks_in_loop (loop, new_exit->src, new_latch_count,
- old_latch_count);
-
- /* Set the probability of the exit edge based on NEW_EST_NITER
- (which estimates latch counts rather than iteration counts).
- Update the probabilities of other edges to match.
-
- If the profile counts are large enough to give the required
- precision, the updates above will have made
-
- e->dest->count / e->src->count ~= new e->probability
-
- for every outgoing edge e of NEW_EXIT->src. */
- profile_probability new_exit_prob
- = profile_probability::always () / (new_est_niter + 1);
- change_edge_frequency (new_exit, new_exit_prob);
- }
+ if (loop->any_upper_bound)
+ loop->nb_iterations_upper_bound = wi::udiv_floor
+ (loop->nb_iterations_upper_bound + 1, factor) - 1;
+ if (loop->any_likely_upper_bound)
+ loop->nb_iterations_likely_upper_bound = wi::udiv_floor
+ (loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
+ if (loop->any_estimate)
+ loop->nb_iterations_estimate = wi::udiv_floor
+ (loop->nb_iterations_estimate + 1, factor) - 1;
checking_verify_flow_info ();
checking_verify_loop_structure ();