===================================================================
@@ -2593,8 +2593,10 @@ iv_number_of_iterations (struct loop *lo
? iv0.base
: mode_mmin);
max = (up - down) / inc + 1;
- record_niter_bound (loop, double_int::from_uhwi (max),
- false, true);
+ if (!desc->infinite
+ && !desc->assumptions)
+ record_niter_bound (loop, double_int::from_uhwi (max),
+ false, true);
if (iv0.step == const0_rtx)
{
@@ -2806,15 +2808,19 @@ iv_number_of_iterations (struct loop *lo
desc->const_iter = true;
desc->niter = val & GET_MODE_MASK (desc->mode);
- record_niter_bound (loop, double_int::from_uhwi (desc->niter),
- false, true);
+ if (!desc->infinite
+ && desc->assumptions)
+ record_niter_bound (loop, double_int::from_uhwi (desc->niter),
+ false, true);
}
else
{
max = determine_max_iter (loop, desc, old_niter);
gcc_assert (max);
- record_niter_bound (loop, double_int::from_uhwi (max),
- false, true);
+ if (!desc->infinite
+ && desc->assumptions)
+ record_niter_bound (loop, double_int::from_uhwi (max),
+ false, true);
/* simplify_using_initial_values does a copy propagation on the registers
in the expression for the number of iterations. This prolongs life
===================================================================
@@ -37,7 +37,6 @@ static int find_path (edge, basic_block
static void fix_loop_placements (struct loop *, bool *);
static bool fix_bb_placement (basic_block);
static void fix_bb_placements (basic_block, bool *);
-static void unloop (struct loop *, bool *);
/* Checks whether basic block BB is dominated by DATA. */
static bool
@@ -895,7 +894,7 @@ loopify (edge latch_edge, edge header_ed
If this may cause the information about irreducible regions to become
invalid, IRRED_INVALIDATED is set to true. */
-static void
+void
unloop (struct loop *loop, bool *irred_invalidated)
{
basic_block *body;
===================================================================
@@ -320,7 +320,8 @@ extern struct loop *loopify (edge, edge,
struct loop * loop_version (struct loop *, void *,
basic_block *, unsigned, unsigned, unsigned, bool);
extern bool remove_path (edge);
-void scale_loop_frequencies (struct loop *, int, int);
+extern void scale_loop_frequencies (struct loop *, int, int);
+extern void unloop (struct loop *, bool *);
/* Induction variable analysis. */
===================================================================
@@ -123,7 +123,7 @@ struct opt_info
static void decide_unrolling_and_peeling (int);
static void peel_loops_completely (int);
static void decide_peel_simple (struct loop *, int);
-static void decide_peel_once_rolling (struct loop *, int);
+static void decide_peel_not_rolling (struct loop *, int);
static void decide_peel_completely (struct loop *, int);
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
@@ -209,13 +209,18 @@ loop_exit_at_end_p (struct loop *loop)
struct niter_desc *desc = get_simple_loop_desc (loop);
rtx insn;
+ /* We should never have conditional in latch block. */
+ gcc_assert (desc->in_edge->dest != loop->header);
+
if (desc->in_edge->dest != loop->latch)
return false;
- /* Check that the latch is empty. */
+ /* Check that the latch is empty.
+ forwarder_block_p will not help here since it protects
+ loop latches. */
FOR_BB_INSNS (loop->latch, insn)
{
- if (INSN_P (insn))
+ if (INSN_P (insn) && active_insn_p (insn))
return false;
}
@@ -241,7 +246,7 @@ peel_loops_completely (int flags)
loop->ninsns = num_loop_insns (loop);
- decide_peel_once_rolling (loop, flags);
+ decide_peel_not_rolling (loop, flags);
if (loop->lpt_decision.decision == LPT_NONE)
decide_peel_completely (loop, flags);
@@ -311,44 +316,25 @@ decide_unrolling_and_peeling (int flags)
}
}
-/* Decide whether the LOOP is once rolling and suitable for complete
- peeling. */
+/* Decide whether the LOOP is not rolling and should be turned into a non-loop. */
static void
-decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
+decide_peel_not_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
- struct niter_desc *desc;
-
if (dump_file)
- fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
-
- /* Is the loop small enough? */
- if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
- {
- if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
- return;
- }
-
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
+ fprintf (dump_file, "\n;; Considering to unloop not rolling loop\n");
/* Check number of iterations. */
- if (!desc->simple_p
- || desc->assumptions
- || desc->infinite
- || !desc->const_iter
- || (desc->niter != 0
- && max_loop_iterations_int (loop) != 0))
+ if (max_loop_iterations_int (loop) != 0)
{
if (dump_file)
fprintf (dump_file,
- ";; Unable to prove that the loop rolls exactly once\n");
+ ";; Unable to prove that the loop do not roll\n");
return;
}
/* Success. */
if (dump_file)
- fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
+ fprintf (dump_file, ";; Decided unloop not rolling loop\n");
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}
@@ -358,6 +344,7 @@ decide_peel_completely (struct loop *loo
{
unsigned npeel;
struct niter_desc *desc;
+ HOST_WIDE_INT niter;
if (dump_file)
fprintf (dump_file, "\n;; Considering peeling completely\n");
@@ -403,25 +390,25 @@ decide_peel_completely (struct loop *loo
/* Check for simple loops. */
desc = get_simple_loop_desc (loop);
- /* Check number of iterations. */
- if (!desc->simple_p
- || desc->assumptions
- || !desc->const_iter
- || desc->infinite)
+ niter = max_loop_iterations_int (loop);
+ if (niter == -1)
{
+ gcc_assert (!desc->simple_p
+ || desc->assumptions || desc->infinite
+ || !desc->const_iter);
if (dump_file)
fprintf (dump_file,
- ";; Unable to prove that the loop iterates constant times\n");
+ ";; Unable to bound loop iterations by a constant\n");
return;
}
- if (desc->niter > npeel - 1)
+ if (niter > (HOST_WIDE_INT)npeel - 1)
{
if (dump_file)
{
fprintf (dump_file,
";; Not peeling loop completely, rolls too much (");
- fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
+ fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, niter);
fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
}
return;
@@ -457,8 +444,31 @@ peel_loop_completely (struct loop *loop)
edge ein;
struct niter_desc *desc = get_simple_loop_desc (loop);
struct opt_info *opt_info = NULL;
+ bool loop_cancelled = false;
+ edge not_taken_exit = desc->out_edge;
+ edge edge_to_cancel = desc->in_edge;
- npeel = desc->niter;
+ /* Check number of iterations. */
+ npeel = max_loop_iterations_int (loop);
+
+ /* See if loop-iv was able to determine the IV variable
+ test giving us the bound on the number of iterations. */
+ if (!desc->simple_p
+ || !desc->const_iter
+ || desc->assumptions
+ || desc->infinite)
+ {
+ not_taken_exit = NULL;
+ edge_to_cancel = NULL;
+ }
+ else
+ {
+ /* If the IV test exists we can remove it in all peeled copies
+ and moreover we may use it to cancel the final loop. */
+ gcc_assert (desc->niter >= npeel);
+ if (npeel < desc->niter)
+ edge_to_cancel = NULL;
+ }
if (npeel)
{
@@ -467,7 +477,8 @@ peel_loop_completely (struct loop *loop)
wont_exit = sbitmap_alloc (npeel + 1);
sbitmap_ones (wont_exit);
RESET_BIT (wont_exit, 0);
- if (desc->noloop_assumptions)
+ if (desc->noloop_assumptions
+ || !desc->simple_p)
RESET_BIT (wont_exit, 1);
remove_edges = NULL;
@@ -478,7 +489,7 @@ peel_loop_completely (struct loop *loop)
opt_info_start_duplication (opt_info);
ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
npeel,
- wont_exit, desc->out_edge,
+ wont_exit, not_taken_exit,
&remove_edges,
DLTHE_FLAG_UPDATE_FREQ
| DLTHE_FLAG_COMPLETTE_PEEL
@@ -500,15 +511,99 @@ peel_loop_completely (struct loop *loop)
VEC_free (edge, heap, remove_edges);
}
- ein = desc->in_edge;
free_simple_loop_desc (loop);
+ /* If we have no exit edge to cancel, see if the loop has exit at the end.
+ If so we can force the last exit to be taken. */
+ if (!edge_to_cancel)
+ {
+ VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+ unsigned i;
+
+ FOR_EACH_VEC_ELT (edge, exits, i, edge_to_cancel)
+ {
+ rtx insn;
+
+ /* Find the other edge than the loop exit
+ leaving the conditoinal. */
+ if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
+ continue;
+ if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
+ edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
+ else
+ edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
+
+ /* We should never have conditionals in the loop latch. */
+ gcc_assert (edge_to_cancel->dest != loop->header);
+
+ /* Check that it leads to loop latch. */
+ if (edge_to_cancel->dest != loop->latch)
+ continue;
+
+ /* Check that the latch is empty.
+ forwarder_block_p will not help here since it protects
+ loop latches. */
+ FOR_BB_INSNS (loop->latch, insn)
+ {
+ if (INSN_P (insn) && active_insn_p (insn))
+ {
+ edge_to_cancel = NULL;
+ break;
+ }
+ }
+ if (edge_to_cancel)
+ break;
+ }
+ VEC_free (edge, heap, exits);
+ }
+
/* Now remove the unreachable part of the last iteration and cancel
the loop. */
- remove_path (ein);
+ if (edge_to_cancel)
+ loop_cancelled = remove_path (edge_to_cancel);
+
+ /* We do not know why the loop iteration count is bounded (probably it
+ was determined at tree level from array bounds) or we failed
+ to remove the path making the loop count. Still cancel the loop
+ and redirect latch edge to barrier; we know it will never execute.
+ Hopefully later passes will get rid of the dead code. */
+ if (!loop_cancelled)
+ {
+ bool irred_invalidated = false;
+ int flags;
+ basic_block barrier_bb;
+ rtx barrier;
+
+ basic_block latch = loop->latch;
+ ein = loop_latch_edge (loop);
+ flags = ein->flags;
+ unloop (loop, &irred_invalidated);
+ if (irred_invalidated
+ && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
+ mark_irreducible_loops ();
+ barrier_bb = create_basic_block (NULL, NULL, latch);
+ barrier = emit_barrier_after (BB_END (barrier_bb));
+ BB_FOOTER (barrier_bb) = unlink_insn_chain (barrier, barrier);
+ BB_END (barrier_bb) = BB_HEAD (barrier_bb);
+ BLOCK_FOR_INSN (barrier) = NULL;
+
+ barrier_bb->frequency = 0;
+ barrier_bb->count = 0;
+ barrier_bb->loop_father = current_loops->tree_root;
+ current_loops->tree_root->num_nodes++;
+ set_immediate_dominator (CDI_DOMINATORS, barrier_bb, latch);
+
+ ein = make_edge (latch, barrier_bb, flags);
+ ein->probability = 0;
+ ein->count = 0;
+ }
if (dump_file)
- fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
+ {
+ fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
+ if (!loop_cancelled)
+ fprintf (dump_file, ";; Loop latch redirected to barrier\n");
+ }
}
/* Decide whether to unroll LOOP iterating constant number of times
@@ -519,6 +614,7 @@ decide_unroll_constant_iterations (struc
{
unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
struct niter_desc *desc;
+ double_int iterations;
if (!(flags & UAP_UNROLL))
{
@@ -561,8 +657,14 @@ decide_unroll_constant_iterations (struc
return;
}
- /* Check whether the loop rolls enough to consider. */
- if (desc->niter < 2 * nunroll)
+ /* Check whether the loop rolls enough to consider.
+ Consult also loop bounds and profile; in the case the loop has more
+ than one exit it may well loop less than determined maximal number
+ of iterations. */
+ if (desc->niter < 2 * nunroll
+ || ((estimated_loop_iterations (loop, &iterations)
+ || max_loop_iterations (loop, &iterations))
+ && iterations.ult (double_int::from_shwi (2 * nunroll))))
{
if (dump_file)
fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
@@ -1221,7 +1323,6 @@ static void
decide_peel_simple (struct loop *loop, int flags)
{
unsigned npeel;
- struct niter_desc *desc;
double_int iterations;
if (!(flags & UAP_PEEL))
@@ -1246,20 +1347,17 @@ decide_peel_simple (struct loop *loop, i
return;
}
- /* Check for simple loops. */
- desc = get_simple_loop_desc (loop);
-
- /* Check number of iterations. */
- if (desc->simple_p && !desc->assumptions && desc->const_iter)
- {
- if (dump_file)
- fprintf (dump_file, ";; Loop iterates constant times\n");
- return;
- }
-
/* Do not simply peel loops with branches inside -- it increases number
- of mispredicts. */
- if (num_loop_branches (loop) > 1)
+ of mispredicts.
+ Exception is when we do have profile and we however have good chance
+ to peel proper number of iterations loop will iterate in practice.
+ TODO: this heuristic needs tunning; while for complette unrolling
+ the branch inside loop mostly eliminates any improvements, for
+ peeling it is not the case. Also a function call inside loop is
+ also branch from branch prediction POV (and probably better reason
+ to not unroll/peel). */
+ if (num_loop_branches (loop) > 1
+ && profile_status != PROFILE_READ)
{
if (dump_file)
fprintf (dump_file, ";; Not peeling, contains branches\n");
@@ -1428,7 +1526,9 @@ decide_unroll_stupid (struct loop *loop,
}
/* Do not unroll loops with branches inside -- it increases number
- of mispredicts. */
+ of mispredicts.
+ TODO: this heuristic needs tunning; call inside the loop body
+ is also relatively good reason to not unroll. */
if (num_loop_branches (loop) > 1)
{
if (dump_file)
===================================================================
@@ -0,0 +1,23 @@
+/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops -fno-peel-loops" } */
+void abort ();
+
+int a[1000];
+int
+__attribute__ ((noinline))
+t()
+{
+ int i;
+ for (i=0;i<1000;i++)
+ if (!a[i])
+ return 1;
+ abort ();
+}
+main()
+{
+ int i;
+ for (i=0;i<1000;i++)
+ t();
+ return 0;
+}
+/* { dg-final-use { scan-rtl-dump "Considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
+/* { dg-final-use { cleanup-rtl-dump "Not unrolling loop, doesn't roll" } } */
===================================================================
@@ -0,0 +1,25 @@
+/* { dg-options "-O3 -fdump-rtl-loop2_unroll -fno-unroll-loops -fpeel-loops" } */
+void abort();
+
+int a[1000];
+int
+__attribute__ ((noinline))
+t()
+{
+ int i;
+ for (i=0;i<1000;i++)
+ if (!a[i])
+ return 1;
+ abort ();
+}
+main()
+{
+ int i;
+ for (i=0;i<1000;i++)
+ t();
+ return 0;
+}
+/* { dg-final-use { scan-rtl-dump "Considering simply peeling loop" "loop2_unroll" } } */
+/* In fact one peeling is enough; we however mispredict number of iterations of the loop
+ at least until loop_ch is schedule ahead of profiling pass. */
+/* { dg-final-use { cleanup-rtl-dump "Decided to simply peel the loop 2 times" } } */
===================================================================
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fno-peel-loops -fdisable-tree-cunroll -fdisable-tree-cunrolli -funroll-loops" } */
+
+int a[2];
+test(int c)
+{
+ int i;
+ for (i=0;i<c;i++)
+ {
+ a[i]=5;
+ if (test2())
+ return;
+ }
+}
+
+/* { dg-final { scan-rtl-dump-times "Decided to peel loop completely" 1 "loop2_unroll" } } */
+/* { dg-final { scan-rtl-dump-times "Peeled loop completely, 2 times" 1 "loop2_unroll" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */