@@ -1195,3 +1195,27 @@ get_loop_copy (class loop *loop)
  else
    return NULL;
}
+
+/* Scale the profile counts of all basic blocks that are strictly
+   dominated by BB by NUM/DEN.  */
+
+void
+scale_strictly_dominated_blocks (basic_block bb,
+                                 profile_count num, profile_count den)
+{
+  basic_block son;
+
+  /* If DEN may be zero while NUM is not, the scale is undefined; give up.
+     (When NUM is zero, apply_scale just zeroes the counts, so DEN does
+     not matter.)  */
+  if (!den.nonzero_p () && !(num == profile_count::zero ()))
+    return;
+  auto_vec <basic_block, 8> worklist;
+  worklist.safe_push (bb);
+
+  while (!worklist.is_empty ())
+    for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
+         son;
+         son = next_dom_son (CDI_DOMINATORS, son))
+      {
+        son->count = son->count.apply_scale (num, den);
+        worklist.safe_push (son);
+      }
+}
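
For readers outside the GCC tree, a minimal standalone sketch of the same worklist walk follows. The toy_block type, its dom_sons vector and toy_scale_strictly_dominated are invented stand-ins for basic_block, the CDI_DOMINATORS first_dom_son/next_dom_son accessors and profile_count::apply_scale; it illustrates the technique only and is not GCC code.

/* Standalone illustration only; toy types, not GCC internals.  */
#include <cstdint>
#include <cstdio>
#include <vector>

struct toy_block
{
  uint64_t count;                     /* stand-in for profile_count.  */
  std::vector<toy_block *> dom_sons;  /* immediate dominator children.  */
};

/* Scale every block strictly dominated by BB by NUM/DEN, iteratively,
   mirroring the worklist structure of scale_strictly_dominated_blocks.  */
static void
toy_scale_strictly_dominated (toy_block *bb, uint64_t num, uint64_t den)
{
  if (den == 0)
    return;                           /* nothing sensible to do.  */

  std::vector<toy_block *> worklist;
  worklist.push_back (bb);
  while (!worklist.empty ())
    {
      toy_block *top = worklist.back ();
      worklist.pop_back ();
      for (toy_block *son : top->dom_sons)
        {
          /* BB itself is never rescaled; only its strict dominatees are.  */
          son->count = son->count * num / den;
          worklist.push_back (son);
        }
    }
}

int
main ()
{
  toy_block leaf { 250, {} };
  toy_block mid { 500, { &leaf } };
  toy_block root { 1000, { &mid } };
  toy_scale_strictly_dominated (&root, 1, 2);   /* halve the region.  */
  printf ("%llu %llu %llu\n", (unsigned long long) root.count,
          (unsigned long long) mid.count, (unsigned long long) leaf.count);
  /* Prints "1000 250 125": root untouched, strict dominatees halved.  */
  return 0;
}

The point to notice is that BB itself is pushed on the worklist but never rescaled; only its strict dominatees are, which is what the caller in fold_loop_internal_call below relies on after rescaling taken_edge->dest by hand.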
@@ -127,6 +127,8 @@ extern void set_bb_copy (basic_block, basic_block);
extern basic_block get_bb_copy (basic_block);
void set_loop_copy (class loop *, class loop *);
class loop *get_loop_copy (class loop *);
+void scale_strictly_dominated_blocks (basic_block,
+                                      profile_count, profile_count);

/* Generic RAII class to allocate a bit from storage of integer type T.
   The allocated bit is accessible as mask with the single bit set
@@ -7703,6 +7703,44 @@ fold_loop_internal_call (gimple *g, tree value)
      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
        SET_USE (use_p, value);
      update_stmt (use_stmt);
+      /* If we turn a conditional into a constant, scale the profile counts.
+         We know that the conditional was created by loop distribution and
+         that all basic blocks dominated by the taken edge belong to the
+         distributed loop.  */
+      if (gimple_code (use_stmt) == GIMPLE_COND)
+        {
+          edge true_edge, false_edge;
+          extract_true_false_edges_from_block (gimple_bb (use_stmt),
+                                               &true_edge, &false_edge);
+          edge taken_edge = NULL, other_edge = NULL;
+          if (gimple_cond_true_p (as_a <gcond *> (use_stmt)))
+            {
+              taken_edge = true_edge;
+              other_edge = false_edge;
+            }
+          else if (gimple_cond_false_p (as_a <gcond *> (use_stmt)))
+            {
+              taken_edge = false_edge;
+              other_edge = true_edge;
+            }
+          if (taken_edge
+              && !(taken_edge->probability == profile_probability::always ()))
+            {
+              profile_count old_count = taken_edge->count ();
+              profile_count new_count = taken_edge->src->count;
+              taken_edge->probability = profile_probability::always ();
+              other_edge->probability = profile_probability::never ();
+              /* If the destination has multiple predecessors, we cannot use
+                 the dominance test.  This should not happen, as the guarded
+                 code should start with a preheader.  */
+              gcc_assert (single_pred_edge (taken_edge->dest));
+              taken_edge->dest->count
+                = taken_edge->dest->count.apply_scale (new_count, old_count);
+              scale_strictly_dominated_blocks (taken_edge->dest,
+                                               new_count, old_count);
+            }
+        }
    }
}

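To make the scaling above concrete, here is a worked numeric sketch with invented numbers (they do not come from the patch or its PR): if the guard block executed 1000 times and the taken edge carried probability 60%, the guarded region was profiled as running 600 times; once the branch is proven always taken it must account for all 1000 executions, so each dominated block is scaled by new_count/old_count = 1000/600.

/* Worked example with invented numbers; plain integers stand in for
   profile_count and profile_count::apply_scale.  */
#include <cstdint>
#include <cstdio>

int
main ()
{
  uint64_t guard_count = 1000;      /* taken_edge->src->count in the patch.  */
  uint64_t old_count = 600;         /* taken_edge->count (): 60% probability.  */
  uint64_t new_count = guard_count; /* probability forced to "always".  */

  /* A block inside the guarded (distributed-loop) region.  */
  uint64_t region_block = 300;

  /* Same formula as count.apply_scale (new_count, old_count).  */
  region_block = region_block * new_count / old_count;

  printf ("%llu\n", (unsigned long long) region_block);  /* prints 500.  */
  return 0;
}

Roughly speaking, the single_pred_edge assertion is what makes this safe: with a single predecessor, the destination's count equals the edge count when the profile is consistent, so scaling the whole region by new_count/old_count restores consistency after the probabilities change.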
@@ -1,6 +1,7 @@
/* { dg-do compile } */
/* { dg-additional-options "-O3" } */
/* { dg-additional-options "-march=skylake-avx512" { target avx512f } } */
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */

extern unsigned long long int arr_86[];
extern unsigned long long int arr_87[][15];
@@ -14,3 +15,4 @@ void test(_Bool a, unsigned short c[][15], unsigned char d[])
arr_87[h][0] = a ? c[h][i] : 0;
}
}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */