@@ -1912,6 +1912,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
+#undef TARGET_HAVE_COUNT_REG_DECR_P
+#define TARGET_HAVE_COUNT_REG_DECR_P true
+
#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
@@ -11618,6 +11618,14 @@ loops, and will help ivopts to make some decisions.
The default version of this hook returns false.
@end deftypefn
+@deftypevr {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P
+Return true if the target supports hardware count register for decrement
+and branch. This count register can't be used as general register since
+moving to/from a general register from/to it is very expensive.
+For the targets with this support, ivopts can take doloop use as zero cost.
+The default value is false.
+@end deftypevr
+
@deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int @var{&iterations}, const widest_int @var{&iterations_max}, unsigned int @var{loop_depth}, bool @var{entered_at_top})
Return true if it is possible to use low-overhead loops (@code{doloop_end}
and @code{doloop_begin}) for a particular loop. @var{iterations} gives the
@@ -7946,6 +7946,8 @@ to by @var{ce_info}.
@hook TARGET_PREDICT_DOLOOP_P
+@hook TARGET_HAVE_COUNT_REG_DECR_P
+
@hook TARGET_CAN_USE_DOLOOP_P
@hook TARGET_INVALID_WITHIN_DOLOOP
@@ -4246,6 +4246,16 @@ The default version of this hook returns false.",
bool, (struct loop *loop),
default_predict_doloop_p)
+DEFHOOKPOD
+(have_count_reg_decr_p,
+ "Return true if the target supports hardware count register for decrement\n\
+and branch. This count register can't be used as general register since\n\
+moving to/from a general register from/to it is very expensive.\n\
+For the targets with this support, ivopts can take doloop use as zero cost.\n\
+The default value is false.",
+ bool, false)
+
+
DEFHOOK
(can_use_doloop_p,
"Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\
@@ -18,5 +18,5 @@ f1 (char *p, uintptr_t i, uintptr_t n)
}
/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */
-/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts"} } */
-/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" } } */
+/* { dg-final { scan-tree-dump-times "PHI <p_" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "p_\[0-9\]* <" 1 "ivopts" { target { ! powerpc*-*-* } } } } */
@@ -399,6 +399,8 @@ struct iv_group
struct cost_pair *cost_map;
/* The selected candidate for the group. */
struct iv_cand *selected;
+ /* Whether this is a doloop use group. */
+ bool doloop_p;
/* Uses in the group. */
vec<struct iv_use *> vuses;
};
@@ -612,6 +614,9 @@ struct ivopts_data
/* Whether the loop body can only be exited via single exit. */
bool loop_single_exit_p;
+
+ /* Whether the loop has a doloop comparison use. */
+ bool doloop_use_p;
};
/* An assignment of iv candidates to uses. */
@@ -630,6 +635,9 @@ struct iv_ca
/* Number of times each candidate is used. */
unsigned *n_cand_uses;
+ /* Number of doloop uses of each candidate. */
+ unsigned *n_cand_doloop_uses;
+
/* The candidates used. */
bitmap cands;
@@ -1528,6 +1536,7 @@ record_group (struct ivopts_data *data, enum use_type type)
group->type = type;
group->related_cands = BITMAP_ALLOC (NULL);
group->vuses.create (1);
+ group->doloop_p = false;
data->vgroups.safe_push (group);
return group;
@@ -3724,7 +3733,7 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data)
Some RTL specific checks seems unable to be checked in gimple, if any new
checks or easy checks _are_ missing here, please add them. */
-static bool ATTRIBUTE_UNUSED
+static bool
generic_predict_doloop_p (struct ivopts_data *data)
{
struct loop *loop = data->current_loop;
@@ -5291,6 +5300,17 @@ determine_group_iv_cost_cond (struct ivopts_data *data,
return !cost.infinite_cost_p ();
}
+/* Reset the recorded cost of doloop use GROUP with CAND to no_cost. */
+
+static void
+adjust_group_iv_cost_for_doloop (struct ivopts_data *data,
+ struct iv_group *group, struct iv_cand *cand)
+{
+ struct cost_pair *cp = get_group_iv_cost (data, group, cand);
+ gcc_assert (cp);
+ cp->cost = no_cost;
+}
+
/* Determines cost of computing uses in GROUP with CAND. Returns false
if USE cannot be represented with CAND. */
@@ -5308,7 +5328,12 @@ determine_group_iv_cost (struct ivopts_data *data,
return determine_group_iv_cost_address (data, group, cand);
case USE_COMPARE:
- return determine_group_iv_cost_cond (data, group, cand);
+ {
+ bool finite_cost_p = determine_group_iv_cost_cond (data, group, cand);
+ if (data->doloop_use_p && group->doloop_p && finite_cost_p)
+ adjust_group_iv_cost_for_doloop (data, group, cand);
+ return finite_cost_p;
+ }
default:
gcc_unreachable ();
@@ -5829,11 +5854,15 @@ iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
ivs->cand_for_group[gid] = NULL;
ivs->n_cand_uses[cid]--;
+ if (group->doloop_p)
+ ivs->n_cand_doloop_uses[cid]--;
+ else if (ivs->n_cand_uses[cid] == ivs->n_cand_doloop_uses[cid])
+ ivs->cand_cost -= cp->cand->cost;
+
if (ivs->n_cand_uses[cid] == 0)
{
bitmap_clear_bit (ivs->cands, cid);
ivs->n_cands--;
- ivs->cand_cost -= cp->cand->cost;
iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
}
@@ -5886,11 +5915,14 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
ivs->bad_groups--;
ivs->cand_for_group[gid] = cp;
ivs->n_cand_uses[cid]++;
+ if (group->doloop_p)
+ ivs->n_cand_doloop_uses[cid]++;
+ else if (ivs->n_cand_uses[cid] == (ivs->n_cand_doloop_uses[cid] + 1))
+ ivs->cand_cost += cp->cand->cost;
if (ivs->n_cand_uses[cid] == 1)
{
bitmap_set_bit (ivs->cands, cid);
ivs->n_cands++;
- ivs->cand_cost += cp->cand->cost;
iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
}
@@ -6098,6 +6130,7 @@ iv_ca_new (struct ivopts_data *data)
nw->cand_for_group = XCNEWVEC (struct cost_pair *,
data->vgroups.length ());
nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
+ nw->n_cand_doloop_uses = XCNEWVEC (unsigned, data->vcands.length ());
nw->cands = BITMAP_ALLOC (NULL);
nw->n_cands = 0;
nw->n_invs = 0;
@@ -6117,6 +6150,7 @@ iv_ca_free (struct iv_ca **ivs)
{
free ((*ivs)->cand_for_group);
free ((*ivs)->n_cand_uses);
+ free ((*ivs)->n_cand_doloop_uses);
BITMAP_FREE ((*ivs)->cands);
free ((*ivs)->n_inv_var_uses);
free ((*ivs)->n_inv_expr_uses);
@@ -7568,6 +7602,47 @@ determine_scaling_factor (struct ivopts_data *data, basic_block *body)
}
}
+/* Mark the group of the doloop compare use; return true if one is found. */
+
+static bool
+find_doloop_use (struct ivopts_data *data)
+{
+ struct loop *loop = data->current_loop;
+
+ for (unsigned i = 0; i < data->vgroups.length (); i++)
+ {
+ struct iv_group *group = data->vgroups[i];
+ if (group->type == USE_COMPARE)
+ {
+ gcc_assert (group->vuses.length () == 1);
+ struct iv_use *use = group->vuses[0];
+ gimple *stmt = use->stmt;
+ if (gimple_code (stmt) == GIMPLE_COND)
+ {
+ basic_block bb = gimple_bb (stmt);
+ edge true_edge, false_edge;
+ extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
+ /* The comparison branches to the loop latch. Require the latch to be
+ empty for now. */
+ if ((loop->latch == true_edge->dest
+ || loop->latch == false_edge->dest)
+ && empty_block_p (loop->latch))
+ {
+ group->doloop_p = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Doloop cmp iv use: ");
+ print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
+ }
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
/* Optimizes the LOOP. Returns true if anything changed. */
static bool
@@ -7580,6 +7655,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
basic_block *body;
gcc_assert (!data->niters);
+ data->doloop_use_p = false;
data->current_loop = loop;
data->loop_loc = find_loop_location (loop).get_location_t ();
data->speed = optimize_loop_for_speed_p (loop);
@@ -7625,6 +7701,19 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
/* Finds candidates for the induction variables (item 2). */
find_iv_candidates (data);
+ if (flag_branch_on_count_reg && targetm.have_count_reg_decr_p
+ && generic_predict_doloop_p (data))
+ {
+ data->doloop_use_p = find_doloop_use (data);
+ if (data->doloop_use_p && dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Predict loop %d can perform doloop optimization later.\n",
+ loop->num);
+ flow_loop_dump (loop, dump_file, NULL, 1);
+ }
+ }
+
/* Calculates the costs (item 3, part 1). */
determine_iv_costs (data);
determine_group_iv_costs (data);