@@ -473,6 +473,9 @@ struct iv_cand
struct iv *orig_iv; /* The original iv if this cand is added from biv with
smaller type. */
bool doloop_p; /* Whether this is a doloop candidate. */
+ bool reg_offset_p; /* Whether this is available for an address type group
+ where its all uses are valid to adopt reg_offset
+ addressing mode even considering unrolling. */
};
/* Hashtable entry for common candidate derived from iv uses. */
@@ -653,6 +656,10 @@ struct ivopts_data
/* Whether the loop has doloop comparison use. */
bool doloop_use_p;
+
+ /* Whether need to consider register offset addressing mode for the loop with
+ upcoming unrolling by estimated unroll factor. */
+ bool consider_reg_offset_for_unroll_p;
};
/* An assignment of iv candidates to uses. */
@@ -2731,6 +2738,112 @@ split_address_groups (struct ivopts_data *data)
}
}
+/* For each address type group, it finds the address-based IV candidates with
+ the same base and step, for those that are available to be used for the
+ whole group with reg_offset addressing mode by considering the address offset
+ difference and increased offset with unrolling factor estimation, mark them
+ as reg_offset_p. */
+
+static void
+mark_reg_offset_candidates (struct ivopts_data *data)
+{
+ class loop *loop = data->current_loop;
+ gcc_assert (data->current_loop->estimated_unroll > 1);
+ bool any_reg_offset_p = false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "<Reg_offset_p Candidates>:\n");
+
+ auto valid_reg_offset_p
+ = [] (struct iv_use *use, poly_uint64 off, poly_uint64 max_inc) {
+ if (!addr_offset_valid_p (use, off))
+ return false;
+ if (!addr_offset_valid_p (use, off + max_inc))
+ return false;
+ return true;
+ };
+
+ for (unsigned i = 0; i < data->vgroups.length (); i++)
+ {
+ struct iv_group *group = data->vgroups[i];
+
+ if (address_p (group->type))
+ {
+ struct iv_use *head_use = group->vuses[0];
+ if (!tree_fits_poly_int64_p (head_use->iv->step))
+ continue;
+
+ poly_int64 step = tree_to_poly_int64 (head_use->iv->step);
+ /* Max extra offset to be added due to unrolling. */
+ poly_int64 max_increase = (loop->estimated_unroll - 1) * step;
+
+ tree use_base = head_use->addr_base;
+ STRIP_NOPS (use_base);
+
+ struct iv_use *last_use = NULL;
+ unsigned group_size = group->vuses.length ();
+ gcc_assert (group_size >= 1);
+ if (maybe_ne (head_use->addr_offset,
+ group->vuses[group_size - 1]->addr_offset))
+ last_use = group->vuses[group_size - 1];
+
+ unsigned j;
+ bitmap_iterator bi;
+ EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
+ {
+ struct iv_cand *cand = data->vcands[j];
+
+ if (!cand->iv->base_object)
+ continue;
+
+ if (cand->reg_offset_p)
+ continue;
+
+ if (!operand_equal_p (head_use->iv->base_object,
+ cand->iv->base_object, 0))
+ continue;
+
+ if (!operand_equal_p (head_use->iv->step, cand->iv->step, 0))
+ continue;
+
+ poly_uint64 cand_offset = 0;
+ tree cand_base = strip_offset (cand->iv->base, &cand_offset);
+ STRIP_NOPS (cand_base);
+ if (!operand_equal_p (use_base, cand_base, 0))
+ continue;
+
+ /* Only need to check the first one and the last one in the group
+ since it's sorted. If both are valid, the other intermediate
+ ones should be in the acceptable range. */
+ poly_uint64 head_off = head_use->addr_offset - cand_offset;
+ if (!valid_reg_offset_p (head_use, head_off, max_increase))
+ continue;
+
+ if (last_use)
+ {
+ poly_int64 last_off = last_use->addr_offset - cand_offset;
+ if (!valid_reg_offset_p (head_use, last_off, max_increase))
+ continue;
+ }
+
+ cand->reg_offset_p = true;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " cand %u valid for group %u\n", j, i);
+
+ if (!any_reg_offset_p)
+ any_reg_offset_p = true;
+ }
+ }
+ }
+
+ if (!any_reg_offset_p)
+ data->consider_reg_offset_for_unroll_p = false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\n");
+}
+
/* Finds uses of the induction variables that are interesting. */
static void
@@ -3147,6 +3260,7 @@ add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
cand->important = important;
cand->incremented_at = incremented_at;
cand->doloop_p = doloop;
+ cand->reg_offset_p = false;
data->vcands.safe_push (cand);
if (!poly_int_tree_p (step))
@@ -3654,6 +3768,14 @@ set_group_iv_cost (struct ivopts_data *data,
return;
}
+ /* Since we priced more on non reg_offset IV cand step cost, we should scale
+ up the appropriate IV group costs. Simply consider USE_COMPARE at the
+ loop exit, FIXME if multiple exits supported or no loop exit comparisons
+ matter. */
+ if (data->consider_reg_offset_for_unroll_p
+ && group->vuses[0]->type != USE_COMPARE)
+ cost *= (HOST_WIDE_INT) data->current_loop->estimated_unroll;
+
if (data->consider_all_candidates)
{
group->cost_map[cand->id].cand = cand;
@@ -5718,6 +5840,9 @@ find_iv_candidates (struct ivopts_data *data)
if (!data->consider_all_candidates)
relate_compare_use_with_all_cands (data);
+ if (data->consider_reg_offset_for_unroll_p)
+ mark_reg_offset_candidates (data);
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
unsigned i;
@@ -5890,6 +6015,10 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
cost = cost_step + adjust_setup_cost (data, cost_base.cost);
+ /* Consider additional step updates during unrolling. */
+ if (data->consider_reg_offset_for_unroll_p && !cand->reg_offset_p)
+ cost += (data->current_loop->estimated_unroll - 1) * cost_step;
+
/* Prefer the original ivs unless we may gain something by replacing it.
The reason is to make debugging simpler; so this is not relevant for
artificial ivs created by other optimization passes. */
@@ -7976,6 +8105,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
data->current_loop = loop;
data->loop_loc = find_loop_location (loop).get_location_t ();
data->speed = optimize_loop_for_speed_p (loop);
+ data->consider_reg_offset_for_unroll_p = false;
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -8008,6 +8138,16 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
if (!find_induction_variables (data))
goto finish;
+ if (param_iv_consider_reg_offset_for_unroll != 0 && exit)
+ {
+ tree_niter_desc *desc = niter_for_exit (data, exit);
+ estimate_unroll_factor (loop, desc);
+ data->consider_reg_offset_for_unroll_p = loop->estimated_unroll > 1;
+ if (dump_file && (dump_flags & TDF_DETAILS)
+ && data->consider_reg_offset_for_unroll_p)
+ fprintf (dump_file, "\nEstimated_unroll:%u\n", loop->estimated_unroll);
+ }
+
/* Finds interesting uses (item 1). */
find_interesting_uses (data);
if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)