@@ -10283,7 +10283,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gimple *new_stmt;
gphi *induction_phi;
tree induc_def, vec_dest;
- tree init_expr, step_expr;
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned i;
tree expr;
@@ -10369,7 +10368,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
iv_loop = loop;
gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
- if (slp_node && !nunits.is_constant ())
+ if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1))
{
/* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
@@ -10387,7 +10386,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return false;
}
- step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+ tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
gcc_assert (step_expr != NULL_TREE);
if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
&& !type_has_mode_precision_p (TREE_TYPE (step_expr)))
@@ -10475,9 +10474,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
[i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
if (slp_node)
{
- /* Enforced above. */
- unsigned int const_nunits = nunits.to_constant ();
-
/* The initial values are vectorized, but any lanes > group_size
need adjustment. */
slp_tree init_node
@@ -10499,11 +10495,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* Now generate the IVs. */
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert ((const_nunits * nvects) % group_size == 0);
+ gcc_assert (multiple_p (nunits * nvects, group_size));
unsigned nivs;
+ unsigned HOST_WIDE_INT const_nunits;
if (nested_in_vect_loop)
nivs = nvects;
- else
+ else if (nunits.is_constant (&const_nunits))
{
/* Compute the number of distinct IVs we need. First reduce
group_size if it is a multiple of const_nunits so we get
@@ -10514,21 +10511,43 @@ vectorizable_induction (loop_vec_info loop_vinfo,
nivs = least_common_multiple (group_sizep,
const_nunits) / const_nunits;
}
+ else
+ {
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+ nivs = 1;
+ }
+ gimple_seq init_stmts = NULL;
tree stept = TREE_TYPE (step_vectype);
tree lupdate_mul = NULL_TREE;
if (!nested_in_vect_loop)
{
- /* The number of iterations covered in one vector iteration. */
- unsigned lup_mul = (nvects * const_nunits) / group_size;
- lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept, lup_mul,
- UNSIGNED)
- : build_int_cstu (stept, lup_mul));
+ if (nunits.is_constant (&const_nunits))
+ {
+ /* The number of iterations covered in one vector iteration. */
+ unsigned lup_mul = (nvects * const_nunits) / group_size;
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, lup_mul,
+ UNSIGNED)
+ : build_int_cstu (stept, lup_mul));
+ }
+ else
+ {
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ {
+ tree tem = build_int_cst (integer_type_node, vf);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
+ stept, tem);
+ }
+ else
+ lupdate_mul = build_int_cst (stept, vf);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ lupdate_mul);
+ }
}
tree peel_mul = NULL_TREE;
- gimple_seq init_stmts = NULL;
if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
{
if (SCALAR_FLOAT_TYPE_P (stept))
@@ -10540,44 +10559,105 @@ vectorizable_induction (loop_vec_info loop_vinfo,
peel_mul = gimple_build_vector_from_val (&init_stmts,
step_vectype, peel_mul);
}
+ tree step_mul = NULL_TREE;
unsigned ivn;
auto_vec<tree> vec_steps;
for (ivn = 0; ivn < nivs; ++ivn)
{
- tree_vector_builder step_elts (step_vectype, const_nunits, 1);
- tree_vector_builder init_elts (vectype, const_nunits, 1);
- tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
- for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
+ gimple_seq stmts = NULL;
+ bool invariant = true;
+ if (nunits.is_constant (&const_nunits))
{
- /* The scalar steps of the IVs. */
- tree elt = steps[(ivn*const_nunits + eltn) % group_size];
- elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt);
- step_elts.quick_push (elt);
+ tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+ tree_vector_builder init_elts (vectype, const_nunits, 1);
+ tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
+ for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
+ {
+ /* The scalar steps of the IVs. */
+ tree elt = steps[(ivn*const_nunits + eltn) % group_size];
+ elt = gimple_convert (&init_stmts,
+ TREE_TYPE (step_vectype), elt);
+ step_elts.quick_push (elt);
+ if (!init_node)
+ {
+ /* The scalar inits of the IVs if not vectorized. */
+ elt = inits[(ivn*const_nunits + eltn) % group_size];
+ if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (elt)))
+ elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ TREE_TYPE (vectype), elt);
+ init_elts.quick_push (elt);
+ }
+ /* The number of steps to add to the initial values. */
+ unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
+ mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, mul_elt,
+ UNSIGNED)
+ : build_int_cstu (stept, mul_elt));
+ }
+ vec_step = gimple_build_vector (&init_stmts, &step_elts);
+ step_mul = gimple_build_vector (&init_stmts, &mul_elts);
if (!init_node)
+ vec_init = gimple_build_vector (&init_stmts, &init_elts);
+ }
+ else
+ {
+ if (init_node)
+ ;
+ else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
+ {
+ new_name = gimple_convert (&init_stmts, stept, inits[0]);
+ /* Build the initial value directly as a VEC_SERIES_EXPR. */
+ vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
+ step_vectype, new_name, steps[0]);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
+ }
+ else
{
- /* The scalar inits of the IVs if not vectorized. */
- elt = inits[(ivn*const_nunits + eltn) % group_size];
- if (!useless_type_conversion_p (TREE_TYPE (vectype),
- TREE_TYPE (elt)))
- elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
- TREE_TYPE (vectype), elt);
- init_elts.quick_push (elt);
+ /* Build:
+ [base, base, base, ...]
+ + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
+ gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
+ gcc_assert (flag_associative_math);
+ tree index = build_index_vector (step_vectype, 0, 1);
+ new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]),
+ inits[0]);
+ tree base_vec = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ new_name);
+ tree step_vec = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ steps[0]);
+ vec_init = gimple_build (&init_stmts, FLOAT_EXPR,
+ step_vectype, index);
+ vec_init = gimple_build (&init_stmts, MULT_EXPR,
+ step_vectype, vec_init, step_vec);
+ vec_init = gimple_build (&init_stmts, PLUS_EXPR,
+ step_vectype, vec_init, base_vec);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
}
- /* The number of steps to add to the initial values. */
- unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
- mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept,
- mul_elt, UNSIGNED)
- : build_int_cstu (stept, mul_elt));
+ /* iv_loop is nested in the loop to be vectorized. Generate:
+ vec_step = [S, S, S, S] */
+ t = unshare_expr (steps[0]);
+ gcc_assert (CONSTANT_CLASS_P (t)
+ || TREE_CODE (t) == SSA_NAME);
+ vec_step = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, t);
}
- vec_step = gimple_build_vector (&init_stmts, &step_elts);
vec_steps.safe_push (vec_step);
- tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
if (peel_mul)
- step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype,
- step_mul, peel_mul);
- if (!init_node)
- vec_init = gimple_build_vector (&init_stmts, &init_elts);
+ {
+ if (!step_mul)
+ step_mul = peel_mul;
+ else
+ step_mul = gimple_build (&init_stmts,
+ MINUS_EXPR, step_vectype,
+ step_mul, peel_mul);
+ }
/* Create the induction-phi that defines the induction-operand. */
vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
@@ -10588,9 +10668,38 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* Create the iv update inside the loop */
tree up = vec_step;
if (lupdate_mul)
- up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
- vec_step, lupdate_mul);
- gimple_seq stmts = NULL;
+ {
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
/* When we're using the loop_len produced by SELECT_VL, the
non-final iterations are not always processing VF
elements. So vectorize the induction variable instead of
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ We should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
+ vectype, 0, 0);
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
+ else
+ expr = gimple_convert (&stmts, stept, len);
+ lupdate_mul = gimple_build_vector_from_val (&stmts,
+ step_vectype,
+ expr);
+ up = gimple_build (&stmts, MULT_EXPR,
+ step_vectype, vec_step, lupdate_mul);
+ }
+ else
+ up = gimple_build (&init_stmts,
+ MULT_EXPR, step_vectype,
+ vec_step, lupdate_mul);
+ }
vec_def = gimple_convert (&stmts, step_vectype, induc_def);
vec_def = gimple_build (&stmts,
PLUS_EXPR, step_vectype, vec_def, up);
@@ -10602,8 +10711,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (init_node)
vec_init = vect_get_slp_vect_def (init_node, ivn);
if (!nested_in_vect_loop
+ && step_mul
&& !integer_zerop (step_mul))
{
+ gcc_assert (invariant);
vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
vec_step, step_mul);
@@ -10620,8 +10731,11 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (!nested_in_vect_loop)
{
/* Fill up to the number of vectors we need for the whole group. */
- nivs = least_common_multiple (group_size,
- const_nunits) / const_nunits;
+ if (nunits.is_constant (&const_nunits))
+ nivs = least_common_multiple (group_size,
+ const_nunits) / const_nunits;
+ else
+ nivs = 1;
vec_steps.reserve (nivs-ivn);
for (; ivn < nivs; ++ivn)
{
@@ -10634,14 +10748,31 @@ vectorizable_induction (loop_vec_info loop_vinfo,
stmts by adding VF' * stride to the IVs generated above. */
if (ivn < nvects)
{
- unsigned vfp
- = least_common_multiple (group_size, const_nunits) / group_size;
- tree lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept,
- vfp, UNSIGNED)
- : build_int_cstu (stept, vfp));
+ if (nunits.is_constant (&const_nunits))
+ {
+ unsigned vfp = (least_common_multiple (group_size, const_nunits)
+ / group_size);
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ vfp, UNSIGNED)
+ : build_int_cstu (stept, vfp));
+ }
+ else
+ {
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ {
+ tree tem = build_int_cst (integer_type_node, nunits);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
+ stept, tem);
+ }
+ else
+ lupdate_mul = build_int_cst (stept, nunits);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ lupdate_mul);
+ }
for (; ivn < nvects; ++ivn)
{
gimple *iv
@@ -10673,7 +10804,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return true;
}
- init_expr = vect_phi_initial_value (phi);
+ tree init_expr = vect_phi_initial_value (phi);
gimple_seq stmts = NULL;
if (!nested_in_vect_loop)