@@ -10282,7 +10282,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gimple *new_stmt;
gphi *induction_phi;
tree induc_def, vec_dest;
- tree init_expr, step_expr;
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned i;
tree expr;
@@ -10368,7 +10367,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
iv_loop = loop;
gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
- if (slp_node && !nunits.is_constant ())
+ if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1))
{
/* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
@@ -10386,7 +10385,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return false;
}
- step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+ tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
gcc_assert (step_expr != NULL_TREE);
if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
&& !type_has_mode_precision_p (TREE_TYPE (step_expr)))
@@ -10474,9 +10473,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
[i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
if (slp_node)
{
- /* Enforced above. */
- unsigned int const_nunits = nunits.to_constant ();
-
/* The initial values are vectorized, but any lanes > group_size
need adjustment. */
slp_tree init_node
@@ -10498,11 +10494,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* Now generate the IVs. */
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert ((const_nunits * nvects) % group_size == 0);
+ gcc_assert (multiple_p (nunits * nvects, group_size));
unsigned nivs;
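+ /* Only initialized when nunits is a compile-time constant.  */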
+ unsigned HOST_WIDE_INT const_nunits;
if (nested_in_vect_loop)
nivs = nvects;
- else
+ else if (nunits.is_constant (&const_nunits))
{
/* Compute the number of distinct IVs we need. First reduce
group_size if it is a multiple of const_nunits so we get
@@ -10513,21 +10510,42 @@ vectorizable_induction (loop_vec_info loop_vinfo,
nivs = least_common_multiple (group_sizep,
const_nunits) / const_nunits;
}
+ else
+ {
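+ /* Variable-length vectors are only supported for single-lane
+ SLP, so a single IV covers the whole group.  */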
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+ nivs = 1;
+ }
+ gimple_seq init_stmts = NULL;
tree stept = TREE_TYPE (step_vectype);
tree lupdate_mul = NULL_TREE;
if (!nested_in_vect_loop)
{
- /* The number of iterations covered in one vector iteration. */
- unsigned lup_mul = (nvects * const_nunits) / group_size;
- lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept, lup_mul,
- UNSIGNED)
- : build_int_cstu (stept, lup_mul));
+ if (nunits.is_constant ())
+ {
+ /* The number of iterations covered in one vector iteration. */
+ unsigned lup_mul = (nvects * const_nunits) / group_size;
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, lup_mul,
+ UNSIGNED)
+ : build_int_cstu (stept, lup_mul));
+ }
+ else
+ {
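+ /* For variable-length vectors VF is not a compile-time
+ constant; build it as a scalar (converted to float if
+ need be) and broadcast it.  */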
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ {
+ tree tem = build_int_cst (integer_type_node, vf);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
+ }
+ else
+ lupdate_mul = build_int_cst (stept, vf);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ lupdate_mul);
+ }
}
tree peel_mul = NULL_TREE;
- gimple_seq init_stmts = NULL;
if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
{
if (SCALAR_FLOAT_TYPE_P (stept))
@@ -10539,10 +10557,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
peel_mul = gimple_build_vector_from_val (&init_stmts,
step_vectype, peel_mul);
}
+ tree step_mul = NULL_TREE;
unsigned ivn;
auto_vec<tree> vec_steps;
for (ivn = 0; ivn < nivs; ++ivn)
{
+ gimple_seq stmts = NULL;
+ bool invariant = true;
+ if (nunits.is_constant ())
+ {
tree_vector_builder step_elts (step_vectype, const_nunits, 1);
tree_vector_builder init_elts (vectype, const_nunits, 1);
tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
@@ -10570,13 +10593,65 @@ vectorizable_induction (loop_vec_info loop_vinfo,
: build_int_cstu (stept, mul_elt));
}
vec_step = gimple_build_vector (&init_stmts, &step_elts);
- vec_steps.safe_push (vec_step);
- tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
- if (peel_mul)
- step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype,
- step_mul, peel_mul);
+ step_mul = gimple_build_vector (&init_stmts, &mul_elts);
if (!init_node)
vec_init = gimple_build_vector (&init_stmts, &init_elts);
+ }
+ else
+ {
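+ /* With an SLP init node the vectorized initial values are
+ obtained from it further below.  */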
+ if (init_node)
+ ;
+ else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
+ {
+ new_name = gimple_convert (&init_stmts, stept, inits[0]);
+ /* Build the initial value directly from a VEC_SERIES_EXPR. */
+ vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
+ step_vectype, new_name, steps[0]);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
+ }
+ else
+ {
+ /* Build:
+ [base, base, base, ...]
+ + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
+ gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
+ gcc_assert (flag_associative_math);
+ tree index = build_index_vector (step_vectype, 0, 1);
+ new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]),
+ inits[0]);
+ tree base_vec = gimple_build_vector_from_val (&init_stmts, step_vectype,
+ new_name);
+ tree step_vec = gimple_build_vector_from_val (&init_stmts, step_vectype,
+ steps[0]);
+ vec_init = gimple_build (&init_stmts, FLOAT_EXPR, step_vectype, index);
+ vec_init = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_init, step_vec);
+ vec_init = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+ vec_init, base_vec);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
+ }
+ /* Generate the vector step from the scalar step:
+ vec_step = [S, S, S, S].  */
+ t = unshare_expr (steps[0]);
+ gcc_assert (CONSTANT_CLASS_P (t)
+ || TREE_CODE (t) == SSA_NAME);
+ vec_step = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, t);
+ }
+ vec_steps.safe_push (vec_step);
+ if (peel_mul)
+ {
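+ /* In the variable-length path step_mul is still NULL at this
+ point, so the peel adjustment becomes the whole multiplier.  */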
+ if (!step_mul)
+ step_mul = peel_mul;
+ else
+ step_mul = gimple_build (&init_stmts,
+ MINUS_EXPR, step_vectype,
+ step_mul, peel_mul);
+ }
/* Create the induction-phi that defines the induction-operand. */
vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
@@ -10587,9 +10662,35 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* Create the iv update inside the loop */
tree up = vec_step;
if (lupdate_mul)
- up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
- vec_step, lupdate_mul);
- gimple_seq stmts = NULL;
+ {
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+ /* When we're using a loop_len produced by SELECT_VL, the non-final
+ iterations do not always process VF elements. So instead of
+ updating the induction variable with
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ we should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
+ vectype, 0, 0);
+ expr = gimple_convert (&stmts, stept, unshare_expr (len));
+ lupdate_mul = gimple_build_vector_from_val (&stmts,
+ step_vectype,
+ expr);
+ up = gimple_build (&stmts, MULT_EXPR,
+ step_vectype, vec_step, lupdate_mul);
+ }
+ else
+ up = gimple_build (&init_stmts,
+ MULT_EXPR, step_vectype,
+ vec_step, lupdate_mul);
+ }
vec_def = gimple_convert (&stmts, step_vectype, induc_def);
vec_def = gimple_build (&stmts,
PLUS_EXPR, step_vectype, vec_def, up);
@@ -10601,8 +10702,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (init_node)
vec_init = vect_get_slp_vect_def (init_node, ivn);
if (!nested_in_vect_loop
+ && step_mul
&& !integer_zerop (step_mul))
{
+ gcc_assert (invariant);
vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
vec_step, step_mul);
@@ -10619,8 +10722,11 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (!nested_in_vect_loop)
{
/* Fill up to the number of vectors we need for the whole group. */
- nivs = least_common_multiple (group_size,
- const_nunits) / const_nunits;
+ if (nunits.is_constant ())
+ nivs = least_common_multiple (group_size,
+ const_nunits) / const_nunits;
+ else
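+ /* The variable-length case is single-lane, so one IV
+ already fills the group.  */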
+ nivs = 1;
vec_steps.reserve (nivs-ivn);
for (; ivn < nivs; ++ivn)
{
@@ -10633,14 +10739,28 @@ vectorizable_induction (loop_vec_info loop_vinfo,
stmts by adding VF' * stride to the IVs generated above. */
if (ivn < nvects)
{
- unsigned vfp
- = least_common_multiple (group_size, const_nunits) / group_size;
- tree lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept,
- vfp, UNSIGNED)
- : build_int_cstu (stept, vfp));
+ if (nunits.is_constant ())
+ {
+ unsigned vfp
+ = least_common_multiple (group_size, const_nunits) / group_size;
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ vfp, UNSIGNED)
+ : build_int_cstu (stept, vfp));
+ }
+ else
+ {
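+ /* nunits is a runtime value here; materialize it as a
+ scalar and broadcast it.  */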
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ {
+ tree tem = build_int_cst (integer_type_node, nunits);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
+ }
+ else
+ lupdate_mul = build_int_cst (stept, nunits);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, lupdate_mul);
+ }
for (; ivn < nvects; ++ivn)
{
gimple *iv
@@ -10672,7 +10792,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return true;
}
- init_expr = vect_phi_initial_value (phi);
+ tree init_expr = vect_phi_initial_value (phi);
gimple_seq stmts = NULL;
if (!nested_in_vect_loop)