@@ -3078,10 +3078,23 @@ start_over:
if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type,
OPTIMIZE_FOR_SPEED)
&& LOOP_VINFO_LENS (loop_vinfo).length () == 1
- && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && !slp
+ && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1
&& (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()))
LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true;
+
+ /* If any of the SLP instances cover more than a single lane
+ we cannot use .SELECT_VL at the moment, even if the number
+ of lanes is uniform throughout the SLP graph. */
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ for (slp_instance inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
+ if (SLP_TREE_LANES (SLP_INSTANCE_TREE (inst)) != 1
+ && !(SLP_INSTANCE_KIND (inst) == slp_inst_kind_store
+ && SLP_INSTANCE_TREE (inst)->ldst_lanes))
+ {
+ LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = false;
+ break;
+ }
}

/* Decide whether this loop_vinfo should use partial vectors or peeling,
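For context, here is a minimal, self-contained C sketch of the strip-mining
that .SELECT_VL enables; select_vl below is a hypothetical stand-in for the
internal function (a real target such as RVV may return any value in [1, vf]
for a full group, not necessarily the minimum), and none of the names are GCC
API.  It illustrates why the check added above bails out for multi-lane SLP
instances: each scalar iteration of such an instance occupies SLP_TREE_LANES
vector lanes, so the controlling length would have to stay a multiple of the
lane count, which .SELECT_VL does not guarantee.  Store instances using
load/store-lanes are exempt since their lanes are emitted as a single unit.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for .SELECT_VL: the target chooses how many
   scalar iterations the next vector iteration covers, anywhere in
   [1, vf] when more than vf elements remain.  */
static size_t
select_vl (size_t remaining, size_t vf)
{
  return remaining < vf ? remaining : vf;
}

int
main (void)
{
  int b[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, a[10];
  const size_t n = 10, vf = 4;

  for (size_t i = 0; i < n;)
    {
      size_t len = select_vl (n - i, vf);
      /* Models one length-controlled vector statement; with a
         single-lane SLP graph any len in [1, vf] is fine, because one
         vector lane corresponds to exactly one scalar iteration.  */
      for (size_t j = 0; j < len; j++)
        a[i + j] = 2 * b[i + j];
      i += len;  /* the data-pointer bump is likewise len-scaled */
    }

  for (size_t i = 0; i < n; i++)
    printf ("%d ", a[i]);
  printf ("\n");
}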
@@ -8744,8 +8744,9 @@ vectorizable_store (vec_info *vinfo,
aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
else
aggr_type = vectype;
- bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
- memory_access_type, loop_lens);
+ if (!costing_p)
+ bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+ memory_access_type, loop_lens);
}

if (mask && !costing_p)
@@ -10820,8 +10821,9 @@ vectorizable_load (vec_info *vinfo,
aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
else
aggr_type = vectype;
- bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
- memory_access_type, loop_lens);
+ if (!costing_p)
+ bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+ memory_access_type, loop_lens);
}

auto_vec<tree> vec_offsets;
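The last two hunks add the same guard to the store and the load path.
vectorizable_store and vectorizable_load run once during analysis for costing,
where gsi is null and nothing may be emitted, and again during the transform;
on the .SELECT_VL path vect_get_data_ptr_increment needs gsi to insert the
statement computing the length-scaled bump, so the call has to be deferred to
the transform.  Below is a minimal sketch of that analyze/transform split,
with hypothetical names rather than the GCC API:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct stmt_iter { int pos; };  /* stand-in for gimple_stmt_iterator */

/* Stand-in for the variable-length case of vect_get_data_ptr_increment:
   it emits a statement computing bump = len * step, so it requires a
   valid insertion point.  */
static int
get_bump (struct stmt_iter *gsi, int step)
{
  assert (gsi != NULL);  /* would trip if reached while costing */
  printf ("emit at %d: bump = len * %d\n", gsi->pos, step);
  return step;
}

static void
handle_store (bool costing_p, struct stmt_iter *gsi)
{
  int bump = 0;
  if (!costing_p)  /* the guard the patch adds */
    bump = get_bump (gsi, 4);
  (void) bump;     /* code generation would consume bump here */
}

int
main (void)
{
  handle_store (true, NULL);    /* analysis pass: cost only, emit nothing */
  struct stmt_iter it = { 7 };
  handle_store (false, &it);    /* transform pass: emit the bump statement */
}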