new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+void foo (int * __restrict x, int *y) /* Each iteration stores four consecutive x elements computed from y[4*i+0] and y[4*i+2] only.  */
+{
+ x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); /* Let the compiler assume x is aligned to __BIGGEST_ALIGNMENT__ bytes.  */
+ y = __builtin_assume_aligned (y, __BIGGEST_ALIGNMENT__); /* Likewise for y.  */
+ for (int i = 0; i < 1024; ++i)
+ {
+ x[4*i+0] = y[4*i+0]; /* Lane 0: plain copy of y lane 0.  */
+ x[4*i+1] = y[4*i+2] * 2; /* Lane 1: y lane 2, doubled.  */
+ x[4*i+2] = y[4*i+0] + 3; /* Lane 2: y lane 0 again, plus 3.  */
+ x[4*i+3] = y[4*i+2] * 2 - 5; /* Lane 3: y lane 2 doubled, minus 5; y[4*i+1] and y[4*i+3] are never read.  */
+ }
+}
+
+/* Check we can handle SLP with gaps and an interleaving scheme. */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_int && vect_int_mult } } } } */
@@ -1080,10 +1080,15 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
stmt_vec_info stmt_info;
FOR_EACH_VEC_ELT (stmts, i, stmt_info)
{
- gimple *stmt = stmt_info->stmt;
swap[i] = 0;
matches[i] = false;
+ if (!stmt_info)
+ {
+ matches[i] = true;
+ continue;
+ }
+ gimple *stmt = stmt_info->stmt;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
@@ -1984,10 +1989,16 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
bool any_permute = false;
+ bool any_null = false;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
{
int load_place;
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ if (! load_info)
+ {
+ load_place = j;
+ any_null = true;
+ }
+ else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
load_place = vect_get_place_in_interleaving_chain
(load_info, first_stmt_info);
else
@@ -1996,6 +2007,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
any_permute |= load_place != j;
load_permutation.quick_push (load_place);
}
+ if (any_null)
+ {
+ gcc_assert (!any_permute);
+ load_permutation.release ();
+ }
if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
{
@@ -3978,24 +3994,11 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
stmt_vec_info first
= DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (loads[0])[0]);
- /* ??? In principle we have to consider a gap up to the next full
- vector, but we have to actually represent a scalar stmt for the
- gaps value so delay handling this. The same is true for
- inbetween gaps which the load places in the load-permutation
- represent. It's probably not worth trying an intermediate packing
- to vectors without gap even if that might handle some more cases.
- Instead get the gap case correct in some way. */
- unsigned group_lanes = 0;
- for (stmt_vec_info s = first; s; s = DR_GROUP_NEXT_ELEMENT (s))
- {
- if ((s == first && DR_GROUP_GAP (s) != 0)
- || (s != first && DR_GROUP_GAP (s) != 1))
- return;
- group_lanes++;
- }
/* Only a power-of-two number of lanes matches interleaving with N levels.
+ The non-SLP path also supports DR_GROUP_SIZE == 3.
??? An even number of lanes could be reduced to 1<<ceil_log2(N)-1 lanes
at each step. */
+ unsigned group_lanes = DR_GROUP_SIZE (first);
if (exact_log2 (group_lanes) == -1)
return;
@@ -4017,11 +4020,19 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
continue;
/* First build (and possibly re-use) a load node for the
- unpermuted group. */
+ unpermuted group. Gaps in the middle and at the end are
+ represented with NULL stmts. */
vec<stmt_vec_info> stmts;
stmts.create (group_lanes);
for (stmt_vec_info s = first; s; s = DR_GROUP_NEXT_ELEMENT (s))
- stmts.quick_push (s);
+ {
+ if (s != first)
+ for (unsigned i = 1; i < DR_GROUP_GAP (s); ++i)
+ stmts.quick_push (NULL);
+ stmts.quick_push (s);
+ }
+ for (unsigned i = 0; i < DR_GROUP_GAP (first); ++i)
+ stmts.quick_push (NULL);
poly_uint64 max_nunits;
bool *matches = XALLOCAVEC (bool, group_lanes);
unsigned limit = 1;