Index: gcc/testsuite/gcc.dg/vect/bb-slp-8.c
===================================================================
@@ -15,7 +15,8 @@ main1 (unsigned int x, unsigned int y, u
int i;
unsigned int a0, a1, a2, a3;
- /* pin and pout may alias. */
+  /* pin and pout may alias.  But since all the loads are before the first store
+     the basic block is vectorizable.  */
a0 = *pin++ + 23;
a1 = *pin++ + 142;
a2 = *pin++ + 2;
@@ -26,6 +27,9 @@ main1 (unsigned int x, unsigned int y, u
*pout++ = a2 * x;
*pout++ = a3 * y;
+ if (i)
+ __asm__ volatile ("" : : : "memory");
+
/* Check results. */
if (out[0] != (in[0] + 23) * x
|| out[1] != (in[1] + 142) * y
@@ -45,6 +49,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
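
To see why the new comment holds, here is a minimal standalone sketch (not
part of the patch; copy_scaled is a hypothetical helper): when every load
precedes the first store, replacing the four scalar loads with one vector
load and the four scalar stores with one vector store preserves the scalar
semantics even if pin and pout point into the same array.

#include <stdio.h>

/* All loads happen before any store, mirroring the rewritten test.  A
   vector load followed by a vector store reads and writes exactly the
   same values as this scalar code, even when pin == pout.  */
static void
copy_scaled (unsigned int *pin, unsigned int *pout, unsigned int x)
{
  unsigned int a0 = pin[0] + 23;
  unsigned int a1 = pin[1] + 142;
  unsigned int a2 = pin[2] + 2;
  unsigned int a3 = pin[3] + 31;

  pout[0] = a0 * x;
  pout[1] = a1 * x;
  pout[2] = a2 * x;
  pout[3] = a3 * x;
}

int
main (void)
{
  unsigned int buf[4] = { 1, 2, 3, 4 };
  copy_scaled (buf, buf, 2);	/* pin and pout fully alias.  */
  printf ("%u %u %u %u\n", buf[0], buf[1], buf[2], buf[3]);
  return 0;
}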
Index: gcc/testsuite/gcc.dg/vect/bb-slp-8a.c
===================================================================
@@ -0,0 +1,53 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+
+  /* pin and pout may alias, and loads and stores are mixed.  The basic block
+     cannot be vectorized.  */
+ a0 = *pin++ + 23;
+ *pout++ = a0 * x;
+ a1 = *pin++ + 142;
+ *pout++ = a1 * y;
+ a2 = *pin++ + 2;
+ *pout++ = a2 * x;
+ a3 = *pin++ + 31;
+ *pout++ = a3 * y;
+
+ if (i)
+ __asm__ volatile ("" : : : "memory");
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3, &in[0], &out[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
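
To see why the mixed order is fatal when the pointers may alias, consider
this standalone illustration (a hypothetical overlap, not taken from the
test): with interleaved accesses a store can feed a later load, so hoisting
all loads above the stores (exactly what a single vector load would do)
changes the result.

#include <stdio.h>

/* Interleaved loads and stores, as in bb-slp-8a.c.  With pout == pin + 1
   each store feeds the next load, so a vector load of pin[0..3] would
   observe different values than the scalar code.  */
static void
interleaved (unsigned int *pin, unsigned int *pout)
{
  unsigned int a0 = *pin++;
  *pout++ = a0 * 2;
  unsigned int a1 = *pin++;
  *pout++ = a1 * 2;
  unsigned int a2 = *pin++;
  *pout++ = a2 * 2;
  unsigned int a3 = *pin++;
  *pout++ = a3 * 2;
}

int
main (void)
{
  unsigned int buf[5] = { 1, 0, 0, 0, 0 };
  interleaved (&buf[0], &buf[1]);
  /* Scalar result: 1 2 4 8 16.  With the loads hoisted above the stores
     the result would be 1 2 0 0 0 instead.  */
  printf ("%u %u %u %u %u\n", buf[0], buf[1], buf[2], buf[3], buf[4]);
  return 0;
}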
Index: gcc/testsuite/gcc.dg/vect/bb-slp-8b.c
===================================================================
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+ int i;
+ unsigned int a0, a1, a2, a3;
+ unsigned int *pin = &in[0];
+ unsigned int *pout = &out[0];
+
+ /* pin and pout are different, so despite the fact that loads and stores
+ are mixed the basic block is vectorizable. */
+ a0 = *pin++ + 23;
+ *pout++ = a0 * x;
+ a1 = *pin++ + 142;
+ *pout++ = a1 * y;
+ a2 = *pin++ + 2;
+ *pout++ = a2 * x;
+ a3 = *pin++ + 31;
+ *pout++ = a3 * y;
+
+ if (i)
+ __asm__ volatile ("" : : : "memory");
+
+ /* Check results. */
+ if (out[0] != (in[0] + 23) * x
+ || out[1] != (in[1] + 142) * y
+ || out[2] != (in[2] + 2) * x
+ || out[3] != (in[3] + 31) * y)
+ abort();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (2, 3);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
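
All three tests above rely on the same trick, which the ChangeLog entry
below describes as separating the interesting part and the check into
different basic blocks: the conditional ends the block under test, and the
volatile asm with a "memory" clobber keeps memory accesses from being moved
across it, so the BB SLP pass only analyzes the computation.  A minimal
sketch of the idiom (hypothetical names, not from the patch):

#include <stdio.h>

static int
compute_then_check (int a, int b, int flag)
{
  int sum = a + b;	/* block 1: the statements under test */

  if (flag)		/* the branch ends basic block 1 */
    __asm__ volatile ("" : : : "memory");	/* opaque compiler barrier */

  return sum == a + b;	/* block 2: the checking code */
}

int
main (void)
{
  printf ("%d\n", compute_then_check (2, 3, 0));
  return 0;
}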
Index: gcc/testsuite/ChangeLog
===================================================================
@@ -1,3 +1,11 @@
+2010-09-01 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the
+	check into different basic blocks.  Expect vectorization if misaligned
+	stores are supported.
+ * gcc.dg/vect/bb-slp-8a.c: New test.
+ * gcc.dg/vect/bb-slp-8b.c: New test.
+
2010-09-01 Richard Guenther <rguenther@suse.de>
* gcc.dg/vect/vect-outer-fir.c: Adjust.
Index: gcc/tree-vectorizer.h
===================================================================
@@ -633,6 +633,32 @@ get_earlier_stmt (gimple stmt1, gimple s
return stmt2;
}
+/* Return the later of statements STMT1 and STMT2 based on their UIDs, or
+   NULL if their relative order cannot be determined.  */
+static inline gimple
+get_later_stmt (gimple stmt1, gimple stmt2)
+{
+ unsigned int uid1, uid2;
+
+ if (stmt1 == NULL)
+ return stmt2;
+
+ if (stmt2 == NULL)
+ return stmt1;
+
+ uid1 = gimple_uid (stmt1);
+ uid2 = gimple_uid (stmt2);
+
+ if (uid1 == 0 || uid2 == 0)
+ return NULL;
+
+ gcc_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec));
+ gcc_assert (uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec));
+
+ if (uid1 > uid2)
+ return stmt1;
+ else
+ return stmt2;
+}
+
static inline bool
is_pattern_stmt_p (stmt_vec_info stmt_info)
{
@@ -776,7 +802,7 @@ extern enum dr_alignment_support vect_su
extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
HOST_WIDE_INT *);
extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info,
- int *);
+ int *, bool *);
extern bool vect_enhance_data_refs_alignment (loop_vec_info);
extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info);
extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
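
get_later_stmt mirrors the preexisting get_earlier_stmt: statements in the
region under analysis carry increasing UIDs, a UID of 0 marks a statement
outside the region (so its relative order is unknown), and NULL acts as an
identity so the function can be folded over a list of statements, as
vect_find_last_store_in_slp_instance does further down.  A standalone
analog under those assumptions (struct stmt is a hypothetical stand-in for
gimple plus gimple_uid):

#include <stdio.h>

struct stmt { unsigned int uid; };

static struct stmt *
later_stmt (struct stmt *s1, struct stmt *s2)
{
  if (s1 == NULL)
    return s2;			/* NULL is the fold identity.  */
  if (s2 == NULL)
    return s1;
  if (s1->uid == 0 || s2->uid == 0)
    return NULL;		/* Unnumbered: order unknown.  */
  return s1->uid > s2->uid ? s1 : s2;
}

int
main (void)
{
  struct stmt a = { 3 }, b = { 7 };
  struct stmt *last = NULL;

  last = later_stmt (&a, last);	/* Fold over the statements.  */
  last = later_stmt (&b, last);
  printf ("last uid: %u\n", last->uid);
  return 0;
}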
Index: gcc/tree-vect-loop.c
===================================================================
@@ -1378,7 +1378,7 @@ vect_analyze_loop_operations (loop_vec_i
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
- bool ok;
+ bool ok, dummy;
loop_vec_info loop_vinfo;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
@@ -1444,7 +1444,7 @@ vect_analyze_loop (struct loop *loop)
the dependences.
FORNOW: fail at the first data dependence that we encounter. */
- ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
+  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy);
if (!ok
|| max_vf < min_vf)
{
Index: gcc/tree-vect-data-refs.c
===================================================================
@@ -322,6 +322,64 @@ vect_equal_offsets (tree offset1, tree o
}
+/* Check dependence between DRA and DRB for basic block vectorization. */
+
+static bool
+vect_drs_dependent_in_basic_block (struct data_reference *dra,
+ struct data_reference *drb)
+{
+ HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
+ gimple earlier_stmt;
+
+  /* We only call this function for pairs of loads and stores, but we verify
+     it here.  */
+ if (DR_IS_READ (dra) == DR_IS_READ (drb))
+ {
+ if (DR_IS_READ (dra))
+ return false;
+ else
+ return true;
+ }
+
+  /* Check that the data-refs have same bases and offsets.  If not, we can't
+     determine if they are dependent.  */
+  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
+       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
+           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
+           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
+              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
+      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
+    return true;
+
+ /* Check the types. */
+  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
+  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
+
+ if (type_size_a != type_size_b
+ || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
+ TREE_TYPE (DR_REF (drb))))
+ return true;
+
+ init_a = TREE_INT_CST_LOW (DR_INIT (dra));
+ init_b = TREE_INT_CST_LOW (DR_INIT (drb));
+
+ /* Two different locations - no dependence. */
+ if (init_a != init_b)
+ return false;
+
+  /* We have a read-write dependence.  Check that the load is before the store.
+     When we vectorize basic blocks, vector load can be only before
+     corresponding scalar load, and vector store can be only after its
+     corresponding scalar store.  So the order of the accesses is preserved in
+     case the load is before the store.  */
+ earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
+ if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+ return false;
+
+ return true;
+}
+
+
/* Function vect_check_interleaving.
   Check if DRA and DRB are a part of interleaving.  In case they are, insert
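
A simplified standalone model of the decision the new function makes once
the bases, offsets and types are known to match (struct dref is a
hypothetical stand-in: its init field plays the role of DR_INIT and its
uid field the role of gimple_uid):

#include <stdbool.h>
#include <stdio.h>

struct dref
{
  bool is_read;		/* like DR_IS_READ */
  long init;		/* constant byte offset, like DR_INIT */
  unsigned int uid;	/* statement position, like gimple_uid */
};

static bool
dependent_in_bb (const struct dref *a, const struct dref *b)
{
  if (a->is_read == b->is_read)
    return !a->is_read;		/* read-read: fine; write-write: dependent */

  if (a->init != b->init)
    return false;		/* distinct scalars: no dependence */

  /* Same location: only safe when the read comes first, since BB
     vectorization only ever moves loads earlier and stores later.  */
  const struct dref *earlier = a->uid < b->uid ? a : b;
  return !earlier->is_read;
}

int
main (void)
{
  struct dref load = { true, 8, 1 };	/* ... = in[2]  */
  struct dref store = { false, 8, 2 };	/* out[2] = ... */
  printf ("dependent: %d\n", dependent_in_bb (&load, &store));
  return 0;
}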
@@ -495,7 +553,8 @@ vect_mark_for_runtime_alias_test (ddr_p
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
- loop_vec_info loop_vinfo, int *max_vf)
+ loop_vec_info loop_vinfo, int *max_vf,
+ bool *data_dependence_in_bb)
{
unsigned int i;
struct loop *loop = NULL;
@@ -554,10 +613,14 @@ vect_analyze_data_ref_dependence (struct
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
}
- /* Mark the statements as unvectorizable. */
- STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
- STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
-
+      /* We do not vectorize basic blocks with write-write dependencies.  */
+ if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
+ return true;
+
+      /* We deal with read-write dependencies in basic blocks later (by
+         verifying that all the loads in the basic block are before all the
+         stores).  */
+ *data_dependence_in_bb = true;
return false;
}
@@ -577,7 +640,12 @@ vect_analyze_data_ref_dependence (struct
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
}
- return true;
+      /* Do not vectorize basic blocks with write-write dependences.  */
+ if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
+ return true;
+
+      /* Check if this dependence is allowed in basic block vectorization.  */
+ return vect_drs_dependent_in_basic_block (dra, drb);
}
/* Loop-based vectorization and known data dependence. */
@@ -678,7 +746,8 @@ vect_analyze_data_ref_dependence (struct
bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
- bb_vec_info bb_vinfo, int *max_vf)
+ bb_vec_info bb_vinfo, int *max_vf,
+ bool *data_dependence_in_bb)
{
unsigned int i;
VEC (ddr_p, heap) *ddrs = NULL;
@@ -693,7 +762,8 @@ vect_analyze_data_ref_dependences (loop_
ddrs = BB_VINFO_DDRS (bb_vinfo);
FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
- if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
+ if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
+ data_dependence_in_bb))
return false;
return true;
Index: gcc/tree-vect-slp.c
===================================================================
@@ -1082,6 +1082,24 @@ vect_find_first_load_in_slp_instance (sl
}
+/* Find the last store in SLP INSTANCE. */
+static gimple
+vect_find_last_store_in_slp_instance (slp_instance instance)
+{
+ int i;
+ slp_tree node;
+ gimple last_store = NULL, store;
+
+ node = SLP_INSTANCE_TREE (instance);
+ for (i = 0;
+ VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
+ i++)
+ last_store = get_later_stmt (store, last_store);
+
+ return last_store;
+}
+
+
/* Analyze an SLP instance starting from a group of strided stores. Call
vect_build_slp_tree to build a tree of packed stmts if possible.
Return FALSE if it's impossible to SLP any stmt in the loop. */
@@ -1503,6 +1521,42 @@ vect_slp_analyze_operations (bb_vec_info
return true;
}
+/* Check if loads and stores are mixed in the basic block (in that
+   case if we are not sure that the accesses differ, we can't vectorize the
+   basic block).  Also return FALSE in case there is a statement marked as
+   not vectorizable.  */
+
+static bool
+vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
+{
+ basic_block bb = BB_VINFO_BB (bb_vinfo);
+ gimple_stmt_iterator si;
+ bool detected_store = false;
+ gimple stmt;
+ struct data_reference *dr;
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ stmt = gsi_stmt (si);
+
+      /* We can't allow not analyzed statements, since they may contain data
+         accesses.  */
+ if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
+ return false;
+
+ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ continue;
+
+ dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+ if (DR_IS_READ (dr) && detected_store)
+ return false;
+
+ if (!DR_IS_READ (dr))
+ detected_store = true;
+ }
+
+ return true;
+}
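
Reduced to its core, the new check is a single forward pass that rejects
the block as soon as a load appears after any store.  A standalone sketch
(the enum is a hypothetical stand-in for the DR_IS_READ queries above):

#include <stdbool.h>
#include <stdio.h>

enum kind { OTHER, LOAD, STORE };

static bool
loads_before_stores (const enum kind *stmts, int n)
{
  bool seen_store = false;
  int i;

  for (i = 0; i < n; i++)
    {
      if (stmts[i] == LOAD && seen_store)
	return false;		/* A load after a store: reject.  */
      if (stmts[i] == STORE)
	seen_store = true;
    }
  return true;
}

int
main (void)
{
  enum kind ok[] = { LOAD, LOAD, STORE, STORE };	/* like bb-slp-8.c  */
  enum kind bad[] = { LOAD, STORE, LOAD, STORE };	/* like bb-slp-8a.c */
  printf ("%d %d\n", loads_before_stores (ok, 4),
	  loads_before_stores (bad, 4));
  return 0;
}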