@@ -1,5 +1,20 @@
2010-10-20 Sebastian Pop <sebastian.pop@amd.com>
+ PR tree-optimization/46029
+ * doc/invoke.texi (-ftree-loop-if-convert-stores): Update description.
+ * tree-if-conv.c (has_unaligned_memory_refs): New.
+ (if_convertible_gimple_assign_stmt_p): Call has_unaligned_memory_refs.
+ (create_scratchpad): New.
+ (create_indirect_cond_expr): New.
+ (predicate_mem_writes): Call create_indirect_cond_expr. Take an extra
+ parameter for scratch_pad.
+ (combine_blocks): Same.
+ (tree_if_conversion): Same.
+ (main_tree_if_conversion): Pass to tree_if_conversion a pointer to
+ scratch_pad.
+
+2010-10-20 Sebastian Pop <sebastian.pop@amd.com>
+
* tree-if-conv.c (struct ifc_dr): Removed.
(IFC_DR): Removed.
(DR_WRITTEN_AT_LEAST_ONCE): Removed.
@@ -6935,20 +6935,26 @@ if vectorization is enabled.
@item -ftree-loop-if-convert-stores
Attempt to also if-convert conditional jumps containing memory writes.
-This transformation can be unsafe for multi-threaded programs as it
-transforms conditional memory writes into unconditional memory writes.
For example,
@smallexample
for (i = 0; i < N; i++)
if (cond)
- A[i] = expr;
+ A[i] = B[i] + 2;
@end smallexample
would be transformed to
@smallexample
-for (i = 0; i < N; i++)
- A[i] = cond ? expr : A[i];
+void *scratchpad = alloca (64);
+for (i = 0; i < N; i++) @{
+ a = cond ? &A[i] : scratchpad;
+ b = cond ? &B[i] : scratchpad;
+ *a = *b + 2;
+@}
@end smallexample
-potentially producing data races.
+The compiler allocates a scratchpad memory on the stack for each
+function in which the if-conversion of memory stores or reads
+happened. This scratchpad memory is used during the part of the
+computation that is discarded, i.e., when the condition is evaluated
+to false.
@item -ftree-loop-distribution
Perform loop distribution. This flag can improve cache performance on
@@ -1,5 +1,12 @@
2010-10-20 Sebastian Pop <sebastian.pop@amd.com>
+ PR tree-optimization/46029
+ * g++.dg/tree-ssa/ifc-pr46029.C: New.
+ * gcc.dg/tree-ssa/ifc-8.c: New.
+ * gcc.dg/tree-ssa/ifc-5.c: Make it exactly like the FFmpeg kernel.
+
+2010-10-20 Sebastian Pop <sebastian.pop@amd.com>
+
* gcc.dg/tree-ssa/flat-loop-1.c: New.
* gcc.dg/tree-ssa/flat-loop-2.c: New.
* gcc.dg/tree-ssa/flat-loop-3.c: New.
new file mode 100644
@@ -0,0 +1,76 @@
+// { dg-do run }
+/* { dg-options "-O -ftree-loop-if-convert-stores" } */
+
+namespace
+{
+ struct rb_tree_node_
+ {
+ rb_tree_node_ ():m_p_left (0), m_p_parent (0), m_metadata (0)
+ {
+ }
+ unsigned &get_metadata ()
+ {
+ return m_metadata;
+ }
+ rb_tree_node_ *m_p_left;
+ rb_tree_node_ *m_p_parent;
+ unsigned m_metadata;
+ };
+
+ struct bin_search_tree_const_node_it_
+ {
+ bin_search_tree_const_node_it_ (rb_tree_node_ * p_nd):m_p_nd (p_nd)
+ {
+ }
+ unsigned &get_metadata ()
+ {
+ return m_p_nd->get_metadata ();
+ }
+ bin_search_tree_const_node_it_ get_l_child ()
+ {
+ return bin_search_tree_const_node_it_ (m_p_nd->m_p_left);
+ }
+
+ rb_tree_node_ *m_p_nd;
+ };
+
+ struct bin_search_tree_no_data_
+ {
+ typedef rb_tree_node_ *node_pointer;
+ bin_search_tree_no_data_ ():m_p_head (new rb_tree_node_ ())
+ {
+ }
+ void insert_imp_empty (int r_value)
+ {
+ rb_tree_node_ *p_new_node = new rb_tree_node_ ();
+ m_p_head->m_p_parent = p_new_node;
+ p_new_node->m_p_parent = m_p_head;
+ update_to_top (m_p_head->m_p_parent);
+ }
+ void apply_update (bin_search_tree_const_node_it_ nd_it)
+ {
+ unsigned
+ l_max_endpoint
+ =
+ (nd_it.get_l_child ().m_p_nd ==
+ 0) ? 0 : nd_it.get_l_child ().get_metadata ();
+ nd_it.get_metadata () = l_max_endpoint;
+ }
+ void update_to_top (node_pointer p_nd)
+ {
+ while (p_nd != m_p_head)
+ {
+ apply_update (p_nd);
+ p_nd = p_nd->m_p_parent;
+ }
+ }
+
+ rb_tree_node_ * m_p_head;
+ };
+}
+
+int main ()
+{
+ bin_search_tree_no_data_ ().insert_imp_empty (0);
+ return 0;
+}
@@ -12,11 +12,18 @@ dct_unquantize_h263_inter_c (short *block, int n, int qscale, int nCoeffs)
for (i = 0; i <= nCoeffs; i++)
{
level = block[i];
- if (level < 0)
- level = level * qmul - qadd;
- else
- level = level * qmul + qadd;
- block[i] = level;
+ if (level)
+ {
+ if (level < 0)
+ {
+ level = level * qmul - qadd;
+ }
+ else
+ {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
}
}
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-c -O2 -ftree-vectorize" { target *-*-* } } */
+
+typedef union tree_node *tree;
+struct tree_common
+{
+ unsigned volatile_flag : 1;
+ unsigned unsigned_flag : 1;
+};
+struct tree_type
+{
+ tree next_variant;
+ tree main_variant;
+};
+union tree_node
+{
+ struct tree_common common;
+ struct tree_type type;
+};
+void finish_enum (tree enumtype)
+{
+ tree tem;
+ for (tem = ((enumtype)->type.main_variant); tem; tem = ((tem)->type.next_variant))
+ {
+ if (tem == enumtype)
+ continue;
+ ((tem)->common.unsigned_flag) = ((enumtype)->common.unsigned_flag);
+ }
+}
@@ -459,6 +459,36 @@ ifcvt_could_trap_p (gimple stmt)
return gimple_could_trap_p (stmt);
}
+/* Returns true when stmt contains a data reference */
+
+static bool
+has_unaligned_memory_refs (gimple stmt)
+{
+ int unsignedp, volatilep;
+ HOST_WIDE_INT bitsize, bitpos;
+ tree toffset;
+ enum machine_mode mode;
+ VEC (data_ref_loc, heap) *refs = VEC_alloc (data_ref_loc, heap, 3);
+ bool res = get_references_in_stmt (stmt, &refs);
+ unsigned i;
+ data_ref_loc *ref;
+
+ FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
+ {
+ get_inner_reference (*ref->pos, &bitsize, &bitpos, &toffset,
+ &mode, &unsignedp, &volatilep, true);
+
+ if ((bitpos % BITS_PER_UNIT) != 0)
+ {
+ res = true;
+ break;
+ }
+ }
+
+ VEC_free (data_ref_loc, heap, refs);
+ return res;
+}
+
/* Return true when STMT is if-convertible.
GIMPLE_ASSIGN statement is not if-convertible if,
@@ -501,6 +531,14 @@ if_convertible_gimple_assign_stmt_p (gimple stmt)
fprintf (dump_file, "tree could trap...\n");
return false;
}
+
+ if (has_unaligned_memory_refs (stmt))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "uses misaligned memory...\n");
+ return false;
+ }
+
return true;
}
@@ -1190,6 +1228,78 @@ insert_gimplified_predicates (loop_p loop)
}
}
+/* Insert at the beginning of the first basic block of the current
+ function the allocation on the stack of N bytes of memory and
+ return a pointer to this scratchpad memory. */
+
+static tree
+create_scratchpad (void)
+{
+ basic_block bb = single_succ (ENTRY_BLOCK_PTR);
+ gimple_stmt_iterator gsi = gsi_after_labels (bb);
+
+ /* void *tmp = __builtin_alloca */
+ const char *name = "scratch_pad";
+ tree x = build_int_cst (integer_type_node, 64);
+ gimple stmt = gimple_build_call (built_in_decls[BUILT_IN_ALLOCA], 1, x);
+ tree var = create_tmp_var (ptr_type_node, name);
+ tree tmp = make_ssa_name (var, stmt);
+
+ add_referenced_var (var);
+ gimple_call_set_lhs (stmt, tmp);
+ SSA_NAME_DEF_STMT (tmp) = stmt;
+ update_stmt (stmt);
+
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ return tmp;
+}
+
+/* Returns a memory reference to the pointer defined by the
+ conditional expression: pointer = cond ? &A[i] : scratch_pad; and
+ inserts this code at GSI. */
+
+static tree
+create_indirect_cond_expr (tree ai, tree cond, tree *scratch_pad,
+ gimple_stmt_iterator *gsi)
+{
+ tree type = TREE_TYPE (ai);
+
+ tree pointer_to_type, address_of_ai, addr_expr, cond_expr;
+ tree pointer, star_pointer;
+ gimple addr_stmt, pointer_stmt;
+
+ /* address_of_ai = &A[i]; */
+ pointer_to_type = build_pointer_type (type);
+ address_of_ai = create_tmp_var (pointer_to_type, "_ifc_");
+ add_referenced_var (address_of_ai);
+ addr_expr = build_fold_addr_expr (ai);
+ addr_stmt = gimple_build_assign (address_of_ai, addr_expr);
+ address_of_ai = make_ssa_name (address_of_ai, addr_stmt);
+ gimple_assign_set_lhs (addr_stmt, address_of_ai);
+ SSA_NAME_DEF_STMT (address_of_ai) = addr_stmt;
+ update_stmt (addr_stmt);
+ gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
+
+ /* Allocate the scratch pad only once per function. */
+ if (!*scratch_pad)
+ *scratch_pad = create_scratchpad ();
+
+ /* pointer = cond ? address_of_ai : scratch_pad; */
+ pointer = create_tmp_var (pointer_to_type, "_ifc_");
+ add_referenced_var (pointer);
+ cond_expr = build3 (COND_EXPR, pointer_to_type, unshare_expr (cond),
+ address_of_ai, *scratch_pad);
+ pointer_stmt = gimple_build_assign (pointer, cond_expr);
+ pointer = make_ssa_name (pointer, pointer_stmt);
+ gimple_assign_set_lhs (pointer_stmt, pointer);
+ SSA_NAME_DEF_STMT (pointer) = pointer_stmt;
+ update_stmt (pointer_stmt);
+ gsi_insert_before (gsi, pointer_stmt, GSI_SAME_STMT);
+
+ star_pointer = build_simple_mem_ref (pointer);
+ return star_pointer;
+}
+
/* Predicate each write to memory in LOOP.
This function transforms control flow constructs containing memory
@@ -1201,10 +1311,19 @@ insert_gimplified_predicates (loop_p loop)
into the following form that does not contain control flow:
- | for (i = 0; i < N; i++)
- | A[i] = cond ? expr : A[i];
+ | void *scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
+ |
+ | for (i = 0; i < N; i++) {
+ | p = cond ? &A[i] : scratch_pad;
+ | *p = expr;
+ | }
+
+ SCRATCH_PAD is allocated on the stack for each function once and it is
+ large enough to contain any kind of scalar assignment or read. All
+ values read or written to SCRATCH_PAD are not used in the computation.
- The original CFG looks like this:
+ In a more detailed way, the if-conversion of memory writes works
+ like this, supposing that the original CFG looks like this:
| bb_0
| i = 0
@@ -1254,10 +1373,12 @@ insert_gimplified_predicates (loop_p loop)
| goto bb_1
| end_bb_4
- predicate_mem_writes is then predicating the memory write as follows:
+ predicate_mem_writes is then allocating SCRATCH_PAD in the basic block
+ preceding the loop header, and is predicating the memory write:
| bb_0
| i = 0
+ | scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
| end_bb_0
|
| bb_1
@@ -1265,12 +1386,14 @@ insert_gimplified_predicates (loop_p loop)
| end_bb_1
|
| bb_2
+ | cond = some_computation;
| if (cond) goto bb_3 else goto bb_4
| end_bb_2
|
| bb_3
| cond = some_computation;
- | A[i] = cond ? expr : A[i];
+ | p = cond ? &A[i] : scratch_pad;
+ | *p = expr;
| goto bb_4
| end_bb_3
|
@@ -1283,12 +1406,14 @@ insert_gimplified_predicates (loop_p loop)
| bb_0
| i = 0
+ | scratch_pad = alloca (MAX_TYPE_SIZE_IN_BYTES);
| if (i < N) goto bb_5 else goto bb_1
| end_bb_0
|
| bb_1
| cond = some_computation;
- | A[i] = cond ? expr : A[i];
+ | p = cond ? &A[i] : scratch_pad;
+ | *p = expr;
| if (i < N) goto bb_5 else goto bb_4
| end_bb_1
|
@@ -1298,7 +1423,7 @@ insert_gimplified_predicates (loop_p loop)
*/
static void
-predicate_mem_writes (loop_p loop)
+predicate_mem_writes (loop_p loop, tree *scratch_pad)
{
unsigned int i, orig_loop_num_nodes = loop->num_nodes;
@@ -1313,20 +1438,35 @@ predicate_mem_writes (loop_p loop)
continue;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- if ((stmt = gsi_stmt (gsi))
- && gimple_assign_single_p (stmt)
- && gimple_vdef (stmt))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
- tree type = TREE_TYPE (lhs);
-
- lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
- rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
- rhs = build3 (COND_EXPR, type, unshare_expr (cond), rhs, lhs);
- gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
- update_stmt (stmt);
- }
+ {
+ stmt = gsi_stmt (gsi);
+ if (gimple_assign_single_p (stmt)
+ && gimple_vdef (stmt))
+ {
+ /* A[i] = x; */
+ tree ai = gimple_assign_lhs (stmt);
+
+ /* pointer = cond ? &A[i] : scratch_pad; */
+ tree star_pointer = create_indirect_cond_expr (ai, cond,
+ scratch_pad, &gsi);
+ /* *pointer = x; */
+ gimple_assign_set_lhs (stmt, star_pointer);
+ update_stmt (stmt);
+ }
+ else if (gimple_assign_single_p (stmt)
+ && gimple_vuse (stmt))
+ {
+ /* x = A[i]; */
+ tree ai = gimple_assign_rhs1 (stmt);
+
+ /* pointer = cond ? &A[i] : scratch_pad; */
+ tree star_pointer = create_indirect_cond_expr (ai, cond,
+ scratch_pad, &gsi);
+ /* x = *pointer; */
+ gimple_assign_set_rhs1 (stmt, star_pointer);
+ update_stmt (stmt);
+ }
+ }
}
}
@@ -1376,7 +1516,7 @@ remove_conditions_and_labels (loop_p loop)
blocks. Replace PHI nodes with conditional modify expressions. */
static void
-combine_blocks (struct loop *loop)
+combine_blocks (struct loop *loop, tree *scratch_pad)
{
basic_block bb, exit_bb, merge_target_bb;
unsigned int orig_loop_num_nodes = loop->num_nodes;
@@ -1389,7 +1529,7 @@ combine_blocks (struct loop *loop)
predicate_all_scalar_phis (loop);
if (flag_tree_loop_if_convert_stores)
- predicate_mem_writes (loop);
+ predicate_mem_writes (loop, scratch_pad);
/* Merge basic blocks: first remove all the edges in the loop,
except for those from the exit block. */
@@ -1478,7 +1618,7 @@ combine_blocks (struct loop *loop)
profitability analysis. Returns true when something changed. */
static bool
-tree_if_conversion (struct loop *loop)
+tree_if_conversion (struct loop *loop, tree *scratch_pad)
{
bool changed = false;
ifc_bbs = NULL;
@@ -1490,7 +1630,7 @@ tree_if_conversion (struct loop *loop)
/* Now all statements are if-convertible. Combine all the basic
blocks into one huge basic block doing the if-conversion
on-the-fly. */
- combine_blocks (loop);
+ combine_blocks (loop, scratch_pad);
if (flag_tree_loop_if_convert_stores)
mark_sym_for_renaming (gimple_vop (cfun));
@@ -1521,12 +1661,13 @@ main_tree_if_conversion (void)
struct loop *loop;
bool changed = false;
unsigned todo = 0;
+ tree scratch_pad = NULL_TREE;
if (number_of_loops () <= 1)
return 0;
FOR_EACH_LOOP (li, loop, 0)
- changed |= tree_if_conversion (loop);
+ changed |= tree_if_conversion (loop, &scratch_pad);
if (changed)
todo |= TODO_cleanup_cfg;