tree-optimization/115579 - fix wrong code with store-motion

Message ID 20240622132353.04E1813ABD@imap1.dmz-prg2.suse.org
State New
Series tree-optimization/115579 - fix wrong code with store-motion

Commit Message

Richard Biener June 22, 2024, 1:23 p.m. UTC
The recent change relaxing store motion for variables that cannot be
subject to store data races broke the optimization that shares a single
flag variable for stores that all happen in the same BB.  The following
fixes this.
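
To make the affected optimization concrete, here is a source-level
sketch (illustrative names, not actual compiler output) of what
conditional store motion with a shared flag variable does:

/* Stores to g1 and g2 sit in the same BB of the loop body.  */
int g1, g2;

void
before (int n, int x)
{
  for (int i = 0; i < n; ++i)
    if (x)
      {
	g1 = i;
	g2 = i;
      }
}

/* After store motion the values live in registers inside the loop and
   are written back on the exit only if a store really happened, which
   is tracked by one flag that both stores share.  */
void
after (int n, int x)
{
  int t1 = g1, t2 = g2;
  bool stored = false;
  for (int i = 0; i < n; ++i)
    if (x)
      {
	t1 = i;
	t2 = i;
	stored = true;
      }
  if (stored)
    {
      g1 = t1;
      g2 = t2;
    }
}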

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

	PR tree-optimization/115579
	* tree-ssa-loop-im.cc (execute_sm): Return the auxiliary data
	created.
	(hoist_memory_references): Record the flag var that's eventually
	created and re-use it when all stores are in the same BB.

	* gcc.dg/pr115579.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr115579.c | 18 ++++++++++++++++++
 gcc/tree-ssa-loop-im.cc         | 27 ++++++++++++++-------------
 2 files changed, 32 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr115579.c

Patch

diff --git a/gcc/testsuite/gcc.dg/pr115579.c b/gcc/testsuite/gcc.dg/pr115579.c
new file mode 100644
index 00000000000..04781056723
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr115579.c
@@ -0,0 +1,18 @@ 
+/* { dg-do run } */
+/* { dg-options "-Os -fno-tree-sra" } */
+
+int printf(const char *, ...);
+int a, b = 1, c;
+int main() {
+  int d[2], *e = &d[1];
+  while (a) {
+    int *f = &b;
+    d[1] = 0;
+    *f = 0;
+  }
+  if (c)
+    printf("%d\n", *e);
+  if (b != 1)
+    __builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc
index 3acbd886a0d..61c6339bc35 100644
--- a/gcc/tree-ssa-loop-im.cc
+++ b/gcc/tree-ssa-loop-im.cc
@@ -2269,7 +2269,7 @@  struct sm_aux
    temporary variable is put to the preheader of the loop, and assignments
    to the reference from the temporary variable are emitted to exits.  */
 
-static void
+static sm_aux *
 execute_sm (class loop *loop, im_mem_ref *ref,
 	    hash_map<im_mem_ref *, sm_aux *> &aux_map, bool maybe_mt,
 	    bool use_other_flag_var)
@@ -2345,6 +2345,8 @@  execute_sm (class loop *loop, im_mem_ref *ref,
       lim_data->tgt_loop = loop;
       gsi_insert_before (&gsi, load, GSI_SAME_STMT);
     }
+
+  return aux;
 }
 
 /* sm_ord is used for ordinary stores we can retain order with respect
@@ -2802,20 +2804,18 @@  hoist_memory_references (class loop *loop, bitmap mem_refs,
       hash_map<im_mem_ref *, sm_aux *> aux_map;
 
       /* Execute SM but delay the store materialization for ordered
-	 sequences on exit.  */
-      bool first_p = true;
+	 sequences on exit.  Remember a created flag var and make
+	 sure to re-use it.  */
+      sm_aux *flag_var_aux = nullptr;
       EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
 	{
 	  ref = memory_accesses.refs_list[i];
-	  execute_sm (loop, ref, aux_map, true, !first_p);
-	  first_p = false;
+	  sm_aux *aux = execute_sm (loop, ref, aux_map, true,
+				    flag_var_aux != nullptr);
+	  if (aux->store_flag)
+	    flag_var_aux = aux;
 	}
 
-      /* Get at the single flag variable we eventually produced.  */
-      im_mem_ref *ref
-	= memory_accesses.refs_list[bitmap_first_set_bit (mem_refs)];
-      sm_aux *aux = *aux_map.get (ref);
-
       /* Materialize ordered store sequences on exits.  */
       edge e;
       FOR_EACH_VEC_ELT (exits, i, e)
@@ -2826,13 +2826,14 @@  hoist_memory_references (class loop *loop, bitmap mem_refs,
 	  /* Construct the single flag variable control flow and insert
 	     the ordered seq of stores in the then block.  With
 	     -fstore-data-races we can do the stores unconditionally.  */
-	  if (aux->store_flag)
+	  if (flag_var_aux)
 	    insert_e
 	      = single_pred_edge
 		  (execute_sm_if_changed (e, NULL_TREE, NULL_TREE,
-					  aux->store_flag,
+					  flag_var_aux->store_flag,
 					  loop_preheader_edge (loop),
-					  &aux->flag_bbs, append_cond_position,
+					  &flag_var_aux->flag_bbs,
+					  append_cond_position,
 					  last_cond_fallthru));
 	  execute_sm_exit (loop, insert_e, seq, aux_map, sm_ord,
 			   append_cond_position, last_cond_fallthru);
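
For reference, a toy model (simplified names, not the actual GCC
internals) of the caller-side logic above: a reference that cannot have
store data races creates no flag, so the driver has to remember
whichever execute_sm call actually created one rather than assume the
first reference did:

#include <memory>
#include <vector>
#include <cstdio>

/* Stand-in for sm_aux: store_flag is null when the ref needed no flag.  */
struct aux_model
{
  bool *store_flag = nullptr;
};

/* Stand-in for execute_sm: a ref that can race creates a flag unless it
   was told to re-use an existing one.  */
static aux_model *
process_ref (bool can_race, bool use_other_flag_var,
	     std::vector<std::unique_ptr<aux_model>> &auxes, bool &flag)
{
  auxes.push_back (std::make_unique<aux_model> ());
  aux_model *aux = auxes.back ().get ();
  if (can_race && !use_other_flag_var)
    aux->store_flag = &flag;
  return aux;
}

int
main ()
{
  std::vector<std::unique_ptr<aux_model>> auxes;
  bool flag = false;
  /* The first ref cannot race (so no flag), the second one can.  */
  const bool can_race[] = { false, true };

  aux_model *flag_var_aux = nullptr;
  for (bool r : can_race)
    {
      aux_model *aux = process_ref (r, flag_var_aux != nullptr, auxes, flag);
      if (aux->store_flag)
	flag_var_aux = aux;
    }

  /* The pre-patch code keyed the guarded write-back off the first ref's
     aux, whose store_flag here is null; the fix keys it off
     flag_var_aux instead.  */
  std::printf ("write-back guarded by a flag: %s\n",
	       flag_var_aux ? "yes" : "no");
  return 0;
}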