diff mbox series

[committed,PR,rtl-optimization/101188] Fix reload_cse_move2add ignoring clobbers

Message ID 52aefdbf-130d-dcb9-63f1-1b451153ccba@gmail.com
State New
Headers show
Series [committed,PR,rtl-optimization/101188] Fix reload_cse_move2add ignoring clobbers | expand

Commit Message

Jeff Law June 12, 2023, 6:55 p.m. UTC
So as Georg-Johann discusses in the BZ, reload_cse_move2add can generate 
incorrect code when optimizing code with clobbers.  Specifically in the 
case where we try to optimize a sequence of 4 operations down to 3 
operations we can reset INSN to the next instruction and continue the loop.

That skips the code to invalidate objects based on things like REG_INC 
nodes, stack pushes and most importantly clobbers attached to the 
current insn.

This patch factors all of the invalidation code used by 
reload_cse_move2add into a new function and calls it at the appropriate 
time.

Georg-Johann has confirmed this patch fixes his avr bug and I've had it 
in my tester over the weekend.  It's bootstrapped and regression tested 
on aarch64, m68k, sh4, alpha and hppa.  It's also regression tested 
successfully on a wide variety of other targets.


Pushing to the trunk momentarily.

Jeff
commit ae193f9008e02683e27f3c87f3b06f38e103b1d0
Author: Jeff Law <jlaw@ventanamicro.com>
Date:   Mon Jun 12 12:52:10 2023 -0600

    [committed] [PR rtl-optimization/101188] Fix reload_cse_move2add ignoring clobbers
    
    So as Georg-Johann discusses in the BZ, reload_cse_move2add can generate
     incorrect code when optimizing code with clobbers.  Specifically in the
    case where we try to optimize a sequence of 4 operations down to 3
    operations we can reset INSN to the next instruction and continue the loop.
    
    That skips the code to invalidate objects based on things like REG_INC
    nodes, stack pushes and most importantly clobbers attached to the current
    insn.
    
    This patch factors all of the invalidation code used by reload_cse_move2add
    into a new function and calls it at the appropriate time.
    
    Georg-Johann has confirmed this patch fixes his avr bug and I've had it in
    my tester over the weekend.  It's bootstrapped and regression tested on
    aarch64, m68k, sh4, alpha and hppa.  It's also regression tested successfully
    on a wide variety of other targets.
    
    gcc/
            PR rtl-optimization/101188
            * postreload.cc (reload_cse_move2add_invalidate): New function,
            extracted from...
            (reload_cse_move2add): Call reload_cse_move2add_invalidate.
    
    gcc/testsuite
            PR rtl-optimization/101188
            * gcc.c-torture/execute/pr101188.c: New test
diff mbox series

Patch

diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index 20e138b4fa8..d5f367071bb 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -1899,6 +1899,79 @@  move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
   return changed;
 }
 
+/* Perform any invalidations necessary for INSN.  */
+
+static void
+reload_cse_move2add_invalidate (rtx_insn *insn)
+{
+  for (rtx note = REG_NOTES (insn); note; note = XEXP (note, 1))
+    {
+      if (REG_NOTE_KIND (note) == REG_INC
+	  && REG_P (XEXP (note, 0)))
+	{
+	  /* Reset the information about this register.  */
+	  int regno = REGNO (XEXP (note, 0));
+	  if (regno < FIRST_PSEUDO_REGISTER)
+	    {
+	      move2add_record_mode (XEXP (note, 0));
+	      reg_mode[regno] = VOIDmode;
+	    }
+	}
+    }
+
+  /* There are no REG_INC notes for SP autoinc.  */
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
+    {
+      rtx mem = *iter;
+      if (mem
+	  && MEM_P (mem)
+	  && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
+	{
+	  if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx)
+	    reg_mode[STACK_POINTER_REGNUM] = VOIDmode;
+	}
+    }
+
+  note_stores (insn, move2add_note_store, insn);
+
+  /* If INSN is a conditional branch, we try to extract an
+     implicit set out of it.  */
+  if (any_condjump_p (insn))
+    {
+      rtx cnd = fis_get_condition (insn);
+
+      if (cnd != NULL_RTX
+	  && GET_CODE (cnd) == NE
+	  && REG_P (XEXP (cnd, 0))
+	  && !reg_set_p (XEXP (cnd, 0), insn)
+	  /* The following two checks, which are also in
+	     move2add_note_store, are intended to reduce the
+	     number of calls to gen_rtx_SET to avoid memory
+	     allocation if possible.  */
+	  && SCALAR_INT_MODE_P (GET_MODE (XEXP (cnd, 0)))
+	  && REG_NREGS (XEXP (cnd, 0)) == 1
+	  && CONST_INT_P (XEXP (cnd, 1)))
+	{
+	  rtx implicit_set = gen_rtx_SET (XEXP (cnd, 0), XEXP (cnd, 1));
+	  move2add_note_store (SET_DEST (implicit_set), implicit_set, insn);
+	}
+    }
+
+  /* If this is a CALL_INSN, all call used registers are stored with
+     unknown values.  */
+  if (CALL_P (insn))
+    {
+      function_abi callee_abi = insn_callee_abi (insn);
+      for (int i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
+	if (reg_mode[i] != VOIDmode
+	    && reg_mode[i] != BLKmode
+	    && callee_abi.clobbers_reg_p (reg_mode[i], i))
+	    /* Reset the information about this register.  */
+	  reg_mode[i] = VOIDmode;
+    }
+}
+
 /* Convert move insns with constant inputs to additions if they are cheaper.
    Return true if any changes were made.  */
 static bool
@@ -1921,7 +1994,7 @@  reload_cse_move2add (rtx_insn *first)
   move2add_luid = 2;
   for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
     {
-      rtx set, note;
+      rtx set;
 
       if (LABEL_P (insn))
 	{
@@ -2041,6 +2114,12 @@  reload_cse_move2add (rtx_insn *first)
 			delete_insn (insn);
 		      changed |= success;
 		      insn = next;
+		      /* Make sure to perform any invalidations related to
+			 NEXT/INSN since we're going to bypass the normal
+			 flow with the continue below.
+
+			 Do this before recording the new mode/offset.  */
+		      reload_cse_move2add_invalidate (insn);
 		      move2add_record_mode (reg);
 		      reg_offset[regno]
 			= trunc_int_for_mode (added_offset + base_offset,
@@ -2094,74 +2173,7 @@  reload_cse_move2add (rtx_insn *first)
 	      continue;
 	    }
 	}
-
-      for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
-	{
-	  if (REG_NOTE_KIND (note) == REG_INC
-	      && REG_P (XEXP (note, 0)))
-	    {
-	      /* Reset the information about this register.  */
-	      int regno = REGNO (XEXP (note, 0));
-	      if (regno < FIRST_PSEUDO_REGISTER)
-		{
-		  move2add_record_mode (XEXP (note, 0));
-		  reg_mode[regno] = VOIDmode;
-		}
-	    }
-	}
-
-      /* There are no REG_INC notes for SP autoinc.  */
-      subrtx_var_iterator::array_type array;
-      FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
-	{
-	  rtx mem = *iter;
-	  if (mem
-	      && MEM_P (mem)
-	      && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
-	    {
-	      if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx)
-		reg_mode[STACK_POINTER_REGNUM] = VOIDmode;
-	    }
-	}
-
-      note_stores (insn, move2add_note_store, insn);
-
-      /* If INSN is a conditional branch, we try to extract an
-	 implicit set out of it.  */
-      if (any_condjump_p (insn))
-	{
-	  rtx cnd = fis_get_condition (insn);
-
-	  if (cnd != NULL_RTX
-	      && GET_CODE (cnd) == NE
-	      && REG_P (XEXP (cnd, 0))
-	      && !reg_set_p (XEXP (cnd, 0), insn)
-	      /* The following two checks, which are also in
-		 move2add_note_store, are intended to reduce the
-		 number of calls to gen_rtx_SET to avoid memory
-		 allocation if possible.  */
-	      && SCALAR_INT_MODE_P (GET_MODE (XEXP (cnd, 0)))
-	      && REG_NREGS (XEXP (cnd, 0)) == 1
-	      && CONST_INT_P (XEXP (cnd, 1)))
-	    {
-	      rtx implicit_set =
-		gen_rtx_SET (XEXP (cnd, 0), XEXP (cnd, 1));
-	      move2add_note_store (SET_DEST (implicit_set), implicit_set, insn);
-	    }
-	}
-
-      /* If this is a CALL_INSN, all call used registers are stored with
-	 unknown values.  */
-      if (CALL_P (insn))
-	{
-	  function_abi callee_abi = insn_callee_abi (insn);
-	  for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
-	    if (reg_mode[i] != VOIDmode
-		&& reg_mode[i] != BLKmode
-		&& callee_abi.clobbers_reg_p (reg_mode[i], i))
-	      /* Reset the information about this register.  */
-	      reg_mode[i] = VOIDmode;
-	}
+      reload_cse_move2add_invalidate (insn);
     }
   return changed;
 }
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr101188.c b/gcc/testsuite/gcc.c-torture/execute/pr101188.c
new file mode 100644
index 00000000000..1bdb50b3de5
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr101188.c
@@ -0,0 +1,61 @@ 
+/* { dg-require-effective-target indirect_calls } */
+typedef __UINT8_TYPE__ uint8_t;
+typedef __UINT16_TYPE__ uint16_t;
+
+typedef uint8_t (*fn1)(void *a);
+typedef void (*fn2)(void *a, int *arg);
+
+struct S
+{
+    uint8_t buffer[64];
+    uint16_t n;
+    fn2 f2;
+    void *a;
+    fn1 f1;
+};
+
+volatile uint16_t x;
+
+void __attribute__((__noinline__,__noclone__))
+foo (uint16_t n)
+{
+  x = n;
+}
+
+void __attribute__((__noinline__,__noclone__))
+testfn (struct S *self)
+{
+    int arg;
+
+    foo (self->n);
+    self->n++;
+    self->f2 (self->a, &arg);
+    self->buffer[0] = self->f1 (self->a);
+}
+
+static unsigned char myfn2_called = 0;
+
+static void
+myfn2 (void *a, int *arg)
+{
+  myfn2_called = 1;
+}
+
+static uint8_t
+myfn1 (void *a)
+{
+  return 0;
+}
+
+int main (void)
+{
+  struct S s;
+  s.n = 0;
+  s.f2 = myfn2;
+  s.f1 = myfn1;
+  testfn (&s);
+  if (myfn2_called != 1)
+    __builtin_abort();
+  return 0;
+}
+