diff mbox series

[RFC] i386: Fix ICE with -mforce-indirect-call and -fsplit-stack [PR89316]

Message ID CAFULd4YwG2JRZCLRFPzhmz9vJm2-FkjHfk_nb60-J0aO5MsODg@mail.gmail.com
State New
Headers show
Series [RFC] i386: Fix ICE with -mforce-indirect-call and -fsplit-stack [PR89316] | expand

Commit Message

Uros Bizjak Nov. 20, 2023, 4:33 p.m. UTC
With the above two options, use a temporary register regno (as returned
from split_stack_prologue_scratch_regno) as an indirect call scratch
register to hold __morestack function address.  On 64-bit targets, two
temporary registers are always available, so load the function address in
%r11 and call __morestack_large_model with its one-argument-register value
in %r10.  On 32-bit targets, bail out with a "sorry" if the temporary
register can not be obtained.

On 32-bit targets, also emit a PIC sequence that re-uses the obtained indirect
call scratch register before moving the function address to it.  We can
not set up %ebx PIC register in this case, but __morestack is prepared
for this situation and sets it up by itself.

    PR target/89316

gcc/ChangeLog:

    * config/i386/i386.cc (ix86_expand_split_stack_prologue): Obtain
    scratch regno when flag_force_indirect_call is set.  On 64-bit
    targets, call __morestack_large_model when flag_force_indirect_call
    is set and on 32-bit targets with -fpic, manually expand PIC sequence
    to call __morestack.  Move the function address to an indirect
    call scratch register.

gcc/testsuite/ChangeLog:

    * g++.target/i386/pr89316.C: New test.
    * gcc.target/i386/pr112605-1.c: New test.
    * gcc.target/i386/pr112605-2.c: New test.
    * gcc.target/i386/pr112605.c: New test.

Jakub, I'm not entirely sure x86_32 PIC sequence is 100% correct
(please note that the missing %ebx setup situation is handled in
__morestack), so I'd be very grateful for your review of this part.

Uros.

Comments

Uros Bizjak Nov. 23, 2023, 3:20 p.m. UTC | #1
On Mon, Nov 20, 2023 at 5:33 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> With the above two options, use a temporary register regno (as returned
> from split_stack_prologue_scratch_regno) as an indirect call scratch
> register to hold __morestack function address.  On 64-bit targets, two
> temporary registers are always available, so load the function address in
> %r11 and call __morestack_large_model with its one-argument-register value
> in %r10.  On 32-bit targets, bail out with a "sorry" if the temporary
> register can not be obtained.
>
> On 32-bit targets, also emit a PIC sequence that re-uses the obtained indirect
> call scratch register before moving the function address to it.  We can
> not set up %ebx PIC register in this case, but __morestack is prepared
> for this situation and sets it up by itself.
>
>     PR target/89316
>
> gcc/ChangeLog:
>
>     * config/i386/i386.cc (ix86_expand_split_stack_prologue): Obtain
>     scratch regno when flag_force_indirect_call is set.  On 64-bit
>     targets, call __morestack_large_model when flag_force_indirect_call
>     is set and on 32-bit targets with -fpic, manually expand PIC sequence
>     to call __morestack.  Move the function address to an indirect
>     call scratch register.
>
> gcc/testsuite/ChangeLog:
>
>     * g++.target/i386/pr89316.C: New test.
>     * gcc.target/i386/pr112605-1.c: New test.
>     * gcc.target/i386/pr112605-2.c: New test.
>     * gcc.target/i386/pr112605.c: New test.
>
> Jakub, I'm not entirely sure x86_32 PIC sequence is 100% correct
> (please note that the missing %ebx setup situation is handled in
> __morestack), so I'd be very grateful for your review of this part.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 683ac643bc8..a37f683d895 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -10353,6 +10353,7 @@  ix86_expand_split_stack_prologue (void)
   rtx_code_label *label;
   rtx limit, current, allocate_rtx, call_fusage;
   rtx_insn *call_insn;
+  unsigned int scratch_regno = INVALID_REGNUM;
   rtx scratch_reg = NULL_RTX;
   rtx_code_label *varargs_label = NULL;
   rtx fn;
@@ -10375,11 +10376,16 @@  ix86_expand_split_stack_prologue (void)
 
   limit = ix86_split_stack_guard ();
 
-  if (allocate < SPLIT_STACK_AVAILABLE)
-    current = stack_pointer_rtx;
-  else
+  if (allocate >= SPLIT_STACK_AVAILABLE
+      || flag_force_indirect_call)
+    {
+      scratch_regno = split_stack_prologue_scratch_regno ();
+      if (scratch_regno == INVALID_REGNUM)
+	return;
+    }
+
+  if (allocate >= SPLIT_STACK_AVAILABLE)
     {
-      unsigned int scratch_regno;
       rtx offset;
 
       /* We need a scratch register to hold the stack pointer minus
@@ -10387,9 +10393,7 @@  ix86_expand_split_stack_prologue (void)
 	 function, the scratch register can be any caller-saved
 	 register which is not used for parameters.  */
       offset = GEN_INT (- allocate);
-      scratch_regno = split_stack_prologue_scratch_regno ();
-      if (scratch_regno == INVALID_REGNUM)
-	return;
+
       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
       if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
 	{
@@ -10407,6 +10411,8 @@  ix86_expand_split_stack_prologue (void)
 	}
       current = scratch_reg;
     }
+  else
+    current = stack_pointer_rtx;
 
   ix86_expand_branch (GEU, current, limit, label);
   rtx_insn *jump_insn = get_last_insn ();
@@ -10435,8 +10441,8 @@  ix86_expand_split_stack_prologue (void)
     {
       rtx reg10, reg11;
 
-      reg10 = gen_rtx_REG (Pmode, R10_REG);
-      reg11 = gen_rtx_REG (Pmode, R11_REG);
+      reg10 = gen_rtx_REG (DImode, R10_REG);
+      reg11 = gen_rtx_REG (DImode, R11_REG);
 
       /* If this function uses a static chain, it will be in %r10.
 	 Preserve it across the call to __morestack.  */
@@ -10449,32 +10455,25 @@  ix86_expand_split_stack_prologue (void)
 	  use_reg (&call_fusage, rax);
 	}
 
-      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
-          && !TARGET_PECOFF)
+      if (flag_force_indirect_call
+	  || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
 	{
 	  HOST_WIDE_INT argval;
 
-	  gcc_assert (Pmode == DImode);
-	  /* When using the large model we need to load the address
-	     into a register, and we've run out of registers.  So we
-	     switch to a different calling convention, and we call a
-	     different function: __morestack_large.  We pass the
-	     argument size in the upper 32 bits of r10 and pass the
-	     frame size in the lower 32 bits.  */
-	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
-	  gcc_assert ((args_size & 0xffffffff) == args_size);
-
 	  if (split_stack_fn_large == NULL_RTX)
 	    {
 	      split_stack_fn_large
 		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
 	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
 	    }
+
 	  if (ix86_cmodel == CM_LARGE_PIC)
 	    {
 	      rtx_code_label *label;
 	      rtx x;
 
+	      gcc_assert (Pmode == DImode);
+
 	      label = gen_label_rtx ();
 	      emit_label (label);
 	      LABEL_PRESERVE_P (label) = 1;
@@ -10487,12 +10486,19 @@  ix86_expand_split_stack_prologue (void)
 	      emit_move_insn (reg11, x);
 	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
 	      x = gen_const_mem (Pmode, x);
-	      emit_move_insn (reg11, x);
+	      fn = copy_to_suggested_reg (x, reg11, Pmode);
 	    }
 	  else
-	    emit_move_insn (reg11, split_stack_fn_large);
+	    fn = split_stack_fn_large;
 
-	  fn = reg11;
+	  /* When using the large model we need to load the address
+	     into a register, and we've run out of registers.  So we
+	     switch to a different calling convention, and we call a
+	     different function: __morestack_large.  We pass the
+	     argument size in the upper 32 bits of r10 and pass the
+	     frame size in the lower 32 bits.  */
+	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
+	  gcc_assert ((args_size & 0xffffffff) == args_size);
 
 	  argval = ((args_size << 16) << 16) + allocate;
 	  emit_move_insn (reg10, GEN_INT (argval));
@@ -10508,12 +10514,40 @@  ix86_expand_split_stack_prologue (void)
     }
   else
     {
+      if (flag_force_indirect_call && flag_pic)
+	{
+	  rtx x;
+
+	  gcc_assert (Pmode == SImode);
+
+	  scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
+
+	  emit_insn (gen_set_got (scratch_reg));
+	  x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
+			      UNSPEC_GOT);
+	  x = gen_rtx_CONST (Pmode, x);
+	  x = gen_rtx_PLUS (Pmode, scratch_reg, x);
+	  x = gen_const_mem (Pmode, x);
+	  fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
+	}
+
       rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
       insn = emit_insn (gen_push (allocate_rtx));
       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
       pop = GEN_INT (2 * UNITS_PER_WORD);
     }
+
+  if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
+    {
+      scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
+
+      if (GET_MODE (fn) != word_mode)
+	fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
+
+      fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
+    }
+
   call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
 				GEN_INT (UNITS_PER_WORD), constm1_rtx,
 				pop, false);
@@ -10558,7 +10592,6 @@  ix86_expand_split_stack_prologue (void)
      slot.  */
   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
     {
-      unsigned int scratch_regno;
       rtx frame_reg;
       int words;
 
diff --git a/gcc/testsuite/g++.target/i386/pr89316.C b/gcc/testsuite/g++.target/i386/pr89316.C
new file mode 100644
index 00000000000..2f05ef751a5
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr89316.C
@@ -0,0 +1,6 @@ 
+// PR target/89316
+// { dg-do compile }
+// { dg-require-effective-target split_stack }
+// { dg-options "-fsplit-stack -mforce-indirect-call" }
+
+struct foo { foo (); } foobar;
diff --git a/gcc/testsuite/gcc.target/i386/pr112605-1.c b/gcc/testsuite/gcc.target/i386/pr112605-1.c
new file mode 100644
index 00000000000..57b91909f0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112605-1.c
@@ -0,0 +1,7 @@ 
+/* PR target/112605 */
+/* { dg-do compile } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-require-effective-target fpic } */
+/* { dg-options "-fsplit-stack -fpic -mforce-indirect-call" } */
+
+#include "pr112605.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr112605-2.c b/gcc/testsuite/gcc.target/i386/pr112605-2.c
new file mode 100644
index 00000000000..7db9aba3abc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112605-2.c
@@ -0,0 +1,7 @@ 
+/* PR target/112605 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-require-effective-target fpic } */
+/* { dg-options "-fsplit-stack -fpic -mcmodel=large -mforce-indirect-call " } */
+
+#include "pr112605.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr112605.c b/gcc/testsuite/gcc.target/i386/pr112605.c
new file mode 100644
index 00000000000..da50a0f6d94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112605.c
@@ -0,0 +1,24 @@ 
+/* PR target/112605 */
+/* { dg-do compile } */
+/* { dg-require-effective-target split_stack } */
+/* { dg-options "-fsplit-stack -mforce-indirect-call" } */
+
+int x;
+int y;
+
+void __attribute__((noinline)) f1(void)
+{
+	x++;
+}
+
+static __attribute__((noinline)) void f3(void)
+{
+	y++;
+}
+
+void f2()
+{
+	f1();
+	f3();
+	f1();
+}