diff mbox

Fix a clash between temp_slot and add_frame_space free stack slot handling (PR middle-end/47893)

Message ID 20110225213121.GV30899@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Feb. 25, 2011, 9:31 p.m. UTC
Hi!

assign_stack_temp_for_type does its own temp_slot handling, in which
it includes the whole difference between old and new frame_offset.
Starting with 4.6 assign_stack_local_1 records and if possible reuses
padding areas, tracking them in frame_space_list, but when
assign_stack_local is called from assign_stack_temp_for_type and
later on the slot is reused for something larger, the padding areas
might be needed.  If frame_space handling is allowed to serve those
padding slots which are already occupied, the same stack location might be
used by two conflicting objects as the testcase below shows.

I've gathered some statistics (see the PR) and it is quite rare that
assign_stack_local_1 from assign_stack_temp_for_type actually queued
anything into frame_space_list (compared to other assign_stack_local
calls), so this patch fixes it by not doing this when assign_stack_local_1
is called from assign_stack_temp_for_type.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-02-25  Bernd Schmidt  <bernds@codesourcery.com>
	    Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/47893
	* rtl.h (ASLK_REDUCE_ALIGN, ASLK_RECORD_PAD): Define.
	(assign_stack_local_1): Change last argument type to int.
	* function.c (assign_stack_local_1): Replace reduce_alignment_ok
	argument with kind.  If bit ASLK_RECORD_PAD is not set in it,
	don't record padding space into frame_space_list nor
	use those areas.
	(assign_stack_local): Adjust caller.
	(assign_stack_temp_for_type): Call assign_stack_local_1 instead
	of assign_stack_local, pass 0 as last argument.
	* caller-save.c (setup_save_areas): Adjust assign_stack_local_1
	callers.

	* gcc.dg/pr47893.c: New test.


	Jakub

Comments

Richard Henderson March 1, 2011, 6:32 a.m. UTC | #1
On 02/26/2011 07:31 AM, Jakub Jelinek wrote:
> 2011-02-25  Bernd Schmidt  <bernds@codesourcery.com>
> 	    Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/47893
> 	* rtl.h (ASLK_REDUCE_ALIGN, ASLK_RECORD_PAD): Define.
> 	(assign_stack_local_1): Change last argument type to int.
> 	* function.c (assign_stack_local_1): Replace reduce_alignment_ok
> 	argument with kind.  If bit ASLK_RECORD_PAD is not set in it,
> 	don't record padding space into frame_space_list nor
> 	use those areas.
> 	(assign_stack_local): Adjust caller.
> 	(assign_stack_temp_for_type): Call assign_stack_local_1 instead
> 	of assign_stack_local, pass 0 as last argument.
> 	* caller-save.c (setup_save_areas): Adjust assign_stack_local_1
> 	callers.
> 
> 	* gcc.dg/pr47893.c: New test.

Ok.


r~
diff mbox

Patch

--- gcc/rtl.h.jj	2011-02-15 15:42:27.000000000 +0100
+++ gcc/rtl.h	2011-02-25 19:30:28.000000000 +0100
@@ -1685,7 +1685,9 @@  extern rtx simplify_subtraction (rtx);
 
 /* In function.c  */
 extern rtx assign_stack_local (enum machine_mode, HOST_WIDE_INT, int);
-extern rtx assign_stack_local_1 (enum machine_mode, HOST_WIDE_INT, int, bool);
+#define ASLK_REDUCE_ALIGN 1
+#define ASLK_RECORD_PAD 2
+extern rtx assign_stack_local_1 (enum machine_mode, HOST_WIDE_INT, int, int);
 extern rtx assign_stack_temp (enum machine_mode, HOST_WIDE_INT, int);
 extern rtx assign_stack_temp_for_type (enum machine_mode,
 				       HOST_WIDE_INT, int, tree);
--- gcc/function.c.jj	2011-02-02 16:30:50.000000000 +0100
+++ gcc/function.c	2011-02-25 19:30:59.000000000 +0100
@@ -355,14 +355,17 @@  add_frame_space (HOST_WIDE_INT start, HO
    -2 means use BITS_PER_UNIT,
    positive specifies alignment boundary in bits.
 
-   If REDUCE_ALIGNMENT_OK is true, it is OK to reduce alignment.
+   KIND has ASLK_REDUCE_ALIGN bit set if it is OK to reduce
+   alignment and ASLK_RECORD_PAD bit set if we should remember
+   extra space we allocated for alignment purposes.  When we are
+   called from assign_stack_temp_for_type, it is not set so we don't
+   track the same stack slot in two independent lists.
 
    We do not round to stack_boundary here.  */
 
 rtx
 assign_stack_local_1 (enum machine_mode mode, HOST_WIDE_INT size,
-		      int align,
-		      bool reduce_alignment_ok ATTRIBUTE_UNUSED)
+		      int align, int kind)
 {
   rtx x, addr;
   int bigend_correction = 0;
@@ -412,7 +415,7 @@  assign_stack_local_1 (enum machine_mode 
 		  /* It is OK to reduce the alignment as long as the
 		     requested size is 0 or the estimated stack
 		     alignment >= mode alignment.  */
-		  gcc_assert (reduce_alignment_ok
+		  gcc_assert ((kind & ASLK_REDUCE_ALIGN)
 		              || size == 0
 			      || (crtl->stack_alignment_estimated
 				  >= GET_MODE_ALIGNMENT (mode)));
@@ -430,21 +433,24 @@  assign_stack_local_1 (enum machine_mode 
 
   if (mode != BLKmode || size != 0)
     {
-      struct frame_space **psp;
-
-      for (psp = &crtl->frame_space_list; *psp; psp = &(*psp)->next)
+      if (kind & ASLK_RECORD_PAD)
 	{
-	  struct frame_space *space = *psp;
-	  if (!try_fit_stack_local (space->start, space->length, size,
-				    alignment, &slot_offset))
-	    continue;
-	  *psp = space->next;
-	  if (slot_offset > space->start)
-	    add_frame_space (space->start, slot_offset);
-	  if (slot_offset + size < space->start + space->length)
-	    add_frame_space (slot_offset + size,
-			     space->start + space->length);
-	  goto found_space;
+	  struct frame_space **psp;
+
+	  for (psp = &crtl->frame_space_list; *psp; psp = &(*psp)->next)
+	    {
+	      struct frame_space *space = *psp;
+	      if (!try_fit_stack_local (space->start, space->length, size,
+					alignment, &slot_offset))
+		continue;
+	      *psp = space->next;
+	      if (slot_offset > space->start)
+		add_frame_space (space->start, slot_offset);
+	      if (slot_offset + size < space->start + space->length)
+		add_frame_space (slot_offset + size,
+				 space->start + space->length);
+	      goto found_space;
+	    }
 	}
     }
   else if (!STACK_ALIGNMENT_NEEDED)
@@ -460,20 +466,26 @@  assign_stack_local_1 (enum machine_mode 
       frame_offset -= size;
       try_fit_stack_local (frame_offset, size, size, alignment, &slot_offset);
 
-      if (slot_offset > frame_offset)
-	add_frame_space (frame_offset, slot_offset);
-      if (slot_offset + size < old_frame_offset)
-	add_frame_space (slot_offset + size, old_frame_offset);
+      if (kind & ASLK_RECORD_PAD)
+	{
+	  if (slot_offset > frame_offset)
+	    add_frame_space (frame_offset, slot_offset);
+	  if (slot_offset + size < old_frame_offset)
+	    add_frame_space (slot_offset + size, old_frame_offset);
+	}
     }
   else
     {
       frame_offset += size;
       try_fit_stack_local (old_frame_offset, size, size, alignment, &slot_offset);
 
-      if (slot_offset > old_frame_offset)
-	add_frame_space (old_frame_offset, slot_offset);
-      if (slot_offset + size < frame_offset)
-	add_frame_space (slot_offset + size, frame_offset);
+      if (kind & ASLK_RECORD_PAD)
+	{
+	  if (slot_offset > old_frame_offset)
+	    add_frame_space (old_frame_offset, slot_offset);
+	  if (slot_offset + size < frame_offset)
+	    add_frame_space (slot_offset + size, frame_offset);
+	}
     }
 
  found_space:
@@ -513,7 +525,7 @@  assign_stack_local_1 (enum machine_mode 
 rtx
 assign_stack_local (enum machine_mode mode, HOST_WIDE_INT size, int align)
 {
-  return assign_stack_local_1 (mode, size, align, false);
+  return assign_stack_local_1 (mode, size, align, ASLK_RECORD_PAD);
 }
 
 
@@ -868,11 +880,13 @@  assign_stack_temp_for_type (enum machine
 	 and round it now.  We also make sure ALIGNMENT is at least
 	 BIGGEST_ALIGNMENT.  */
       gcc_assert (mode != BLKmode || align == BIGGEST_ALIGNMENT);
-      p->slot = assign_stack_local (mode,
-				    (mode == BLKmode
-				     ? CEIL_ROUND (size, (int) align / BITS_PER_UNIT)
-				     : size),
-				    align);
+      p->slot = assign_stack_local_1 (mode,
+				      (mode == BLKmode
+				       ? CEIL_ROUND (size,
+						     (int) align
+						     / BITS_PER_UNIT)
+				       : size),
+				      align, 0);
 
       p->align = align;
 
--- gcc/caller-save.c.jj	2011-01-25 18:40:08.000000000 +0100
+++ gcc/caller-save.c	2011-02-25 19:33:53.000000000 +0100
@@ -647,7 +647,8 @@  setup_save_areas (void)
 		  saved_reg->slot
 		    = assign_stack_local_1
 		      (regno_save_mode[regno][1],
-		       GET_MODE_SIZE (regno_save_mode[regno][1]), 0, true);
+		       GET_MODE_SIZE (regno_save_mode[regno][1]), 0,
+		       ASLK_REDUCE_ALIGN);
 		  if (dump_file != NULL)
 		    fprintf (dump_file, "%d uses a new slot\n", regno);
 		}
@@ -705,7 +706,7 @@  setup_save_areas (void)
 	    regno_save_mem[i][j]
 	      = assign_stack_local_1 (regno_save_mode[i][j],
 				      GET_MODE_SIZE (regno_save_mode[i][j]),
-				      0, true);
+				      0, ASLK_REDUCE_ALIGN);
 
 	    /* Setup single word save area just in case...  */
 	    for (k = 0; k < j; k++)
--- gcc/testsuite/gcc.dg/pr47893.c.jj	2011-02-25 19:34:32.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr47893.c	2011-02-25 19:34:47.000000000 +0100
@@ -0,0 +1,187 @@ 
+/* PR middle-end/47893 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mtune=atom -fno-omit-frame-pointer -fno-strict-aliasing" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+
+extern void abort (void);
+
+struct S
+{
+  unsigned s1:4, s2:2, s3:2, s4:2, s5:2, s6:1, s7:1, s8:1, s9:1, s10:1;
+  int s11:16; unsigned s12:4; int s13:16; unsigned s14:2;
+  int s15:16; unsigned s16:4; int s17:16; unsigned s18:2;
+};
+
+struct T
+{
+  unsigned t[3];
+};
+
+struct U
+{
+  unsigned u1, u2;
+};
+
+struct V;
+
+struct W
+{
+  char w1[24]; struct V *w2; unsigned w3; char w4[28912];
+  unsigned int w5; char w6[60];
+};
+
+struct X
+{
+  unsigned int x[2];
+};
+
+struct V
+{
+  int v1;
+  struct X v2[3];
+  char v3[28];
+};
+
+struct Y
+{
+  void *y1;
+  char y2[3076];
+  struct T y3[32];
+  char y4[1052];
+};
+
+volatile struct S v1 = { .s15 = -1, .s16 = 15, .s17 = -1, .s18 = 3 };
+
+__attribute__ ((noinline, noclone))
+int
+fn1 (int x)
+{
+  int r;
+  __asm__ volatile ("" : "=r" (r) : "0" (1), "r" (x) : "memory");
+  return r;
+}
+
+volatile int cnt;
+
+__attribute__ ((noinline, noclone))
+#ifdef __i386__
+__attribute__ ((regparm (2)))
+#endif
+struct S
+fn2 (struct Y *x, const struct X *y)
+{
+  if (++cnt > 1)
+    abort ();
+  __asm__ volatile ("" : : "r" (x), "r" (y) : "memory");
+  return v1;
+}
+
+__attribute__ ((noinline, noclone))
+void fn3 (void *x, unsigned y, const struct S *z, unsigned w)
+{
+  __asm__ volatile ("" : : "r" (x), "r" (y), "r" (z), "r" (w) : "memory");
+}
+
+volatile struct U v2;
+
+__attribute__ ((noinline, noclone))
+struct U
+fn4 (void *x, unsigned y)
+{
+  __asm__ volatile ("" : : "r" (x), "r" (y) : "memory");
+  return v2;
+}
+
+__attribute__ ((noinline, noclone))
+struct S
+fn5 (void *x)
+{
+  __asm__ volatile ("" : : "r" (x) : "memory");
+  return v1;
+}
+
+volatile struct T v3;
+
+__attribute__ ((noinline, noclone))
+struct T fn6 (void *x)
+{
+  __asm__ volatile ("" : : "r" (x) : "memory");
+  return v3;
+}
+
+__attribute__ ((noinline, noclone))
+struct T fn7 (void *x, unsigned y, unsigned z)
+{
+  __asm__ volatile ("" : : "r" (x), "r" (y), "r" (z) : "memory");
+  return v3;
+}
+
+static void
+fn8 (struct Y *x, const struct V *y)
+{
+  void *a = x->y1;
+  struct S b[4];
+  unsigned i, c;
+  c = fn1 (y->v1);
+  for (i = 0; i < c; i++)
+    b[i] = fn2 (x, &y->v2[i]);
+  fn3 (a, y->v1, b, c);
+}
+
+static inline void
+fn9 (void *x, struct S y __attribute__((unused)))
+{
+  fn4 (x, 8);
+}
+
+static void
+fn10 (struct Y *x)
+{
+  void *a = x->y1;
+  struct T b __attribute__((unused)) = fn6 (a);
+  fn9 (a, fn5 (a));
+}
+
+__attribute__((noinline, noclone))
+int
+fn11 (unsigned int x, void *y, const struct W *z,
+      unsigned int w, const char *v, const char *u)
+{
+  struct Y a, *t;
+  unsigned i;
+  t = &a;
+  __builtin_memset (t, 0, sizeof *t);
+  t->y1 = y;
+  if (x == 0)
+    {
+      if (z->w3 & 1)
+	fn10 (t);
+      for (i = 0; i < w; i++)
+	{
+	  if (v[i] == 0)
+	    t->y3[i] = fn7 (y, 0, u[i]);
+	  else
+	    return 0;
+	}
+    }
+  else
+    for (i = 0; i < w; i++)
+      t->y3[i] = fn7 (y, v[i], u[i]);
+  for (i = 0; i < z->w5; i++)
+    fn8 (t, &z->w2[i]);
+  return 0;
+}
+
+volatile int i;
+const char *volatile p = "";
+
+int
+main ()
+{
+  struct V v = { .v1 = 0 };
+  struct W w = { .w5 = 1, .w2 = &v };
+  fn11 (i + 1, (void *) p, &w, i, (const char *) p, (const char *) p);
+  if (cnt != 1)
+    abort ();
+  return 0;
+}