diff mbox

[Darwin/PPC] implement out-of-line FPR/GPR saves/restores.

Message ID AD60FAE3-6703-4D30-A386-4441179574F7@sandoe-acoustics.co.uk
State New
Headers show

Commit Message

Iain Sandoe Oct. 28, 2011, 10:40 a.m. UTC
On 14 Oct 2011, at 10:29, Mike Stump wrote:

> On Oct 14, 2011, at 2:05 AM, Iain Sandoe wrote:
>> This implements their use and also the GPRs - the latter makes an  
>> appreciable reduction in code size,
>
>> OK for trunk?
>
> Ok.  Watch for problems with async stack walking (hitting sample in  
> Activity Monitor, or the walking done by CrashReporter)...  that's  
> the only thing I can think of that might be strange.

This has taken some time to apply because of various bootstrap issues  
(version applied is attached)

In answer to your observation;
I didn't expect problems with FPR saves because the vendor's tools  
implement those.

To test what you suggested I built some code that dropped down a few  
stack levels (with saves of FPR/GPR) and then either aborts or spins  
on a sleep.

The crashlogs from the abort() and the instrumentation samples from  
the sleep were OK.

====

While doing this (and checking crosses to AIX and eabisim) I noticed  
the following in rs6000/sysv4.h:

/* And similarly for general purpose registers.  */
#define GP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) < 32     \
                                    && !optimize_size)

which gives rise to code (with -Os) like:

main:
         mr 11,1  #,
         stwu 1,-504(1)   #,,
         mflr 0   #,
         bl _savegpr_31   #
         lis 31,.LANCHOR0@ha      # tmp137,

which I doubt is what was intended ....

... copying David in case he feels that should be amended.

cheers
Iain

Comments

Mike Stump Oct. 28, 2011, 6:47 p.m. UTC | #1
On Oct 28, 2011, at 3:40 AM, Iain Sandoe wrote:
> To test what you suggested I built some code that dropped down a few stack levels (with saves of FPR/GPR) and then either aborts or spins on a sleep.

Uhm, that's not enough; for async, you need to spawn threads that do the interesting stuff, and then have the main code abort.
diff mbox

Patch

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 180609)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,23 @@ 
+2011-10-28  Iain Sandoe  <iains@gcc.gnu.org>
+
+	* config/rs6000/t-darwin (LIB2FUNCS_STATIC_EXTRA): 
+	Move darwin-fpsave.asm from here to ... LIB2FUNCS_EXTRA.
+	(LIB2FUNCS_EXTRA):  Add darwin-gpsave.asm.
+	(TARGET_LIBGCC2_CFLAGS): Ensure that -fPIC and -pipe are inherited from
+	config/t-darwin.
+	* config/rs6000/darwin.h (FP_SAVE_INLINE): Adjust to enable.
+	(GP_SAVE_INLINE): Likewise.
+	(SAVE_FP_PREFIX,  SAVE_FP_SUFFIX, RESTORE_FP_PREFIX,
+	RESTORE_FP_SUFFIX): Set to empty strings.
+	* config/rs6000/rs6000.c (rs6000_savres_strategy): Implement for Darwin.
+	(debug_stack_info): Print savres_strategy.
+	(rs6000_savres_routine_name): Implement for Darwin.
+	(rs6000_make_savres_rtx): Adjust used register for Darwin.
+	(rs6000_emit_prologue): Implement out-of-line saves for Darwin.
+	(rs6000_output_function_prologue): Don't emit .extern for Mach-O.
+	(rs6000_emit_epilogue): Implement out-of-line saves for Darwin.
+	* config/rs6000/darwin-gpsave.asm: New file.
+
 2011-10-28  Jakub Jelinek  <jakub@redhat.com>
 
 	* config/i386/sse.md (VI4SD_AVX2): Removed.
Index: gcc/config/rs6000/t-darwin
===================================================================
--- gcc/config/rs6000/t-darwin	(revision 180609)
+++ gcc/config/rs6000/t-darwin	(working copy)
@@ -19,21 +19,21 @@ 
 
 LIB2FUNCS_EXTRA = $(srcdir)/config/rs6000/darwin-tramp.asm \
 	$(srcdir)/config/darwin-64.c \
+	$(srcdir)/config/rs6000/darwin-fpsave.asm  \
+	$(srcdir)/config/rs6000/darwin-gpsave.asm  \
 	$(srcdir)/config/rs6000/darwin-world.asm
 
 LIB2FUNCS_STATIC_EXTRA = \
-	$(srcdir)/config/rs6000/darwin-fpsave.asm  \
 	$(srcdir)/config/rs6000/darwin-vecsave.asm
 
-# The .asm files above are designed to run on all processors,
-# even though they use AltiVec instructions.  -Wa is used because
-# -force_cpusubtype_ALL doesn't work with -dynamiclib.
-#
-# -pipe because there's an assembler bug, 4077127, which causes
-# it to not properly process the first # directive, causing temporary
-# file names to appear in stabs, causing the bootstrap to fail.  Using -pipe
-# works around this by not having any temporary file names.
-TARGET_LIBGCC2_CFLAGS = -Wa,-force_cpusubtype_ALL -pipe -mmacosx-version-min=10.4
+# The .asm files above are designed to run on all processors, even though
+# they use AltiVec instructions.
+# -Wa is used because -force_cpusubtype_ALL doesn't work with -dynamiclib.
+# -mmacosx-version-min=10.4 is used to provide compatibility for code from
+# earlier OSX versions.
 
+TARGET_LIBGCC2_CFLAGS += -Wa,-force_cpusubtype_ALL -mmacosx-version-min=10.4
+
 darwin-fpsave.o:	$(srcdir)/config/rs6000/darwin-asm.h
+darwin-gpsave.o:	$(srcdir)/config/rs6000/darwin-asm.h
 darwin-tramp.o:		$(srcdir)/config/rs6000/darwin-asm.h
Index: gcc/config/rs6000/darwin.h
===================================================================
--- gcc/config/rs6000/darwin.h	(revision 180609)
+++ gcc/config/rs6000/darwin.h	(working copy)
@@ -173,18 +173,27 @@  extern int darwin_emit_branch_islands;
   (RS6000_ALIGN (crtl->outgoing_args_size, 16)		\
    + (STACK_POINTER_OFFSET))
 
-/* Define cutoff for using external functions to save floating point.
-   Currently on Darwin, always use inline stores.  */
+/* Define cutoff for using out-of-line functions to save registers.
+   Currently on Darwin, we implement FP and GPR out-of-line-saves plus the
+   special routine for 'save everything'.  */
 
-#undef	FP_SAVE_INLINE
-#define FP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) < 64)
+#undef FP_SAVE_INLINE
+#define FP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) > 60 && (FIRST_REG) < 64)
+
 #undef GP_SAVE_INLINE
-#define GP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) < 32)
+#define GP_SAVE_INLINE(FIRST_REG) ((FIRST_REG) > 29 && (FIRST_REG) < 32)
 
 /* Darwin uses a function call if everything needs to be saved/restored.  */
+
 #undef WORLD_SAVE_P
 #define WORLD_SAVE_P(INFO) ((INFO)->world_save_p)
 
+/* We don't use these on Darwin, they are just place-holders.  */
+#define SAVE_FP_PREFIX ""
+#define SAVE_FP_SUFFIX ""
+#define RESTORE_FP_PREFIX ""
+#define RESTORE_FP_SUFFIX ""
+
 /* The assembler wants the alternate register names, but without
    leading percent sign.  */
 #undef REGISTER_NAMES
@@ -234,12 +243,6 @@  extern int darwin_emit_branch_islands;
 #undef ASM_COMMENT_START
 #define ASM_COMMENT_START ";"
 
-/* FP save and restore routines.  */
-#define	SAVE_FP_PREFIX "._savef"
-#define SAVE_FP_SUFFIX ""
-#define	RESTORE_FP_PREFIX "._restf"
-#define RESTORE_FP_SUFFIX ""
-
 /* This is how to output an assembler line that says to advance
    the location counter to a multiple of 2**LOG bytes using the
    "nop" instruction as padding.  */
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 180609)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -17983,9 +17983,11 @@  rs6000_savres_strategy (rs6000_stack_t *info,
 
   /* Don't bother to try to save things out-of-line if r11 is occupied
      by the static chain.  It would require too much fiddling and the
-     static chain is rarely used anyway.  */
+     static chain is rarely used anyway.  FPRs are saved w.r.t the stack
+     pointer on Darwin.  */
   if (using_static_chain_p)
-    strategy |= SAVE_INLINE_FPRS | SAVE_INLINE_GPRS;
+    strategy |= (DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
+		| SAVE_INLINE_GPRS;
 
   /* If we are going to use store multiple, then don't even bother
      with the out-of-line routines, since the store-multiple
@@ -18033,6 +18035,9 @@  rs6000_savres_strategy (rs6000_stack_t *info,
   if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
     strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
 #endif
+  if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
+    strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
+
   return strategy;
 }
 
@@ -18628,6 +18633,8 @@  debug_stack_info (rs6000_stack_t *info)
   if (info->reg_size != 4)
     fprintf (stderr, "\treg_size            = %5d\n", info->reg_size);
 
+    fprintf (stderr, "\tsave-strategy       =  %04x\n", info->savres_strategy);
+
   fprintf (stderr, "\n");
 }
 
@@ -19573,10 +19580,25 @@  rs6000_savres_routine_name (rs6000_stack_t *info,
 	  suffix = savep ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
 	}
     }
-  else if (DEFAULT_ABI == ABI_DARWIN)
-    sorry ("out-of-line save/restore routines not supported on Darwin");
 
-  sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
+   if (DEFAULT_ABI == ABI_DARWIN)
+    {
+      /* The Darwin approach is (slightly) different, in order to be
+	 compatible with code generated by the system toolchain.  There is a
+	 single symbol for the start of save sequence, and the code here
+	 embeds an offset into that code on the basis of the first register
+	 to be saved.  */
+      prefix = savep ? "save" : "rest" ;
+      if (gpr)
+	sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31",
+	       prefix, (lr ? "x" : ""), (regno == 13 ? "" : "+"),
+	       (regno-13) * 4, prefix, regno);
+      else
+	sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31",
+	       prefix, (regno == 14 ? "" : "+"),  (regno-14) * 4, prefix, regno);
+    }
+  else
+    sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
 
   return savres_routine_name;
 }
@@ -19678,7 +19700,7 @@  rs6000_emit_savres_rtx (rs6000_stack_t *info,
 			bool savep, bool gpr, bool lr)
 {
   int i;
-  int offset, start_reg, end_reg, n_regs;
+  int offset, start_reg, end_reg, n_regs, use_reg;
   int reg_size = GET_MODE_SIZE (reg_mode);
   rtx sym;
   rtvec p;
@@ -19700,11 +19722,12 @@  rs6000_emit_savres_rtx (rs6000_stack_t *info,
 
   sym = rs6000_savres_routine_sym (info, savep, gpr, lr);
   RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
+  use_reg = DEFAULT_ABI == ABI_AIX ? (gpr && !lr ? 12 : 1)
+				   : DEFAULT_ABI == ABI_DARWIN && !gpr ? 1
+								       : 11;
   RTVEC_ELT (p, offset++)
     = gen_rtx_USE (VOIDmode,
-		   gen_rtx_REG (Pmode, DEFAULT_ABI != ABI_AIX ? 11
-				       : gpr && !lr ? 12
-				       : 1));
+		   gen_rtx_REG (Pmode, use_reg));
 
   for (i = 0; i < end_reg - start_reg; i++)
     {
@@ -20148,8 +20171,21 @@  rs6000_emit_prologue (void)
     }
   else if (!WORLD_SAVE_P (info) && !saving_GPRs_inline)
     {
+      if (DEFAULT_ABI == ABI_DARWIN)
+	{
+	  rtx dest_reg = gen_rtx_REG (reg_mode, 11);
+	  if (info->first_fp_reg_save == 64)
+	    /* we only need a copy, no fprs were saved.  */
+	    emit_move_insn (dest_reg, frame_reg_rtx);
+	  else
+	    {
+	      rtx offset = GEN_INT (sp_offset
+				    + (-8 * (64-info->first_fp_reg_save)));
+	      emit_insn (gen_add3_insn (dest_reg, frame_reg_rtx, offset));
+	    }
+	}
       /* Need to adjust r11 (r12) if we saved any FPRs.  */
-      if (info->first_fp_reg_save != 64)
+      else if (info->first_fp_reg_save != 64)
         {
 	  rtx dest_reg = gen_rtx_REG (reg_mode, DEFAULT_ABI == ABI_AIX
 				      ? 12 : 11);
@@ -20515,7 +20551,8 @@  rs6000_output_function_prologue (FILE *file,
 
   /* Write .extern for any function we will call to save and restore
      fp values.  */
-  if (info->first_fp_reg_save < 64)
+  if (info->first_fp_reg_save < 64
+      && !TARGET_MACHO)
     {
       char *name;
       int regno = info->first_fp_reg_save - 32;
@@ -21138,7 +21175,11 @@  rs6000_emit_epilogue (int sibcall)
       if (can_use_exit)
 	{
 	  rs6000_emit_stack_reset (info, sp_reg_rtx, frame_reg_rtx,
-				   sp_offset, can_use_exit);
+				 sp_offset, can_use_exit);
+	  if (DEFAULT_ABI == ABI_DARWIN)
+	    /* we only need a copy, no fprs were saved.  */
+	    emit_move_insn (gen_rtx_REG (reg_mode, 11), frame_reg_rtx);
+
 	  if (info->cr_save_p)
 	    rs6000_restore_saved_cr (cr_save_reg, using_mtcr_multiple);
 	}
Index: gcc/config/rs6000/darwin-gpsave.asm
===================================================================
--- gcc/config/rs6000/darwin-gpsave.asm	(revision 0)
+++ gcc/config/rs6000/darwin-gpsave.asm	(revision 0)
@@ -0,0 +1,118 @@ 
+/*  This file contains the GPR save and restore routines for Darwin.
+ *
+ *   Copyright (C) 2011 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Contributed by Iain Sandoe  <iains@gcc.gnu.org> */
+
+/* Like their FP and VEC counterparts, these routines have only one externally
+   visible entry point.  Calls have to be constructed as offsets from this.
+   (i.e. callers have to jump to "saveGPR+((x-13)*4)" to save registers x..31).
+
+   Each save/load instruction is 4 bytes long (for both m32 and m64 builds).
+
+   The save/restores here are done w.r.t r11.
+
+   restGPRx restores the link reg from the stack and returns to the saved
+   address.
+
+   */
+
+#include "darwin-asm.h"
+
+	.text
+	.align 2
+
+	.private_extern saveGPR	/* The only symbol for the save sequence; callers reach it by offset.  */
+saveGPR:	/* Enter at saveGPR+((x-13)*4) to save rx..r31 relative to r11.  Do not add, remove or reorder instructions: the entry offsets depend on each store's position.  */
+	stg r13,(-19 * GPR_BYTES)(r11)	/* Offset 0: full save, r13..r31.  stg and GPR_BYTES come from darwin-asm.h (not shown here); presumably the GPR-width store and register size -- confirm there.  */
+	stg r14,(-18 * GPR_BYTES)(r11)
+	stg r15,(-17 * GPR_BYTES)(r11)
+	stg r16,(-16 * GPR_BYTES)(r11)
+	stg r17,(-15 * GPR_BYTES)(r11)
+	stg r18,(-14 * GPR_BYTES)(r11)
+	stg r19,(-13 * GPR_BYTES)(r11)
+	stg r20,(-12 * GPR_BYTES)(r11)
+	stg r21,(-11 * GPR_BYTES)(r11)
+	stg r22,(-10 * GPR_BYTES)(r11)
+	stg r23,( -9 * GPR_BYTES)(r11)
+	stg r24,( -8 * GPR_BYTES)(r11)
+	stg r25,( -7 * GPR_BYTES)(r11)
+	stg r26,( -6 * GPR_BYTES)(r11)
+	stg r27,( -5 * GPR_BYTES)(r11)
+	stg r28,( -4 * GPR_BYTES)(r11)
+	stg r29,( -3 * GPR_BYTES)(r11)
+	stg r30,( -2 * GPR_BYTES)(r11)
+	stg r31,( -1 * GPR_BYTES)(r11)	/* r31 is stored GPR_BYTES below the address held in r11.  */
+	blr	/* LR is not modified anywhere in this routine, so this returns to the caller.  */
+
+/* restGPR: mirror of saveGPR; reloads rx..r31 from the same r11-relative slots.  LR is not modified here.  */
+
+	.private_extern restGPR	/* The only symbol for this restore sequence; callers reach it by offset.  */
+restGPR:	/* Enter at restGPR+((x-13)*4) to restore rx..r31.  Instruction order and count must stay fixed for those offsets to remain valid.  */
+	lg r13,(-19 * GPR_BYTES)(r11)	/* Offset 0: full restore, r13..r31.  lg and GPR_BYTES come from darwin-asm.h (not shown here); presumably the GPR-width load and register size -- confirm there.  */
+	lg r14,(-18 * GPR_BYTES)(r11)
+	lg r15,(-17 * GPR_BYTES)(r11)
+	lg r16,(-16 * GPR_BYTES)(r11)
+	lg r17,(-15 * GPR_BYTES)(r11)
+	lg r18,(-14 * GPR_BYTES)(r11)
+	lg r19,(-13 * GPR_BYTES)(r11)
+	lg r20,(-12 * GPR_BYTES)(r11)
+	lg r21,(-11 * GPR_BYTES)(r11)
+	lg r22,(-10 * GPR_BYTES)(r11)
+	lg r23,( -9 * GPR_BYTES)(r11)
+	lg r24,( -8 * GPR_BYTES)(r11)
+	lg r25,( -7 * GPR_BYTES)(r11)
+	lg r26,( -6 * GPR_BYTES)(r11)
+	lg r27,( -5 * GPR_BYTES)(r11)
+	lg r28,( -4 * GPR_BYTES)(r11)
+	lg r29,( -3 * GPR_BYTES)(r11)
+	lg r30,( -2 * GPR_BYTES)(r11)
+	lg r31,( -1 * GPR_BYTES)(r11)	/* r31 is reloaded from GPR_BYTES below the address held in r11.  */
+	blr	/* Returns through the LR value that was live on entry.  */
+
+	.private_extern restGPRx	/* Variant of restGPR that also reloads LR from the stack before returning.  */
+restGPRx:	/* Enter at restGPRx+((x-13)*4) to restore rx..r31 and LR.  Instruction order and count must stay fixed for those offsets to remain valid.  */
+	lg r13,(-19 * GPR_BYTES)(r11)	/* Offset 0: full restore, r13..r31.  lg and GPR_BYTES come from darwin-asm.h (not shown here); presumably the GPR-width load and register size -- confirm there.  */
+	lg r14,(-18 * GPR_BYTES)(r11)
+	lg r15,(-17 * GPR_BYTES)(r11)
+	lg r16,(-16 * GPR_BYTES)(r11)
+	lg r17,(-15 * GPR_BYTES)(r11)
+	lg r18,(-14 * GPR_BYTES)(r11)
+	lg r19,(-13 * GPR_BYTES)(r11)
+	lg r20,(-12 * GPR_BYTES)(r11)
+	lg r21,(-11 * GPR_BYTES)(r11)
+	lg r22,(-10 * GPR_BYTES)(r11)
+	lg r23,( -9 * GPR_BYTES)(r11)
+	lg r24,( -8 * GPR_BYTES)(r11)
+	lg r25,( -7 * GPR_BYTES)(r11)
+	lg r26,( -6 * GPR_BYTES)(r11)
+	lg r27,( -5 * GPR_BYTES)(r11)
+	lg r28,( -4 * GPR_BYTES)(r11)
+	lg r29,( -3 * GPR_BYTES)(r11)
+	/* Like the FP restore, we start from the offset for r30
+	   thus a restore of only r31 is not going to work.  */
+	lg r0,SAVED_LR_OFFSET(r1)	/* The entry offset for r30, (30-13)*4, lands here: fetch the saved LR into r0.  Note it is addressed from r1, not r11.  */
+	lg r30,( -2 * GPR_BYTES)(r11)	/* Entering here (the r31 offset) would skip the LR load above, leaving r0 unset for the mtlr below.  */
+	mtlr r0	/* Install the reloaded return address.  NOTE(review): presumably interleaved with the loads for scheduling -- the source does not say.  */
+	lg r31,( -1 * GPR_BYTES)(r11)
+	blr	/* Returns to the address loaded from SAVED_LR_OFFSET(r1), not to the LR value live on entry.  */