diff mbox series

[RS6000] Adjust gcc asm for power10

Message ID 20200930073657.GH15011@bubble.grove.modra.org
State New
Headers show
Series [RS6000] Adjust gcc asm for power10 | expand

Commit Message

Alan Modra Sept. 30, 2020, 7:36 a.m. UTC
Generate assembly that is .localentry 1 with @notoc calls to match.

Bootstrapped and regression tested powerpc64le-linux on power8, and
bootstrapped on power10.  (I lost the power10 machine to someone else
before I could build a baseline to compare against.)

gcc/
	* config/rs6000/ppc-asm.h: Support __PCREL__ code.
libgcc/
	* config/rs6000/morestack.S,
	* config/rs6000/tramp.S: Support __PCREL__ code.
libitm/
	* config/powerpc/sjlj.S: Support __PCREL__ code.

Comments

Segher Boessenkool Sept. 30, 2020, 10:36 p.m. UTC | #1
On Wed, Sep 30, 2020 at 05:06:57PM +0930, Alan Modra wrote:
> Generate assembly that is .localentry 1 with @notoc calls to match.

What is the purpose of this?  Non-obvious patches without any
explanation like that cost needless extra time to review :-/

"Support __PCREL__ code." suggests that it did not even build before?
Or did not work?  Or is this just a performance improvement?

> gcc/
> 	* config/rs6000/ppc-asm.h: Support __PCREL__ code.
> libgcc/
> 	* config/rs6000/morestack.S,
> 	* config/rs6000/tramp.S,
> 	* config/powerpc/sjlj.S: Support __PCREL__ code.

The patch does look fine.  Okay for trunk (and backports if those are
wanted; discuss with Bill I guess).  Thanks!

(But please explain the purpose of this, in the commit message if that
makes sense.)


Segher
Alan Modra Sept. 30, 2020, 11:19 p.m. UTC | #2
On Wed, Sep 30, 2020 at 05:36:08PM -0500, Segher Boessenkool wrote:
> On Wed, Sep 30, 2020 at 05:06:57PM +0930, Alan Modra wrote:
> > Generate assembly that is .localentry 1 with @notoc calls to match.
> 
> What is the purpose of this?  Non-obvious patches without any
> explanation like that cost needless extra time to review :-/
> 
> "Support __PCREL__ code." suggests that it did not even build before?
> Or did not work?  Or is this just a performance improvement?

Sorry, I sometimes credit you with super-human powers.  It's a
performance improvement for libgcc.a.  Calls between functions that
advertise as using the TOC and those that don't will require linker
call stubs.

To recap, a function that uses a TOC pointer advertises that fact by a
value of 2 or larger in the symbol st_other localentry bits.  A call
advertises that it is from a function that needs to preserve r2 by
using an R_PPC64_REL24 reloc on the call, a function that doesn't have
a valid TOC pointer uses R_PPC64_REL24_NOTOC.

Note that the extra stubs I'm talking about are in statically linked
code.  Calls to shared library functions have no extra overhead due to
mis-matched toc/notoc code.  Those calls need a plt call stub anyway.
Also, indirect calls are not affected.

> > gcc/
> > 	* config/rs6000/ppc-asm.h: Support __PCREL__ code.
> > libgcc/
> > 	* config/rs6000/morestack.S,
> > 	* config/rs6000/tramp.S,
> > 	* config/powerpc/sjlj.S: Support __PCREL__ code.
> 
> The patch does look fine.  Okay for trunk (and backports if those are
> wanted; discuss with Bill I guess).  Thanks!
> 
> (But please explain the purpose of this, in the commit message if that
> makes sense.)
> 
> 
> Segher
Segher Boessenkool Oct. 1, 2020, 12:15 a.m. UTC | #3
Hi Alan,

On Thu, Oct 01, 2020 at 08:49:44AM +0930, Alan Modra wrote:
> On Wed, Sep 30, 2020 at 05:36:08PM -0500, Segher Boessenkool wrote:
> > On Wed, Sep 30, 2020 at 05:06:57PM +0930, Alan Modra wrote:
> > > Generate assembly that is .localentry 1 with @notoc calls to match.
> > 
> > What is the purpose of this?  Non-obvious patches without any
> > explanation like that cost needless extra time to review :-/
> > 
> > "Support __PCREL__ code." suggests that it did not even build before?
> > Or did not work?  Or is this just a performance improvement?
> 
> Sorry, I sometimes credit you with super-human powers.  It's a
> performance improvement for libgcc.a.  Calling between functions that
> advertise as using the TOC and those that don't, will require linker
> call stubs.

Thanks for the explanation!


Segher
diff mbox series

Patch

diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h
index 48edc9945d7..e0bce9c5aec 100644
--- a/gcc/config/rs6000/ppc-asm.h
+++ b/gcc/config/rs6000/ppc-asm.h
@@ -262,6 +262,14 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #undef toc
 
 #define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name)
+#ifdef __PCREL__
+#define JUMP_TARGET(name) GLUE(FUNC_NAME(name),@notoc)
+#define FUNC_START(name) \
+	.type FUNC_NAME(name),@function; \
+	.globl FUNC_NAME(name); \
+FUNC_NAME(name): \
+	.localentry FUNC_NAME(name),1
+#else
 #define JUMP_TARGET(name) FUNC_NAME(name)
 #define FUNC_START(name) \
 	.type FUNC_NAME(name),@function; \
@@ -270,6 +278,7 @@  FUNC_NAME(name): \
 0:	addis 2,12,(.TOC.-0b)@ha; \
 	addi 2,2,(.TOC.-0b)@l; \
 	.localentry FUNC_NAME(name),.-FUNC_NAME(name)
+#endif /* !__PCREL__ */
 
 #define HIDDEN_FUNC(name) \
   FUNC_START(name) \
diff --git a/libgcc/config/rs6000/morestack.S b/libgcc/config/rs6000/morestack.S
index 1b8ebb5dc3b..ac33c882c30 100644
--- a/libgcc/config/rs6000/morestack.S
+++ b/libgcc/config/rs6000/morestack.S
@@ -55,11 +55,18 @@ 
 	.type name,@function;				\
 name##:
 
+#ifdef __PCREL__
+#define ENTRY(name)					\
+	ENTRY0(name);					\
+	.localentry name, 1
+#define JUMP_TARGET(name) name##@notoc
+#else
 #define ENTRY(name)					\
 	ENTRY0(name);					\
 0:	addis %r2,%r12,.TOC.-0b@ha;			\
         addi %r2,%r2,.TOC.-0b@l;			\
 	.localentry name, .-name
+#endif
 
 #else
 
@@ -81,6 +88,9 @@  BODY_LABEL(name)##:
 
 #define SIZE(name) .size name, .-BODY_LABEL(name)
 
+#ifndef JUMP_TARGET
+#define JUMP_TARGET(name) name
+#endif
 
 	.text
 # Just like __morestack, but with larger excess allocation
@@ -156,7 +166,7 @@  ENTRY0(__morestack)
 	stdu %r1,-MORESTACK_FRAMESIZE(%r1)
 
 	# void __morestack_block_signals (void)
-	bl __morestack_block_signals
+	bl JUMP_TARGET(__morestack_block_signals)
 
 	# void *__generic_morestack (size_t *pframe_size,
 	#			     void *old_stack,
@@ -164,7 +174,7 @@  ENTRY0(__morestack)
 	addi %r3,%r29,NEWSTACKSIZE_SAVE
 	mr %r4,%r29
 	li %r5,0			# no copying from old stack
-	bl __generic_morestack
+	bl JUMP_TARGET(__generic_morestack)
 
 # Start using new stack
 	stdu %r29,-32(%r3)		# back-chain
@@ -183,7 +193,7 @@  ENTRY0(__morestack)
 	std %r3,-0x7000-64(%r13)	# tcbhead_t.__private_ss
 
 	# void __morestack_unblock_signals (void)
-	bl __morestack_unblock_signals
+	bl JUMP_TARGET(__morestack_unblock_signals)
 
 # Set up for a call to the target function, located 3
 # instructions after __morestack's return address.
@@ -218,11 +228,11 @@  ENTRY0(__morestack)
 	std %r10,PARAMREG_SAVE+56(%r29)
 #endif
 
-	bl __morestack_block_signals
+	bl JUMP_TARGET(__morestack_block_signals)
 
 	# void *__generic_releasestack (size_t *pavailable)
 	addi %r3,%r29,NEWSTACKSIZE_SAVE
-	bl __generic_releasestack
+	bl JUMP_TARGET(__generic_releasestack)
 
 # Reset __private_ss stack guard to value for old stack
 	ld %r12,NEWSTACKSIZE_SAVE(%r29)
@@ -231,7 +241,7 @@  ENTRY0(__morestack)
 .LEHE0:
 	std %r3,-0x7000-64(%r13)	# tcbhead_t.__private_ss
 
-	bl __morestack_unblock_signals
+	bl JUMP_TARGET(__morestack_unblock_signals)
 
 # Use old stack again.
 	mr %r1,%r29
@@ -260,13 +270,15 @@  cleanup:
 	std %r3,PARAMREG_SAVE(%r29)	# Save exception header
 	# size_t __generic_findstack (void *stack)
 	mr %r3,%r29
-	bl __generic_findstack
+	bl JUMP_TARGET(__generic_findstack)
 	sub %r3,%r29,%r3
 	addi %r3,%r3,BACKOFF
 	std %r3,-0x7000-64(%r13)	# tcbhead_t.__private_ss
 	ld %r3,PARAMREG_SAVE(%r29)
-	bl _Unwind_Resume
+	bl JUMP_TARGET(_Unwind_Resume)
+#ifndef __PCREL__
 	nop
+#endif
 	.cfi_endproc
 	SIZE (__morestack)
 
@@ -310,7 +322,7 @@  ENTRY(__stack_split_initialize)
 	# void __generic_morestack_set_initial_sp (void *sp, size_t len)
 	mr %r3,%r1
 	li %r4, 0x4000
-	b __generic_morestack_set_initial_sp
+	b JUMP_TARGET(__generic_morestack_set_initial_sp)
 # The lack of .cfi_endproc here is deliberate.  This function and the
 # following ones can all use the default FDE.
 	SIZE (__stack_split_initialize)
diff --git a/libgcc/config/rs6000/tramp.S b/libgcc/config/rs6000/tramp.S
index 9b8fcfe34a3..b165faa31ec 100644
--- a/libgcc/config/rs6000/tramp.S
+++ b/libgcc/config/rs6000/tramp.S
@@ -140,15 +140,21 @@  trampoline_size = .-trampoline_initial
 /* R5 = function address */
 /* R6 = static chain */
 
+#ifndef __PCREL__
 	.pushsection ".toc","aw"
 .LC0:
 	.quad	trampoline_initial-8
 	.popsection
+#endif
 
 FUNC_START(__trampoline_setup)
 	.cfi_startproc
+#ifdef __PCREL__
+	pla 7,(trampoline_initial-8)@pcrel
+#else
 	addis 7,2,.LC0@toc@ha
 	ld 7,.LC0@toc@l(7)	/* trampoline address -8 */
+#endif
 
 	li	r8,trampoline_size	/* verify that the trampoline is big enough */
 	cmpw	cr1,r8,r4
diff --git a/libitm/config/powerpc/sjlj.S b/libitm/config/powerpc/sjlj.S
index a963fc350bb..bcb0e73a285 100644
--- a/libitm/config/powerpc/sjlj.S
+++ b/libitm/config/powerpc/sjlj.S
@@ -26,7 +26,23 @@ 
 
 #include "asmcfi.h"
 
-#if defined(__powerpc64__) && _CALL_ELF == 2
+#if defined(__powerpc64__) && _CALL_ELF == 2 && defined(__PCREL__)
+.macro FUNC name
+	.globl	\name
+	.type	\name, @function
+\name:
+	.localentry \name, 1
+.endm
+.macro END name
+	.size	\name, . - \name
+.endm
+.macro HIDDEN name
+	.hidden	\name
+.endm
+.macro CALL name
+	bl	\name @notoc
+.endm
+#elif defined(__powerpc64__) && _CALL_ELF == 2
 .macro FUNC name
         .globl  \name
         .type   \name, @function