diff mbox series

[2/3] aarch64: Clean up _dl_runtime_profile

Message ID 20180801222347.18903-3-rth@twiddle.net
State New
Headers show
Series aarch64: Update ld.so for vector abi | expand

Commit Message

Richard Henderson Aug. 1, 2018, 10:23 p.m. UTC
From: Richard Henderson <richard.henderson@linaro.org>

This patch does not adjust La_aarch64_regs or La_aarch64_retval for the new
AdvSIMD vector ABI; that will require more thought and coordination.  In the
meantime, it at least passes the proper values to each callee, even if the
full vector values are not visible to auditing.

	* sysdeps/aarch64/dl-trampoline.S (_dl_runtime_profile):
	Do not record unwind info for arguments -- this is unneeded;
	properly include the 16 bytes of PLT stack into the unwind;
	save and restore the structure return pointer, x8;
	save all of the AdvSIMD registers defined for the vector ABI.
---
 sysdeps/aarch64/dl-trampoline.S | 138 ++++++++++++++++----------------
 1 file changed, 71 insertions(+), 67 deletions(-)

Comments

Szabolcs Nagy Aug. 2, 2018, 5:25 p.m. UTC | #1
On 01/08/18 23:23, rth@twiddle.net wrote:
> From: Richard Henderson <richard.henderson@linaro.org>
> 
> Not adjusting La_aarch64_regs or La_aarch64_retval for the new AdvSIMD
> vector ABI; that will require more thought and coordination.  In the
> meantime, this will at least pass the proper values to each callee,
> even if the values are not visible to auditing.
> 
> 	* sysdeps/aarch64/dl-trampoline.S (_dl_runtime_profile):
> 	Do not record unwind info for arguments -- this is unneeded;
> 	properly include the 16 bytes of PLT stack into the unwind;
> 	save and restore the structure return pointer, x8;
> 	save all of the AdvSIMD registers defined for the vector ABI.

This is OK to commit, thanks.

Reviewed-By: Szabolcs Nagy
diff mbox series

Patch

diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index e8e2af485a..67a7c1b207 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -101,7 +101,6 @@  _dl_runtime_resolve:
 #ifndef PROF
 	.globl _dl_runtime_profile
 	.type _dl_runtime_profile, #function
-	cfi_startproc
 	.align 2
 _dl_runtime_profile:
 	/* AArch64 we get called with:
@@ -111,15 +110,16 @@  _dl_runtime_profile:
 	   [sp, #0]	&PLTGOT[n]
 
 	   Stack frame layout:
-	   [sp,   #...] lr
-	   [sp,   #...] &PLTGOT[n]
-	   [sp,    #96] La_aarch64_regs
-	   [sp,    #48] La_aarch64_retval
-	   [sp,    #40] frame size return from pltenter
-	   [sp,    #32] dl_profile_call saved x1
-	   [sp,    #24] dl_profile_call saved x0
-	   [sp,    #16] t1
-	   [sp,     #0] x29, lr   <- x29
+	   [x29,  #...] lr
+	   [x29,  #...] &PLTGOT[n]
+	   [x29,   #96] La_aarch64_regs
+	   [x29,   #48] La_aarch64_retval
+	   [x29,   #40] frame size return from pltenter
+	   [x29,   #32] dl_profile_call saved x1
+	   [x29,   #24] dl_profile_call saved x0
+	   [x29,   #16] t1
+	   [x29,    #0] x29, x8
+	   [x29, #-128] full q[0-7] contents
 	 */
 
 # define OFFSET_T1		16
@@ -127,46 +127,39 @@  _dl_runtime_profile:
 # define OFFSET_FS		OFFSET_SAVED_CALL_X0 + 16
 # define OFFSET_RV		OFFSET_FS + 8
 # define OFFSET_RG		OFFSET_RV + DL_SIZEOF_RV
+# define OFFSET_SAVED_VEC	(-16 * 8)
 
-# define SF_SIZE		OFFSET_RG + DL_SIZEOF_RG
+# define SF_SIZE		(OFFSET_RG + DL_SIZEOF_RG)
 
 # define OFFSET_PLTGOTN		SF_SIZE
 # define OFFSET_LR		OFFSET_PLTGOTN + 8
 
-	/* Save arguments.  */
-	sub	sp, sp, #SF_SIZE
-	cfi_adjust_cfa_offset (SF_SIZE)
-	stp	x29, x30, [SP, #0]
-	mov	x29, sp
-	cfi_def_cfa_register (x29)
-	cfi_rel_offset (x29, 0)
+	cfi_startproc
+	cfi_adjust_cfa_offset(16)	/* Incorporate PLT */
 	cfi_rel_offset (lr, 8)
 
-	stp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
-	cfi_rel_offset (x0, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 0)
-	cfi_rel_offset (x1, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 8)
-	stp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
-	cfi_rel_offset (x2, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 0)
-	cfi_rel_offset (x3, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 8)
-	stp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
-	cfi_rel_offset (x4, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 0)
-	cfi_rel_offset (x5, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 8)
-	stp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0)
-	cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8)
+	stp	x29, x8, [SP, #-SF_SIZE]!
+	cfi_adjust_cfa_offset (SF_SIZE)
+	cfi_rel_offset (x29, 0)
+	mov	x29, sp
+	cfi_def_cfa_register (x29)
+	sub	sp, sp, #-OFFSET_SAVED_VEC
 
-	stp	d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0)
-	cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8)
-	stp	d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1]
-	cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0)
-	cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8)
-	stp	d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0)
-	cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8)
-	stp	d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
-	cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0)
-	cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8)
+	/* Save La_aarch64_regs.  */
+	stp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
+	stp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
+	stp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
+	stp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
+	stp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
+	stp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
+	stp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
+	stp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+
+	/* Re-save the full contents of the vector arguments.  */
+	stp	q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+	stp	q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+	stp	q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+	stp	q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
 
 	add     x0, x29, #SF_SIZE + 16
 	ldr	x1, [x29, #OFFSET_LR]
@@ -201,31 +194,28 @@  _dl_runtime_profile:
 	mov	ip0, x0
 
 	/* Get arguments and return address back.  */
+	ldr	lr, [x29, #OFFSET_LR]
 	ldp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldp	q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+	ldp	q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+	ldp	q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+	ldp	q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
 
-	cfi_def_cfa_register (sp)
-	ldp	x29, x30, [x29, #0]
-	cfi_restore(x29)
-	cfi_restore(x30)
-
-	add	sp, sp, SF_SIZE + 16
-	cfi_adjust_cfa_offset (- SF_SIZE - 16)
+	mov	sp, x29
+	ldp	x29, x8, [sp], SF_SIZE + 16
+	cfi_def_cfa (sp, 0)
+	cfi_restore (x29)
+	cfi_restore (lr)
 
 	/* Jump to the newly found address.  */
 	br	ip0
 
 	cfi_restore_state
-1:
-	/* The new frame size is in ip0.  */
-
-	sub	PTR_REG (1), PTR_REG (29), ip0l
+	/* The new frame size is in ip0, extended for pointer size.  */
+1:	sub	x1, sp, ip0
 	and	sp, x1, #0xfffffffffffffff0
 
 	str	x0, [x29, #OFFSET_T1]
@@ -237,42 +227,56 @@  _dl_runtime_profile:
 
 	ldr	ip0, [x29, #OFFSET_T1]
 
-	/* Call the function.  */
+	/* Load the original arguments.  */
 	ldp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldr	x8, [x29, 8]
+	ldp	q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+	ldp	q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+	ldp	q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+	ldp	q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
+
+	/* Call the function.  */
 	blr	ip0
+
+	/* Save La_aarch64_retval.  */
 	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
 	stp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
 	stp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
 
+	/* Re-save the full contents of the vector return.  */
+	stp	q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+	stp	q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+	stp	q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+	stp	q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
+
 	/* Setup call to pltexit  */
 	ldp	x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
 	add	x2, x29, #OFFSET_RG
 	add	x3, x29, #OFFSET_RV
 	bl	_dl_call_pltexit
 
+	/* Restore the full return value.  */
 	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
-	ldp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	ldp	q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+	ldp	q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+	ldp	q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+	ldp	q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
+
 	/* LR from within La_aarch64_reg */
 	ldr	lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
-	cfi_restore(lr)
 	mov	sp, x29
 	cfi_def_cfa_register (sp)
 	ldr	x29, [x29, #0]
-	cfi_restore(x29)
 	add	sp, sp, SF_SIZE + 16
 	cfi_adjust_cfa_offset (- SF_SIZE - 16)
+	cfi_restore(x29)
+	cfi_restore(lr)
 
 	br	lr
 
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
-	.previous