@@ -101,7 +101,6 @@ _dl_runtime_resolve:
#ifndef PROF
.globl _dl_runtime_profile
.type _dl_runtime_profile, #function
- cfi_startproc
.align 2
_dl_runtime_profile:
/* AArch64 we get called with:
@@ -111,15 +110,16 @@ _dl_runtime_profile:
[sp, #0] &PLTGOT[n]
Stack frame layout:
- [sp, #...] lr
- [sp, #...] &PLTGOT[n]
- [sp, #96] La_aarch64_regs
- [sp, #48] La_aarch64_retval
- [sp, #40] frame size return from pltenter
- [sp, #32] dl_profile_call saved x1
- [sp, #24] dl_profile_call saved x0
- [sp, #16] t1
- [sp, #0] x29, lr <- x29
+ [x29, #...] lr
+ [x29, #...] &PLTGOT[n]
+ [x29, #96] La_aarch64_regs
+ [x29, #48] La_aarch64_retval
+ [x29, #40] frame size return from pltenter
+ [x29, #32] dl_profile_call saved x1
+ [x29, #24] dl_profile_call saved x0
+ [x29, #16] t1
+ [x29, #0] x29, x8 (x8 = structure return pointer)
+ [x29, #-128] full 128-bit q0-q7 contents (OFFSET_SAVED_VEC)
*/
# define OFFSET_T1 16
@@ -127,46 +127,39 @@ _dl_runtime_profile:
# define OFFSET_FS OFFSET_SAVED_CALL_X0 + 16
# define OFFSET_RV OFFSET_FS + 8
# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV
+# define OFFSET_SAVED_VEC (-16 * 8)
-# define SF_SIZE OFFSET_RG + DL_SIZEOF_RG
+# define SF_SIZE (OFFSET_RG + DL_SIZEOF_RG)
# define OFFSET_PLTGOTN SF_SIZE
# define OFFSET_LR OFFSET_PLTGOTN + 8
- /* Save arguments. */
- sub sp, sp, #SF_SIZE
- cfi_adjust_cfa_offset (SF_SIZE)
- stp x29, x30, [SP, #0]
- mov x29, sp
- cfi_def_cfa_register (x29)
- cfi_rel_offset (x29, 0)
+ cfi_startproc
+ cfi_adjust_cfa_offset(16) /* Account for the 16 bytes of PLT stack. */
cfi_rel_offset (lr, 8)
- stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
- cfi_rel_offset (x0, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 0)
- cfi_rel_offset (x1, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 8)
- stp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
- cfi_rel_offset (x2, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 0)
- cfi_rel_offset (x3, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 8)
- stp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
- cfi_rel_offset (x4, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 0)
- cfi_rel_offset (x5, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 8)
- stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
- cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0)
- cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8)
+ stp x29, x8, [SP, #-SF_SIZE]!
+ cfi_adjust_cfa_offset (SF_SIZE)
+ cfi_rel_offset (x29, 0)
+ mov x29, sp
+ cfi_def_cfa_register (x29)
+ sub sp, sp, #-OFFSET_SAVED_VEC
- stp d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
- cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0)
- cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8)
- stp d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1]
- cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0)
- cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8)
- stp d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
- cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0)
- cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8)
- stp d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
- cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0)
- cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8)
+ /* Save La_aarch64_regs (x0-x7 and d0-d7). */
+ stp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
+ stp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
+ stp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
+ stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
+ stp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
+ stp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
+ stp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
+ stp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+
+ /* Re-save the full q0-q7 contents of the vector arguments below x29. */
+ stp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+ stp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+ stp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+ stp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
add x0, x29, #SF_SIZE + 16
ldr x1, [x29, #OFFSET_LR]
@@ -201,31 +194,28 @@ _dl_runtime_profile:
mov ip0, x0
/* Get arguments and return address back. */
+ ldr lr, [x29, #OFFSET_LR]
ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+ ldp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+ ldp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+ ldp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+ ldp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
- cfi_def_cfa_register (sp)
- ldp x29, x30, [x29, #0]
- cfi_restore(x29)
- cfi_restore(x30)
-
- add sp, sp, SF_SIZE + 16
- cfi_adjust_cfa_offset (- SF_SIZE - 16)
+ mov sp, x29
+ ldp x29, x8, [sp], SF_SIZE + 16
+ cfi_def_cfa (sp, 0)
+ cfi_restore (x29)
+ cfi_restore (lr)
/* Jump to the newly found address. */
br ip0
cfi_restore_state
-1:
- /* The new frame size is in ip0. */
-
- sub PTR_REG (1), PTR_REG (29), ip0l
+ /* The new frame size is in ip0, extended for pointer size. */
+1: sub x1, sp, ip0
and sp, x1, #0xfffffffffffffff0
str x0, [x29, #OFFSET_T1]
@@ -237,42 +227,56 @@ _dl_runtime_profile:
ldr ip0, [x29, #OFFSET_T1]
- /* Call the function. */
+ /* Load the original arguments, including x8 and the full q0-q7. */
ldp x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+ ldr x8, [x29, 8]
+ ldp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+ ldp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+ ldp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+ ldp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
+
+ /* Call the function. */
blr ip0
+
+ /* Save La_aarch64_retval (x0-x1 and d0-d3). */
stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
stp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
stp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+ /* Re-save the full q0-q7 contents of the vector return. */
+ stp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+ stp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+ stp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+ stp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
+
/* Setup call to pltexit */
ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
add x2, x29, #OFFSET_RG
add x3, x29, #OFFSET_RV
bl _dl_call_pltexit
+ /* Restore the full return value: x0-x1 and the saved q0-q7. */
ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
- ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
- ldp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+ ldp q0, q1, [x29, #OFFSET_SAVED_VEC + 16*0]
+ ldp q2, q3, [x29, #OFFSET_SAVED_VEC + 16*2]
+ ldp q4, q5, [x29, #OFFSET_SAVED_VEC + 16*4]
+ ldp q6, q7, [x29, #OFFSET_SAVED_VEC + 16*6]
+
/* LR from within La_aarch64_reg */
ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
- cfi_restore(lr)
mov sp, x29
cfi_def_cfa_register (sp)
ldr x29, [x29, #0]
- cfi_restore(x29)
add sp, sp, SF_SIZE + 16
cfi_adjust_cfa_offset (- SF_SIZE - 16)
+ cfi_restore(x29)
+ cfi_restore(lr)
br lr
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
#endif
- .previous
From: Richard Henderson <richard.henderson@linaro.org>

This change does not adjust La_aarch64_regs or La_aarch64_retval for the new AdvSIMD vector ABI; that will require more thought and coordination. In the meantime, this will at least pass the proper values to each callee, even if the values are not visible to auditing.

	* sysdeps/aarch64/dl-trampoline.S (_dl_runtime_profile): Do not record unwind info for arguments -- this is unneeded; properly include the 16 bytes of PLT stack into the unwind; save and restore the structure return pointer, x8; save all of the AdvSIMD registers defined for the vector ABI.
---
 sysdeps/aarch64/dl-trampoline.S | 138 ++++++++++++++++----------------
 1 file changed, 71 insertions(+), 67 deletions(-)