Message ID | 20240607081444.4054046-1-mengqinggang@loongson.cn |
---|---|
State | New |
Headers | show |
Series | [v1] LoongArch: Add cfi pseudo instructions to _dl_tlsdesc_dynamic | expand |
On 2024-06-07 16:14, mengqinggang wrote: > Add cfi pseudo instructions to _dl_tlsdes_dynamic. But it only works > on LASX situation. I don't know how to write cfi pseudo instructions for > LSX part and Float part. The problem is that cfi instructions are always > executed in stack unwinding, resulting in incorrect CFA address. > > Save FCSR to 8-byte aligned address. > --- > sysdeps/loongarch/dl-tlsdesc.S | 46 ++++++++++++++++++++++++++++++---- > 1 file changed, 41 insertions(+), 5 deletions(-) > > diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S > index 15d5fa1c42..35f028c53e 100644 > --- a/sysdeps/loongarch/dl-tlsdesc.S > +++ b/sysdeps/loongarch/dl-tlsdesc.S > @@ -81,7 +81,7 @@ _dl_tlsdesc_undefweak: > _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > { > struct tlsdesc_dynamic_arg *td = tdp->arg; > - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - TCBHEAD_DTV); > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); > if (__glibc_likely (td->gen_count <= dtv[0].counter > && (dtv[td->tlsinfo.ti_module].pointer.val > != TLS_DTV_UNALLOCATED), > @@ -101,9 +101,13 @@ _dl_tlsdesc_dynamic: > /* Save just enough registers to support fast path, if we fall > into slow path we will save additional registers. */ > ADDI sp, sp, -24 > + cfi_adjust_cfa_offset (24) As [1] point out that the stack should be 16bytes aligment, I think it may need some fix. [1] https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#the-stack Thanks, Jinyang > REG_S t0, sp, 0 > REG_S t1, sp, 8 > REG_S t2, sp, 16 > + cfi_rel_offset (12, 0) > + cfi_rel_offset (13, 8) > + cfi_rel_offset (14, 16) > > /* Runtime Storage Layout of Thread-Local Storage > TP point to the start of TLS block. > @@ -133,6 +137,7 @@ Hign address dynamic_block1 <----- dtv5 */ > goto slow path. 
*/ > beq t1, t2, .Lslow > > + cfi_remember_state > REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ > /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ > add.d a0, t1, t2 > @@ -142,6 +147,7 @@ Hign address dynamic_block1 <----- dtv5 */ > REG_L t1, sp, 8 > REG_L t2, sp, 16 > ADDI sp, sp, 24 > + cfi_adjust_cfa_offset (-24) > RET > > .Lslow: > @@ -150,7 +156,9 @@ Hign address dynamic_block1 <----- dtv5 */ > callee will trash. */ > > /* Save the remaining registers that we must treat as caller save. */ > + cfi_restore_state > ADDI sp, sp, -FRAME_SIZE > + cfi_adjust_cfa_offset (FRAME_SIZE) > REG_S ra, sp, 0 * SZREG > REG_S a1, sp, 1 * SZREG > REG_S a2, sp, 2 * SZREG > @@ -165,15 +173,30 @@ Hign address dynamic_block1 <----- dtv5 */ > REG_S t6, sp, 11 * SZREG > REG_S t7, sp, 12 * SZREG > REG_S t8, sp, 13 * SZREG > + cfi_rel_offset (1, 0 * SZREG) > + cfi_rel_offset (5, 1 * SZREG) > + cfi_rel_offset (6, 2 * SZREG) > + cfi_rel_offset (7, 3 * SZREG) > + cfi_rel_offset (8, 4 * SZREG) > + cfi_rel_offset (9, 5 * SZREG) > + cfi_rel_offset (10, 6 * SZREG) > + cfi_rel_offset (11, 7 * SZREG) > + cfi_rel_offset (15, 8 * SZREG) > + cfi_rel_offset (16, 9 * SZREG) > + cfi_rel_offset (17, 10 * SZREG) > + cfi_rel_offset (18, 11 * SZREG) > + cfi_rel_offset (19, 12 * SZREG) > + cfi_rel_offset (20, 13 * SZREG) > > #ifndef __loongarch_soft_float > > /* Save fcsr0 register. > Only one physical fcsr0 register, fcsr1-fcsr3 are aliases > of some fields in fcsr0. */ > - ADDI sp, sp, -SZFCSREG > + ADDI sp, sp, -SZREG > + cfi_adjust_cfa_offset (SZREG) > movfcsr2gr t0, fcsr0 > - st.w t0, sp, 0 > + st.d t0, sp, 0 > > /* Whether support LASX. */ > la.global t0, _rtld_global_ro > @@ -184,6 +207,7 @@ Hign address dynamic_block1 <----- dtv5 */ > /* Save 256-bit vector registers. > FIXME: Without vector ABI, save all vector registers. 
*/ > ADDI sp, sp, -FRAME_SIZE_LASX > + cfi_adjust_cfa_offset (FRAME_SIZE_LASX) > xvst xr0, sp, 0*SZXREG > xvst xr1, sp, 1*SZXREG > xvst xr2, sp, 2*SZXREG > @@ -225,6 +249,8 @@ Hign address dynamic_block1 <----- dtv5 */ > > /* Save 128-bit vector registers. */ > ADDI sp, sp, -FRAME_SIZE_LSX > + # FIXME: cfi > + # cfi_adjust_cfa_offset (FRAME_SIZE_LSX) > vst vr0, sp, 0*SZVREG > vst vr1, sp, 1*SZVREG > vst vr2, sp, 2*SZVREG > @@ -262,6 +288,8 @@ Hign address dynamic_block1 <----- dtv5 */ > .Lfloat: > /* Save float registers. */ > ADDI sp, sp, -FRAME_SIZE_FLOAT > + # FIXME: cfi > + # cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT) > FREG_S fa0, sp, 0*SZFREG > FREG_S fa1, sp, 1*SZFREG > FREG_S fa2, sp, 2*SZFREG > @@ -334,6 +362,7 @@ Hign address dynamic_block1 <----- dtv5 */ > xvld xr30, sp, 30*SZXREG > xvld xr31, sp, 31*SZXREG > ADDI sp, sp, FRAME_SIZE_LASX > + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX) > b .Lfcsr > > .Llsx1: > @@ -374,6 +403,8 @@ Hign address dynamic_block1 <----- dtv5 */ > vld vr30, sp, 30*SZVREG > vld vr31, sp, 31*SZVREG > ADDI sp, sp, FRAME_SIZE_LSX > + # FIXME: cfi > + # cfi_adjust_cfa_offset (-FRAME_SIZE_LSX) > b .Lfcsr > > .Lfloat1: > @@ -403,12 +434,15 @@ Hign address dynamic_block1 <----- dtv5 */ > FREG_L ft14, sp, 22*SZFREG > FREG_L ft15, sp, 23*SZFREG > ADDI sp, sp, FRAME_SIZE_FLOAT > + # FIXME: cfi > + # cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT) > > .Lfcsr: > /* Restore fcsr0 register. */ > - ld.w t0, sp, 0 > + ld.d t0, sp, 0 > movgr2fcsr fcsr0, t0 > - ADDI sp, sp, SZFCSREG > + ADDI sp, sp, SZREG > + cfi_adjust_cfa_offset (-SZREG) > > #endif /* #ifndef __loongarch_soft_float */ > > @@ -427,6 +461,8 @@ Hign address dynamic_block1 <----- dtv5 */ > REG_L t7, sp, 12 * SZREG > REG_L t8, sp, 13 * SZREG > ADDI sp, sp, FRAME_SIZE > + cfi_adjust_cfa_offset (-FRAME_SIZE) > + cfi_restore (1) > > b .Lret > cfi_endproc
On Wed, 2024-06-12 at 18:02 +0800, Jinyang He wrote: > On 2024-06-07 16:14, mengqinggang wrote: > > ADDI sp, sp, -24 > > + cfi_adjust_cfa_offset (24) > As [1] points out, the stack should be 16-byte aligned, so I think it > may need some fix. > [1] > https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#the-stack https://sourceware.org/pipermail/libc-alpha/2024-June/157444.html
diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S index 15d5fa1c42..35f028c53e 100644 --- a/sysdeps/loongarch/dl-tlsdesc.S +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -81,7 +81,7 @@ _dl_tlsdesc_undefweak: _dl_tlsdesc_dynamic (struct tlsdesc *tdp) { struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - TCBHEAD_DTV); + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); if (__glibc_likely (td->gen_count <= dtv[0].counter && (dtv[td->tlsinfo.ti_module].pointer.val != TLS_DTV_UNALLOCATED), @@ -101,9 +101,13 @@ _dl_tlsdesc_dynamic: /* Save just enough registers to support fast path, if we fall into slow path we will save additional registers. */ ADDI sp, sp, -24 + cfi_adjust_cfa_offset (24) REG_S t0, sp, 0 REG_S t1, sp, 8 REG_S t2, sp, 16 + cfi_rel_offset (12, 0) + cfi_rel_offset (13, 8) + cfi_rel_offset (14, 16) /* Runtime Storage Layout of Thread-Local Storage TP point to the start of TLS block. @@ -133,6 +137,7 @@ Hign address dynamic_block1 <----- dtv5 */ goto slow path. */ beq t1, t2, .Lslow + cfi_remember_state REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ add.d a0, t1, t2 @@ -142,6 +147,7 @@ Hign address dynamic_block1 <----- dtv5 */ REG_L t1, sp, 8 REG_L t2, sp, 16 ADDI sp, sp, 24 + cfi_adjust_cfa_offset (-24) RET .Lslow: @@ -150,7 +156,9 @@ Hign address dynamic_block1 <----- dtv5 */ callee will trash. */ /* Save the remaining registers that we must treat as caller save. 
*/ + cfi_restore_state ADDI sp, sp, -FRAME_SIZE + cfi_adjust_cfa_offset (FRAME_SIZE) REG_S ra, sp, 0 * SZREG REG_S a1, sp, 1 * SZREG REG_S a2, sp, 2 * SZREG @@ -165,15 +173,30 @@ Hign address dynamic_block1 <----- dtv5 */ REG_S t6, sp, 11 * SZREG REG_S t7, sp, 12 * SZREG REG_S t8, sp, 13 * SZREG + cfi_rel_offset (1, 0 * SZREG) + cfi_rel_offset (5, 1 * SZREG) + cfi_rel_offset (6, 2 * SZREG) + cfi_rel_offset (7, 3 * SZREG) + cfi_rel_offset (8, 4 * SZREG) + cfi_rel_offset (9, 5 * SZREG) + cfi_rel_offset (10, 6 * SZREG) + cfi_rel_offset (11, 7 * SZREG) + cfi_rel_offset (15, 8 * SZREG) + cfi_rel_offset (16, 9 * SZREG) + cfi_rel_offset (17, 10 * SZREG) + cfi_rel_offset (18, 11 * SZREG) + cfi_rel_offset (19, 12 * SZREG) + cfi_rel_offset (20, 13 * SZREG) #ifndef __loongarch_soft_float /* Save fcsr0 register. Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of some fields in fcsr0. */ - ADDI sp, sp, -SZFCSREG + ADDI sp, sp, -SZREG + cfi_adjust_cfa_offset (SZREG) movfcsr2gr t0, fcsr0 - st.w t0, sp, 0 + st.d t0, sp, 0 /* Whether support LASX. */ la.global t0, _rtld_global_ro @@ -184,6 +207,7 @@ Hign address dynamic_block1 <----- dtv5 */ /* Save 256-bit vector registers. FIXME: Without vector ABI, save all vector registers. */ ADDI sp, sp, -FRAME_SIZE_LASX + cfi_adjust_cfa_offset (FRAME_SIZE_LASX) xvst xr0, sp, 0*SZXREG xvst xr1, sp, 1*SZXREG xvst xr2, sp, 2*SZXREG @@ -225,6 +249,8 @@ Hign address dynamic_block1 <----- dtv5 */ /* Save 128-bit vector registers. */ ADDI sp, sp, -FRAME_SIZE_LSX + # FIXME: cfi + # cfi_adjust_cfa_offset (FRAME_SIZE_LSX) vst vr0, sp, 0*SZVREG vst vr1, sp, 1*SZVREG vst vr2, sp, 2*SZVREG @@ -262,6 +288,8 @@ Hign address dynamic_block1 <----- dtv5 */ .Lfloat: /* Save float registers. 
*/ ADDI sp, sp, -FRAME_SIZE_FLOAT + # FIXME: cfi + # cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT) FREG_S fa0, sp, 0*SZFREG FREG_S fa1, sp, 1*SZFREG FREG_S fa2, sp, 2*SZFREG @@ -334,6 +362,7 @@ Hign address dynamic_block1 <----- dtv5 */ xvld xr30, sp, 30*SZXREG xvld xr31, sp, 31*SZXREG ADDI sp, sp, FRAME_SIZE_LASX + cfi_adjust_cfa_offset (-FRAME_SIZE_LASX) b .Lfcsr .Llsx1: @@ -374,6 +403,8 @@ Hign address dynamic_block1 <----- dtv5 */ vld vr30, sp, 30*SZVREG vld vr31, sp, 31*SZVREG ADDI sp, sp, FRAME_SIZE_LSX + # FIXME: cfi + # cfi_adjust_cfa_offset (-FRAME_SIZE_LSX) b .Lfcsr .Lfloat1: @@ -403,12 +434,15 @@ Hign address dynamic_block1 <----- dtv5 */ FREG_L ft14, sp, 22*SZFREG FREG_L ft15, sp, 23*SZFREG ADDI sp, sp, FRAME_SIZE_FLOAT + # FIXME: cfi + # cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT) .Lfcsr: /* Restore fcsr0 register. */ - ld.w t0, sp, 0 + ld.d t0, sp, 0 movgr2fcsr fcsr0, t0 - ADDI sp, sp, SZFCSREG + ADDI sp, sp, SZREG + cfi_adjust_cfa_offset (-SZREG) #endif /* #ifndef __loongarch_soft_float */ @@ -427,6 +461,8 @@ Hign address dynamic_block1 <----- dtv5 */ REG_L t7, sp, 12 * SZREG REG_L t8, sp, 13 * SZREG ADDI sp, sp, FRAME_SIZE + cfi_adjust_cfa_offset (-FRAME_SIZE) + cfi_restore (1) b .Lret cfi_endproc