Message ID | 20240717132556.364360-1-dilfridge@gentoo.org |
---|---|
State | New |
Headers | show |
Series | Revert "LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic" | expand |
Sorry. Here is the v3 version of the patch. See: https://sourceware.org/pipermail/libc-alpha/2024-July/158025.html I have tested and reviewed this patch. And I plan to merge it after the 2.40 release. 在 2024/7/17 下午9:25, Andreas K. Hüttel 写道: > We're in freeze for the 2.40 release. > > This reverts commit 43224b1379d60b1ad98d29ef3d7905d55f828a9f. > > Signed-off-by: Andreas K. Hüttel<dilfridge@gentoo.org> > --- > sysdeps/loongarch/dl-machine.h | 7 - > sysdeps/loongarch/dl-tlsdesc-dynamic.h | 225 -------------- > sysdeps/loongarch/dl-tlsdesc.S | 386 +++++++++++++++++++++++-- > sysdeps/loongarch/dl-tlsdesc.h | 4 - > sysdeps/loongarch/tlsdesc.sym | 9 + > 5 files changed, 373 insertions(+), 258 deletions(-) > delete mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h > > diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h > index a15d8e0ab6..ab6f1da7c0 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -223,13 +223,6 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], > { > td->arg = _dl_make_tlsdesc_dynamic (sym_map, > sym->st_value + reloc->r_addend); > -# ifndef __loongarch_soft_float > - if (RTLD_SUPPORT_LASX) > - td->entry = _dl_tlsdesc_dynamic_lasx; > - else if (RTLD_SUPPORT_LSX) > - td->entry = _dl_tlsdesc_dynamic_lsx; > - else > -# endif > td->entry = _dl_tlsdesc_dynamic; > } > else > diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > deleted file mode 100644 > index d10f4a8800..0000000000 > --- a/sysdeps/loongarch/dl-tlsdesc-dynamic.h > +++ /dev/null > @@ -1,225 +0,0 @@ > -/* Thread-local storage handling in the ELF dynamic linker. > - LoongArch version. > - Copyright (C) 2024 Free Software Foundation, Inc. > - > - This file is part of the GNU C Library. > - > - The GNU C Library is free software; you can redistribute it and/or > - modify it under the terms of the GNU Lesser General Public > - License as published by the Free Software Foundation; either > - version 2.1 of the License, or (at your option) any later version. > - > - The GNU C Library is distributed in the hope that it will be useful, > - but WITHOUT ANY WARRANTY; without even the implied warranty of > - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > - Lesser General Public License for more details. > - > - You should have received a copy of the GNU Lesser General Public > - License along with the GNU C Library; if not, see > -<https://www.gnu.org/licenses/>. */ > - > -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) > -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) > -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) > -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) > - > - /* Handler for dynamic TLS symbols. > - Prototype: > - _dl_tlsdesc_dynamic (tlsdesc *) ; > - > - The second word of the descriptor points to a > - tlsdesc_dynamic_arg structure. > - > - Returns the offset between the thread pointer and the > - object referenced by the argument. > - > - ptrdiff_t > - _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > - { > - struct tlsdesc_dynamic_arg *td = tdp->arg; > - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); > - if (__glibc_likely (td->gen_count <= dtv[0].counter > - && (dtv[td->tlsinfo.ti_module].pointer.val > - != TLS_DTV_UNALLOCATED), > - 1)) > - return dtv[td->tlsinfo.ti_module].pointer.val > - + td->tlsinfo.ti_offset > - - __thread_pointer; > - > - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > - } */ > - .hidden _dl_tlsdesc_dynamic > - .global _dl_tlsdesc_dynamic > - .type _dl_tlsdesc_dynamic,%function > - cfi_startproc > - .align 2 > -_dl_tlsdesc_dynamic: > - /* Save just enough registers to support fast path, if we fall > - into slow path we will save additional registers. */ > - ADDI sp, sp, -32 > - cfi_adjust_cfa_offset (32) > - REG_S t0, sp, 0 > - REG_S t1, sp, 8 > - REG_S t2, sp, 16 > - cfi_rel_offset (12, 0) > - cfi_rel_offset (13, 8) > - cfi_rel_offset (14, 16) > - > -/* Runtime Storage Layout of Thread-Local Storage > - TP point to the start of TLS block. > - > - dtv > -Low address TCB ----------------> dtv0(counter) > - TP --> static_block0 <----- dtv1 > - static_block1 <----- dtv2 > - static_block2 <----- dtv3 > - dynamic_block0 <----- dtv4 > -Hign address dynamic_block1 <----- dtv5 */ > - > - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ > - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ > - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ > - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ > - /* If dtv[0].counter < td->gen_count, goto slow path. */ > - bltu t2, t1, .Lslow > - > - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ > - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ > - slli.d t1, t1, 4 > - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ > - REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ > - li.d t2, TLS_DTV_UNALLOCATED > - /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, > - goto slow path. */ > - beq t1, t2, .Lslow > - > - cfi_remember_state > - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ > - /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ > - add.d a0, t1, t2 > -.Lret: > - sub.d a0, a0, tp > - REG_L t0, sp, 0 > - REG_L t1, sp, 8 > - REG_L t2, sp, 16 > - ADDI sp, sp, 32 > - cfi_adjust_cfa_offset (-32) > - RET > - > -.Lslow: > - /* This is the slow path. We need to call __tls_get_addr() which > - means we need to save and restore all the register that the > - callee will trash. */ > - > - /* Save the remaining registers that we must treat as caller save. */ > - cfi_restore_state > - ADDI sp, sp, -FRAME_SIZE > - cfi_adjust_cfa_offset (FRAME_SIZE) > - REG_S ra, sp, 0 * SZREG > - REG_S a1, sp, 1 * SZREG > - REG_S a2, sp, 2 * SZREG > - REG_S a3, sp, 3 * SZREG > - REG_S a4, sp, 4 * SZREG > - REG_S a5, sp, 5 * SZREG > - REG_S a6, sp, 6 * SZREG > - REG_S a7, sp, 7 * SZREG > - REG_S t3, sp, 8 * SZREG > - REG_S t4, sp, 9 * SZREG > - REG_S t5, sp, 10 * SZREG > - REG_S t6, sp, 11 * SZREG > - REG_S t7, sp, 12 * SZREG > - REG_S t8, sp, 13 * SZREG > - cfi_rel_offset (1, 0 * SZREG) > - cfi_rel_offset (5, 1 * SZREG) > - cfi_rel_offset (6, 2 * SZREG) > - cfi_rel_offset (7, 3 * SZREG) > - cfi_rel_offset (8, 4 * SZREG) > - cfi_rel_offset (9, 5 * SZREG) > - cfi_rel_offset (10, 6 * SZREG) > - cfi_rel_offset (11, 7 * SZREG) > - cfi_rel_offset (15, 8 * SZREG) > - cfi_rel_offset (16, 9 * SZREG) > - cfi_rel_offset (17, 10 * SZREG) > - cfi_rel_offset (18, 11 * SZREG) > - cfi_rel_offset (19, 12 * SZREG) > - cfi_rel_offset (20, 13 * SZREG) > - > -#ifndef __loongarch_soft_float > - > - /* Save fcsr0 register. > - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases > - of some fields in fcsr0. */ > - movfcsr2gr t0, fcsr0 > - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */ > - > -#ifdef USE_LASX > - #define V_REG_S xvst > - #define V_REG_L xvld > - #define V_SPACE FRAME_SIZE_LASX > - #define V_REG(n) $xr##n > - #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ > - 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 > - #define V_REGSZ SZXREG > -#elif defined USE_LSX > - #define V_REG_S vst > - #define V_REG_L vld > - #define V_SPACE FRAME_SIZE_LSX > - #define V_REG(n) $vr##n > - #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ > - 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 > - #define V_REGSZ SZVREG > -#else > - #define V_REG_S fst.d > - #define V_REG_L fld.d > - #define V_SPACE FRAME_SIZE_FLOAT > - #define V_REG(n) $f##n > - #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 > - #define V_REGSZ SZFREG > -#endif > - > - ADDI sp, sp, -V_SPACE > - cfi_adjust_cfa_offset (V_SPACE) > - .irp i,V_REGS > - V_REG_S V_REG(\i), sp, \i * V_REGSZ > - .endr > - > -#endif /* #ifndef __loongarch_soft_float */ > - > - bl HIDDEN_JUMPTARGET(__tls_get_addr) > - ADDI a0, a0, -TLS_DTV_OFFSET > - > -#ifndef __loongarch_soft_float > - > - .irp i,V_REGS > - V_REG_L V_REG(\i), sp, \i * V_REGSZ > - .endr > - ADDI sp, sp, V_SPACE > - cfi_adjust_cfa_offset (-V_SPACE) > - > - /* Restore fcsr0 register. */ > - ld.w t0, sp, FRAME_SIZE + 24 > - movgr2fcsr fcsr0, t0 > - > -#endif /* #ifndef __loongarch_soft_float */ > - > - REG_L ra, sp, 0 * SZREG > - REG_L a1, sp, 1 * SZREG > - REG_L a2, sp, 2 * SZREG > - REG_L a3, sp, 3 * SZREG > - REG_L a4, sp, 4 * SZREG > - REG_L a5, sp, 5 * SZREG > - REG_L a6, sp, 6 * SZREG > - REG_L a7, sp, 7 * SZREG > - REG_L t3, sp, 8 * SZREG > - REG_L t4, sp, 9 * SZREG > - REG_L t5, sp, 10 * SZREG > - REG_L t6, sp, 11 * SZREG > - REG_L t7, sp, 12 * SZREG > - REG_L t8, sp, 13 * SZREG > - ADDI sp, sp, FRAME_SIZE > - cfi_adjust_cfa_offset (-FRAME_SIZE) > - > - b .Lret > - cfi_endproc > - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > - .hidden HIDDEN_JUMPTARGET(__tls_get_addr) > diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S > index b6cfd6121d..a6627cc754 100644 > --- a/sysdeps/loongarch/dl-tlsdesc.S > +++ b/sysdeps/loongarch/dl-tlsdesc.S > @@ -59,34 +59,376 @@ _dl_tlsdesc_undefweak: > cfi_endproc > .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak > > + > #ifdef SHARED > > +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) > +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) > +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) > +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) > + > + /* Handler for dynamic TLS symbols. > + Prototype: > + _dl_tlsdesc_dynamic (tlsdesc *) ; > + > + The second word of the descriptor points to a > + tlsdesc_dynamic_arg structure. > + > + Returns the offset between the thread pointer and the > + object referenced by the argument. > + > + ptrdiff_t > + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > + { > + struct tlsdesc_dynamic_arg *td = tdp->arg; > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); > + if (__glibc_likely (td->gen_count <= dtv[0].counter > + && (dtv[td->tlsinfo.ti_module].pointer.val > + != TLS_DTV_UNALLOCATED), > + 1)) > + return dtv[td->tlsinfo.ti_module].pointer.val > + + td->tlsinfo.ti_offset > + - __thread_pointer; > + > + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > + } */ > + .hidden _dl_tlsdesc_dynamic > + .global _dl_tlsdesc_dynamic > + .type _dl_tlsdesc_dynamic,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_dynamic: > + /* Save just enough registers to support fast path, if we fall > + into slow path we will save additional registers. */ > + ADDI sp, sp, -32 > + REG_S t0, sp, 0 > + REG_S t1, sp, 8 > + REG_S t2, sp, 16 > + > +/* Runtime Storage Layout of Thread-Local Storage > + TP point to the start of TLS block. > + > + dtv > +Low address TCB ----------------> dtv0(counter) > + TP --> static_block0 <----- dtv1 > + static_block1 <----- dtv2 > + static_block2 <----- dtv3 > + dynamic_block0 <----- dtv4 > +Hign address dynamic_block1 <----- dtv5 */ > + > + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ > + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ > + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ > + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ > + /* If dtv[0].counter < td->gen_count, goto slow path. */ > + bltu t2, t1, .Lslow > + > + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ > + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ > + slli.d t1, t1, 4 > + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ > + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ > + li.d t2, TLS_DTV_UNALLOCATED > + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, > + goto slow path. */ > + beq t1, t2, .Lslow > + > + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ > + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ > + add.d a0, t1, t2 > +.Lret: > + sub.d a0, a0, tp > + REG_L t0, sp, 0 > + REG_L t1, sp, 8 > + REG_L t2, sp, 16 > + ADDI sp, sp, 32 > + RET > + > +.Lslow: > + /* This is the slow path. We need to call __tls_get_addr() which > + means we need to save and restore all the register that the > + callee will trash. */ > + > + /* Save the remaining registers that we must treat as caller save. */ > + ADDI sp, sp, -FRAME_SIZE > + REG_S ra, sp, 0 * SZREG > + REG_S a1, sp, 1 * SZREG > + REG_S a2, sp, 2 * SZREG > + REG_S a3, sp, 3 * SZREG > + REG_S a4, sp, 4 * SZREG > + REG_S a5, sp, 5 * SZREG > + REG_S a6, sp, 6 * SZREG > + REG_S a7, sp, 7 * SZREG > + REG_S t3, sp, 8 * SZREG > + REG_S t4, sp, 9 * SZREG > + REG_S t5, sp, 10 * SZREG > + REG_S t6, sp, 11 * SZREG > + REG_S t7, sp, 12 * SZREG > + REG_S t8, sp, 13 * SZREG > + > #ifndef __loongarch_soft_float > > -#define USE_LASX > -#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx > -#define Lret Lret_lasx > -#define Lslow Lslow_lasx > -#include "dl-tlsdesc-dynamic.h" > -#undef FRAME_SIZE > -#undef USE_LASX > -#undef _dl_tlsdesc_dynamic > -#undef Lret > -#undef Lslow > - > -#define USE_LSX > -#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx > -#define Lret Lret_lsx > -#define Lslow Lslow_lsx > -#include "dl-tlsdesc-dynamic.h" > -#undef FRAME_SIZE > -#undef USE_LSX > -#undef _dl_tlsdesc_dynamic > -#undef Lret > -#undef Lslow > + /* Save fcsr0 register. > + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases > + of some fields in fcsr0. */ > + movfcsr2gr t0, fcsr0 > + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */ > + > + /* Whether support LASX. */ > + la.global t0, _rtld_global_ro > + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET > + andi t1, t0, HWCAP_LOONGARCH_LASX > + beqz t1, .Llsx > + > + /* Save 256-bit vector registers. > + FIXME: Without vector ABI, save all vector registers. */ > + ADDI sp, sp, -FRAME_SIZE_LASX > + xvst xr0, sp, 0*SZXREG > + xvst xr1, sp, 1*SZXREG > + xvst xr2, sp, 2*SZXREG > + xvst xr3, sp, 3*SZXREG > + xvst xr4, sp, 4*SZXREG > + xvst xr5, sp, 5*SZXREG > + xvst xr6, sp, 6*SZXREG > + xvst xr7, sp, 7*SZXREG > + xvst xr8, sp, 8*SZXREG > + xvst xr9, sp, 9*SZXREG > + xvst xr10, sp, 10*SZXREG > + xvst xr11, sp, 11*SZXREG > + xvst xr12, sp, 12*SZXREG > + xvst xr13, sp, 13*SZXREG > + xvst xr14, sp, 14*SZXREG > + xvst xr15, sp, 15*SZXREG > + xvst xr16, sp, 16*SZXREG > + xvst xr17, sp, 17*SZXREG > + xvst xr18, sp, 18*SZXREG > + xvst xr19, sp, 19*SZXREG > + xvst xr20, sp, 20*SZXREG > + xvst xr21, sp, 21*SZXREG > + xvst xr22, sp, 22*SZXREG > + xvst xr23, sp, 23*SZXREG > + xvst xr24, sp, 24*SZXREG > + xvst xr25, sp, 25*SZXREG > + xvst xr26, sp, 26*SZXREG > + xvst xr27, sp, 27*SZXREG > + xvst xr28, sp, 28*SZXREG > + xvst xr29, sp, 29*SZXREG > + xvst xr30, sp, 30*SZXREG > + xvst xr31, sp, 31*SZXREG > + b .Ltga > + > +.Llsx: > + /* Whether support LSX. */ > + andi t1, t0, HWCAP_LOONGARCH_LSX > + beqz t1, .Lfloat > + > + /* Save 128-bit vector registers. */ > + ADDI sp, sp, -FRAME_SIZE_LSX > + vst vr0, sp, 0*SZVREG > + vst vr1, sp, 1*SZVREG > + vst vr2, sp, 2*SZVREG > + vst vr3, sp, 3*SZVREG > + vst vr4, sp, 4*SZVREG > + vst vr5, sp, 5*SZVREG > + vst vr6, sp, 6*SZVREG > + vst vr7, sp, 7*SZVREG > + vst vr8, sp, 8*SZVREG > + vst vr9, sp, 9*SZVREG > + vst vr10, sp, 10*SZVREG > + vst vr11, sp, 11*SZVREG > + vst vr12, sp, 12*SZVREG > + vst vr13, sp, 13*SZVREG > + vst vr14, sp, 14*SZVREG > + vst vr15, sp, 15*SZVREG > + vst vr16, sp, 16*SZVREG > + vst vr17, sp, 17*SZVREG > + vst vr18, sp, 18*SZVREG > + vst vr19, sp, 19*SZVREG > + vst vr20, sp, 20*SZVREG > + vst vr21, sp, 21*SZVREG > + vst vr22, sp, 22*SZVREG > + vst vr23, sp, 23*SZVREG > + vst vr24, sp, 24*SZVREG > + vst vr25, sp, 25*SZVREG > + vst vr26, sp, 26*SZVREG > + vst vr27, sp, 27*SZVREG > + vst vr28, sp, 28*SZVREG > + vst vr29, sp, 29*SZVREG > + vst vr30, sp, 30*SZVREG > + vst vr31, sp, 31*SZVREG > + b .Ltga > + > +.Lfloat: > + /* Save float registers. */ > + ADDI sp, sp, -FRAME_SIZE_FLOAT > + FREG_S fa0, sp, 0*SZFREG > + FREG_S fa1, sp, 1*SZFREG > + FREG_S fa2, sp, 2*SZFREG > + FREG_S fa3, sp, 3*SZFREG > + FREG_S fa4, sp, 4*SZFREG > + FREG_S fa5, sp, 5*SZFREG > + FREG_S fa6, sp, 6*SZFREG > + FREG_S fa7, sp, 7*SZFREG > + FREG_S ft0, sp, 8*SZFREG > + FREG_S ft1, sp, 9*SZFREG > + FREG_S ft2, sp, 10*SZFREG > + FREG_S ft3, sp, 11*SZFREG > + FREG_S ft4, sp, 12*SZFREG > + FREG_S ft5, sp, 13*SZFREG > + FREG_S ft6, sp, 14*SZFREG > + FREG_S ft7, sp, 15*SZFREG > + FREG_S ft8, sp, 16*SZFREG > + FREG_S ft9, sp, 17*SZFREG > + FREG_S ft10, sp, 18*SZFREG > + FREG_S ft11, sp, 19*SZFREG > + FREG_S ft12, sp, 20*SZFREG > + FREG_S ft13, sp, 21*SZFREG > + FREG_S ft14, sp, 22*SZFREG > + FREG_S ft15, sp, 23*SZFREG > + > +#endif /* #ifndef __loongarch_soft_float */ > + > +.Ltga: > + bl HIDDEN_JUMPTARGET(__tls_get_addr) > + ADDI a0, a0, -TLS_DTV_OFFSET > + > +#ifndef __loongarch_soft_float > + > + la.global t0, _rtld_global_ro > + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET > + andi t1, t0, HWCAP_LOONGARCH_LASX > + beqz t1, .Llsx1 > + > + /* Restore 256-bit vector registers. */ > + xvld xr0, sp, 0*SZXREG > + xvld xr1, sp, 1*SZXREG > + xvld xr2, sp, 2*SZXREG > + xvld xr3, sp, 3*SZXREG > + xvld xr4, sp, 4*SZXREG > + xvld xr5, sp, 5*SZXREG > + xvld xr6, sp, 6*SZXREG > + xvld xr7, sp, 7*SZXREG > + xvld xr8, sp, 8*SZXREG > + xvld xr9, sp, 9*SZXREG > + xvld xr10, sp, 10*SZXREG > + xvld xr11, sp, 11*SZXREG > + xvld xr12, sp, 12*SZXREG > + xvld xr13, sp, 13*SZXREG > + xvld xr14, sp, 14*SZXREG > + xvld xr15, sp, 15*SZXREG > + xvld xr16, sp, 16*SZXREG > + xvld xr17, sp, 17*SZXREG > + xvld xr18, sp, 18*SZXREG > + xvld xr19, sp, 19*SZXREG > + xvld xr20, sp, 20*SZXREG > + xvld xr21, sp, 21*SZXREG > + xvld xr22, sp, 22*SZXREG > + xvld xr23, sp, 23*SZXREG > + xvld xr24, sp, 24*SZXREG > + xvld xr25, sp, 25*SZXREG > + xvld xr26, sp, 26*SZXREG > + xvld xr27, sp, 27*SZXREG > + xvld xr28, sp, 28*SZXREG > + xvld xr29, sp, 29*SZXREG > + xvld xr30, sp, 30*SZXREG > + xvld xr31, sp, 31*SZXREG > + ADDI sp, sp, FRAME_SIZE_LASX > + b .Lfcsr > + > +.Llsx1: > + andi t1, t0, HWCAP_LOONGARCH_LSX > + beqz t1, .Lfloat1 > + > + /* Restore 128-bit vector registers. */ > + vld vr0, sp, 0*SZVREG > + vld vr1, sp, 1*SZVREG > + vld vr2, sp, 2*SZVREG > + vld vr3, sp, 3*SZVREG > + vld vr4, sp, 4*SZVREG > + vld vr5, sp, 5*SZVREG > + vld vr6, sp, 6*SZVREG > + vld vr7, sp, 7*SZVREG > + vld vr8, sp, 8*SZVREG > + vld vr9, sp, 9*SZVREG > + vld vr10, sp, 10*SZVREG > + vld vr11, sp, 11*SZVREG > + vld vr12, sp, 12*SZVREG > + vld vr13, sp, 13*SZVREG > + vld vr14, sp, 14*SZVREG > + vld vr15, sp, 15*SZVREG > + vld vr16, sp, 16*SZVREG > + vld vr17, sp, 17*SZVREG > + vld vr18, sp, 18*SZVREG > + vld vr19, sp, 19*SZVREG > + vld vr20, sp, 20*SZVREG > + vld vr21, sp, 21*SZVREG > + vld vr22, sp, 22*SZVREG > + vld vr23, sp, 23*SZVREG > + vld vr24, sp, 24*SZVREG > + vld vr25, sp, 25*SZVREG > + vld vr26, sp, 26*SZVREG > + vld vr27, sp, 27*SZVREG > + vld vr28, sp, 28*SZVREG > + vld vr29, sp, 29*SZVREG > + vld vr30, sp, 30*SZVREG > + vld vr31, sp, 31*SZVREG > + ADDI sp, sp, FRAME_SIZE_LSX > + b .Lfcsr > + > +.Lfloat1: > + /* Restore float registers. */ > + FREG_L fa0, sp, 0*SZFREG > + FREG_L fa1, sp, 1*SZFREG > + FREG_L fa2, sp, 2*SZFREG > + FREG_L fa3, sp, 3*SZFREG > + FREG_L fa4, sp, 4*SZFREG > + FREG_L fa5, sp, 5*SZFREG > + FREG_L fa6, sp, 6*SZFREG > + FREG_L fa7, sp, 7*SZFREG > + FREG_L ft0, sp, 8*SZFREG > + FREG_L ft1, sp, 9*SZFREG > + FREG_L ft2, sp, 10*SZFREG > + FREG_L ft3, sp, 11*SZFREG > + FREG_L ft4, sp, 12*SZFREG > + FREG_L ft5, sp, 13*SZFREG > + FREG_L ft6, sp, 14*SZFREG > + FREG_L ft7, sp, 15*SZFREG > + FREG_L ft8, sp, 16*SZFREG > + FREG_L ft9, sp, 17*SZFREG > + FREG_L ft10, sp, 18*SZFREG > + FREG_L ft11, sp, 19*SZFREG > + FREG_L ft12, sp, 20*SZFREG > + FREG_L ft13, sp, 21*SZFREG > + FREG_L ft14, sp, 22*SZFREG > + FREG_L ft15, sp, 23*SZFREG > + ADDI sp, sp, FRAME_SIZE_FLOAT > + > +.Lfcsr: > + /* Restore fcsr0 register. */ > + ld.w t0, sp, FRAME_SIZE + 24 > + movgr2fcsr fcsr0, t0 > > #endif /* #ifndef __loongarch_soft_float */ > > -#include "dl-tlsdesc-dynamic.h" > + REG_L ra, sp, 0 * SZREG > + REG_L a1, sp, 1 * SZREG > + REG_L a2, sp, 2 * SZREG > + REG_L a3, sp, 3 * SZREG > + REG_L a4, sp, 4 * SZREG > + REG_L a5, sp, 5 * SZREG > + REG_L a6, sp, 6 * SZREG > + REG_L a7, sp, 7 * SZREG > + REG_L t3, sp, 8 * SZREG > + REG_L t4, sp, 9 * SZREG > + REG_L t5, sp, 10 * SZREG > + REG_L t6, sp, 11 * SZREG > + REG_L t7, sp, 12 * SZREG > + REG_L t8, sp, 13 * SZREG > + ADDI sp, sp, FRAME_SIZE > + > + b .Lret > + cfi_endproc > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) > > #endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h > index 45c43a5b52..ff8c69cb93 100644 > --- a/sysdeps/loongarch/dl-tlsdesc.h > +++ b/sysdeps/loongarch/dl-tlsdesc.h > @@ -43,10 +43,6 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); > > #ifdef SHARED > extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); > -#ifndef __loongarch_soft_float > -extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); > -extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); > -#endif > extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); > #endif > > diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym > index 9f80fceca6..213d0b3074 100644 > --- a/sysdeps/loongarch/tlsdesc.sym > +++ b/sysdeps/loongarch/tlsdesc.sym > @@ -4,6 +4,12 @@ > #include <link.h> > #include <dl-tlsdesc.h> > > +#define SHARED 1 > + > +#include <ldsodefs.h> > + > +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) > + > -- > > -- Abuse tls.h macros to derive offsets relative to the thread register. > @@ -17,3 +23,6 @@ DTV_COUNTER offsetof(dtv_t, counter) > TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED > TLS_DTV_OFFSET TLS_DTV_OFFSET > SIZE_OF_TCB sizeof(tcbhead_t) > +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) > +HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX > +HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX
diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index a15d8e0ab6..ab6f1da7c0 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -223,13 +223,6 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], { td->arg = _dl_make_tlsdesc_dynamic (sym_map, sym->st_value + reloc->r_addend); -# ifndef __loongarch_soft_float - if (RTLD_SUPPORT_LASX) - td->entry = _dl_tlsdesc_dynamic_lasx; - else if (RTLD_SUPPORT_LSX) - td->entry = _dl_tlsdesc_dynamic_lsx; - else -# endif td->entry = _dl_tlsdesc_dynamic; } else diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h deleted file mode 100644 index d10f4a8800..0000000000 --- a/sysdeps/loongarch/dl-tlsdesc-dynamic.h +++ /dev/null @@ -1,225 +0,0 @@ -/* Thread-local storage handling in the ELF dynamic linker. - LoongArch version. - Copyright (C) 2024 Free Software Foundation, Inc. - - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <https://www.gnu.org/licenses/>. */ - -#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) -#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) -#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) -#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) - - /* Handler for dynamic TLS symbols. - Prototype: - _dl_tlsdesc_dynamic (tlsdesc *) ; - - The second word of the descriptor points to a - tlsdesc_dynamic_arg structure. - - Returns the offset between the thread pointer and the - object referenced by the argument. - - ptrdiff_t - _dl_tlsdesc_dynamic (struct tlsdesc *tdp) - { - struct tlsdesc_dynamic_arg *td = tdp->arg; - dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); - if (__glibc_likely (td->gen_count <= dtv[0].counter - && (dtv[td->tlsinfo.ti_module].pointer.val - != TLS_DTV_UNALLOCATED), - 1)) - return dtv[td->tlsinfo.ti_module].pointer.val - + td->tlsinfo.ti_offset - - __thread_pointer; - - return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; - } */ - .hidden _dl_tlsdesc_dynamic - .global _dl_tlsdesc_dynamic - .type _dl_tlsdesc_dynamic,%function - cfi_startproc - .align 2 -_dl_tlsdesc_dynamic: - /* Save just enough registers to support fast path, if we fall - into slow path we will save additional registers. */ - ADDI sp, sp, -32 - cfi_adjust_cfa_offset (32) - REG_S t0, sp, 0 - REG_S t1, sp, 8 - REG_S t2, sp, 16 - cfi_rel_offset (12, 0) - cfi_rel_offset (13, 8) - cfi_rel_offset (14, 16) - -/* Runtime Storage Layout of Thread-Local Storage - TP point to the start of TLS block. - - dtv -Low address TCB ----------------> dtv0(counter) - TP --> static_block0 <----- dtv1 - static_block1 <----- dtv2 - static_block2 <----- dtv3 - dynamic_block0 <----- dtv4 -Hign address dynamic_block1 <----- dtv5 */ - - REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ - REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ - REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ - REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ - /* If dtv[0].counter < td->gen_count, goto slow path. */ - bltu t2, t1, .Lslow - - REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ - /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ - slli.d t1, t1, 4 - add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ - REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ - li.d t2, TLS_DTV_UNALLOCATED - /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, - goto slow path. */ - beq t1, t2, .Lslow - - cfi_remember_state - REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ - /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ - add.d a0, t1, t2 -.Lret: - sub.d a0, a0, tp - REG_L t0, sp, 0 - REG_L t1, sp, 8 - REG_L t2, sp, 16 - ADDI sp, sp, 32 - cfi_adjust_cfa_offset (-32) - RET - -.Lslow: - /* This is the slow path. We need to call __tls_get_addr() which - means we need to save and restore all the register that the - callee will trash. */ - - /* Save the remaining registers that we must treat as caller save. */ - cfi_restore_state - ADDI sp, sp, -FRAME_SIZE - cfi_adjust_cfa_offset (FRAME_SIZE) - REG_S ra, sp, 0 * SZREG - REG_S a1, sp, 1 * SZREG - REG_S a2, sp, 2 * SZREG - REG_S a3, sp, 3 * SZREG - REG_S a4, sp, 4 * SZREG - REG_S a5, sp, 5 * SZREG - REG_S a6, sp, 6 * SZREG - REG_S a7, sp, 7 * SZREG - REG_S t3, sp, 8 * SZREG - REG_S t4, sp, 9 * SZREG - REG_S t5, sp, 10 * SZREG - REG_S t6, sp, 11 * SZREG - REG_S t7, sp, 12 * SZREG - REG_S t8, sp, 13 * SZREG - cfi_rel_offset (1, 0 * SZREG) - cfi_rel_offset (5, 1 * SZREG) - cfi_rel_offset (6, 2 * SZREG) - cfi_rel_offset (7, 3 * SZREG) - cfi_rel_offset (8, 4 * SZREG) - cfi_rel_offset (9, 5 * SZREG) - cfi_rel_offset (10, 6 * SZREG) - cfi_rel_offset (11, 7 * SZREG) - cfi_rel_offset (15, 8 * SZREG) - cfi_rel_offset (16, 9 * SZREG) - cfi_rel_offset (17, 10 * SZREG) - cfi_rel_offset (18, 11 * SZREG) - cfi_rel_offset (19, 12 * SZREG) - cfi_rel_offset (20, 13 * SZREG) - -#ifndef __loongarch_soft_float - - /* Save fcsr0 register. - Only one physical fcsr0 register, fcsr1-fcsr3 are aliases - of some fields in fcsr0. */ - movfcsr2gr t0, fcsr0 - st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2. */ - -#ifdef USE_LASX - #define V_REG_S xvst - #define V_REG_L xvld - #define V_SPACE FRAME_SIZE_LASX - #define V_REG(n) $xr##n - #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ - 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 - #define V_REGSZ SZXREG -#elif defined USE_LSX - #define V_REG_S vst - #define V_REG_L vld - #define V_SPACE FRAME_SIZE_LSX - #define V_REG(n) $vr##n - #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \ - 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 - #define V_REGSZ SZVREG -#else - #define V_REG_S fst.d - #define V_REG_L fld.d - #define V_SPACE FRAME_SIZE_FLOAT - #define V_REG(n) $f##n - #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 - #define V_REGSZ SZFREG -#endif - - ADDI sp, sp, -V_SPACE - cfi_adjust_cfa_offset (V_SPACE) - .irp i,V_REGS - V_REG_S V_REG(\i), sp, \i * V_REGSZ - .endr - -#endif /* #ifndef __loongarch_soft_float */ - - bl HIDDEN_JUMPTARGET(__tls_get_addr) - ADDI a0, a0, -TLS_DTV_OFFSET - -#ifndef __loongarch_soft_float - - .irp i,V_REGS - V_REG_L V_REG(\i), sp, \i * V_REGSZ - .endr - ADDI sp, sp, V_SPACE - cfi_adjust_cfa_offset (-V_SPACE) - - /* Restore fcsr0 register. */ - ld.w t0, sp, FRAME_SIZE + 24 - movgr2fcsr fcsr0, t0 - -#endif /* #ifndef __loongarch_soft_float */ - - REG_L ra, sp, 0 * SZREG - REG_L a1, sp, 1 * SZREG - REG_L a2, sp, 2 * SZREG - REG_L a3, sp, 3 * SZREG - REG_L a4, sp, 4 * SZREG - REG_L a5, sp, 5 * SZREG - REG_L a6, sp, 6 * SZREG - REG_L a7, sp, 7 * SZREG - REG_L t3, sp, 8 * SZREG - REG_L t4, sp, 9 * SZREG - REG_L t5, sp, 10 * SZREG - REG_L t6, sp, 11 * SZREG - REG_L t7, sp, 12 * SZREG - REG_L t8, sp, 13 * SZREG - ADDI sp, sp, FRAME_SIZE - cfi_adjust_cfa_offset (-FRAME_SIZE) - - b .Lret - cfi_endproc - .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic - .hidden HIDDEN_JUMPTARGET(__tls_get_addr) diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S index b6cfd6121d..a6627cc754 100644 --- a/sysdeps/loongarch/dl-tlsdesc.S +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -59,34 +59,376 @@ _dl_tlsdesc_undefweak: cfi_endproc .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + #ifdef SHARED +#define FRAME_SIZE (-((-14 * SZREG) & ALMASK)) +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) + + /* Handler for dynamic TLS symbols. + Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. + + ptrdiff_t + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB); + if (__glibc_likely (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } */ + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: + /* Save just enough registers to support fast path, if we fall + into slow path we will save additional registers. */ + ADDI sp, sp, -32 + REG_S t0, sp, 0 + REG_S t1, sp, 8 + REG_S t2, sp, 16 + +/* Runtime Storage Layout of Thread-Local Storage + TP point to the start of TLS block. + + dtv +Low address TCB ----------------> dtv0(counter) + TP --> static_block0 <----- dtv1 + static_block1 <----- dtv2 + static_block2 <----- dtv3 + dynamic_block0 <----- dtv4 +Hign address dynamic_block1 <----- dtv5 */ + + REG_L t0, tp, -SIZE_OF_TCB /* t0 = dtv */ + REG_L a0, a0, TLSDESC_ARG /* a0(td) = tdp->arg */ + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ + /* If dtv[0].counter < td->gen_count, goto slow path. */ + bltu t2, t1, .Lslow + + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ + /* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */ + slli.d t1, t1, 4 + add.d t1, t1, t0 /* t1 = dtv[td->tlsinfo.ti_module] */ + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ + li.d t2, TLS_DTV_UNALLOCATED + /* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED, + goto slow path. */ + beq t1, t2, .Lslow + + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ + add.d a0, t1, t2 +.Lret: + sub.d a0, a0, tp + REG_L t0, sp, 0 + REG_L t1, sp, 8 + REG_L t2, sp, 16 + ADDI sp, sp, 32 + RET + +.Lslow: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the register that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save. */ + ADDI sp, sp, -FRAME_SIZE + REG_S ra, sp, 0 * SZREG + REG_S a1, sp, 1 * SZREG + REG_S a2, sp, 2 * SZREG + REG_S a3, sp, 3 * SZREG + REG_S a4, sp, 4 * SZREG + REG_S a5, sp, 5 * SZREG + REG_S a6, sp, 6 * SZREG + REG_S a7, sp, 7 * SZREG + REG_S t3, sp, 8 * SZREG + REG_S t4, sp, 9 * SZREG + REG_S t5, sp, 10 * SZREG + REG_S t6, sp, 11 * SZREG + REG_S t7, sp, 12 * SZREG + REG_S t8, sp, 13 * SZREG + #ifndef __loongarch_soft_float -#define USE_LASX -#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx -#define Lret Lret_lasx -#define Lslow Lslow_lasx -#include "dl-tlsdesc-dynamic.h" -#undef FRAME_SIZE -#undef USE_LASX -#undef _dl_tlsdesc_dynamic -#undef Lret -#undef Lslow - -#define USE_LSX -#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx -#define Lret Lret_lsx -#define Lslow Lslow_lsx -#include "dl-tlsdesc-dynamic.h" -#undef FRAME_SIZE -#undef USE_LSX -#undef _dl_tlsdesc_dynamic -#undef Lret -#undef Lslow + /* Save fcsr0 register. + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases + of some fields in fcsr0. */ + movfcsr2gr t0, fcsr0 + st.w t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */ + + /* Whether support LASX. */ + la.global t0, _rtld_global_ro + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET + andi t1, t0, HWCAP_LOONGARCH_LASX + beqz t1, .Llsx + + /* Save 256-bit vector registers. + FIXME: Without vector ABI, save all vector registers. */ + ADDI sp, sp, -FRAME_SIZE_LASX + xvst xr0, sp, 0*SZXREG + xvst xr1, sp, 1*SZXREG + xvst xr2, sp, 2*SZXREG + xvst xr3, sp, 3*SZXREG + xvst xr4, sp, 4*SZXREG + xvst xr5, sp, 5*SZXREG + xvst xr6, sp, 6*SZXREG + xvst xr7, sp, 7*SZXREG + xvst xr8, sp, 8*SZXREG + xvst xr9, sp, 9*SZXREG + xvst xr10, sp, 10*SZXREG + xvst xr11, sp, 11*SZXREG + xvst xr12, sp, 12*SZXREG + xvst xr13, sp, 13*SZXREG + xvst xr14, sp, 14*SZXREG + xvst xr15, sp, 15*SZXREG + xvst xr16, sp, 16*SZXREG + xvst xr17, sp, 17*SZXREG + xvst xr18, sp, 18*SZXREG + xvst xr19, sp, 19*SZXREG + xvst xr20, sp, 20*SZXREG + xvst xr21, sp, 21*SZXREG + xvst xr22, sp, 22*SZXREG + xvst xr23, sp, 23*SZXREG + xvst xr24, sp, 24*SZXREG + xvst xr25, sp, 25*SZXREG + xvst xr26, sp, 26*SZXREG + xvst xr27, sp, 27*SZXREG + xvst xr28, sp, 28*SZXREG + xvst xr29, sp, 29*SZXREG + xvst xr30, sp, 30*SZXREG + xvst xr31, sp, 31*SZXREG + b .Ltga + +.Llsx: + /* Whether support LSX. */ + andi t1, t0, HWCAP_LOONGARCH_LSX + beqz t1, .Lfloat + + /* Save 128-bit vector registers. */ + ADDI sp, sp, -FRAME_SIZE_LSX + vst vr0, sp, 0*SZVREG + vst vr1, sp, 1*SZVREG + vst vr2, sp, 2*SZVREG + vst vr3, sp, 3*SZVREG + vst vr4, sp, 4*SZVREG + vst vr5, sp, 5*SZVREG + vst vr6, sp, 6*SZVREG + vst vr7, sp, 7*SZVREG + vst vr8, sp, 8*SZVREG + vst vr9, sp, 9*SZVREG + vst vr10, sp, 10*SZVREG + vst vr11, sp, 11*SZVREG + vst vr12, sp, 12*SZVREG + vst vr13, sp, 13*SZVREG + vst vr14, sp, 14*SZVREG + vst vr15, sp, 15*SZVREG + vst vr16, sp, 16*SZVREG + vst vr17, sp, 17*SZVREG + vst vr18, sp, 18*SZVREG + vst vr19, sp, 19*SZVREG + vst vr20, sp, 20*SZVREG + vst vr21, sp, 21*SZVREG + vst vr22, sp, 22*SZVREG + vst vr23, sp, 23*SZVREG + vst vr24, sp, 24*SZVREG + vst vr25, sp, 25*SZVREG + vst vr26, sp, 26*SZVREG + vst vr27, sp, 27*SZVREG + vst vr28, sp, 28*SZVREG + vst vr29, sp, 29*SZVREG + vst vr30, sp, 30*SZVREG + vst vr31, sp, 31*SZVREG + b .Ltga + +.Lfloat: + /* Save float registers. */ + ADDI sp, sp, -FRAME_SIZE_FLOAT + FREG_S fa0, sp, 0*SZFREG + FREG_S fa1, sp, 1*SZFREG + FREG_S fa2, sp, 2*SZFREG + FREG_S fa3, sp, 3*SZFREG + FREG_S fa4, sp, 4*SZFREG + FREG_S fa5, sp, 5*SZFREG + FREG_S fa6, sp, 6*SZFREG + FREG_S fa7, sp, 7*SZFREG + FREG_S ft0, sp, 8*SZFREG + FREG_S ft1, sp, 9*SZFREG + FREG_S ft2, sp, 10*SZFREG + FREG_S ft3, sp, 11*SZFREG + FREG_S ft4, sp, 12*SZFREG + FREG_S ft5, sp, 13*SZFREG + FREG_S ft6, sp, 14*SZFREG + FREG_S ft7, sp, 15*SZFREG + FREG_S ft8, sp, 16*SZFREG + FREG_S ft9, sp, 17*SZFREG + FREG_S ft10, sp, 18*SZFREG + FREG_S ft11, sp, 19*SZFREG + FREG_S ft12, sp, 20*SZFREG + FREG_S ft13, sp, 21*SZFREG + FREG_S ft14, sp, 22*SZFREG + FREG_S ft15, sp, 23*SZFREG + +#endif /* #ifndef __loongarch_soft_float */ + +.Ltga: + bl HIDDEN_JUMPTARGET(__tls_get_addr) + ADDI a0, a0, -TLS_DTV_OFFSET + +#ifndef __loongarch_soft_float + + la.global t0, _rtld_global_ro + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET + andi t1, t0, HWCAP_LOONGARCH_LASX + beqz t1, .Llsx1 + + /* Restore 256-bit vector registers. */ + xvld xr0, sp, 0*SZXREG + xvld xr1, sp, 1*SZXREG + xvld xr2, sp, 2*SZXREG + xvld xr3, sp, 3*SZXREG + xvld xr4, sp, 4*SZXREG + xvld xr5, sp, 5*SZXREG + xvld xr6, sp, 6*SZXREG + xvld xr7, sp, 7*SZXREG + xvld xr8, sp, 8*SZXREG + xvld xr9, sp, 9*SZXREG + xvld xr10, sp, 10*SZXREG + xvld xr11, sp, 11*SZXREG + xvld xr12, sp, 12*SZXREG + xvld xr13, sp, 13*SZXREG + xvld xr14, sp, 14*SZXREG + xvld xr15, sp, 15*SZXREG + xvld xr16, sp, 16*SZXREG + xvld xr17, sp, 17*SZXREG + xvld xr18, sp, 18*SZXREG + xvld xr19, sp, 19*SZXREG + xvld xr20, sp, 20*SZXREG + xvld xr21, sp, 21*SZXREG + xvld xr22, sp, 22*SZXREG + xvld xr23, sp, 23*SZXREG + xvld xr24, sp, 24*SZXREG + xvld xr25, sp, 25*SZXREG + xvld xr26, sp, 26*SZXREG + xvld xr27, sp, 27*SZXREG + xvld xr28, sp, 28*SZXREG + xvld xr29, sp, 29*SZXREG + xvld xr30, sp, 30*SZXREG + xvld xr31, sp, 31*SZXREG + ADDI sp, sp, FRAME_SIZE_LASX + b .Lfcsr + +.Llsx1: + andi t1, t0, HWCAP_LOONGARCH_LSX + beqz t1, .Lfloat1 + + /* Restore 128-bit vector registers. */ + vld vr0, sp, 0*SZVREG + vld vr1, sp, 1*SZVREG + vld vr2, sp, 2*SZVREG + vld vr3, sp, 3*SZVREG + vld vr4, sp, 4*SZVREG + vld vr5, sp, 5*SZVREG + vld vr6, sp, 6*SZVREG + vld vr7, sp, 7*SZVREG + vld vr8, sp, 8*SZVREG + vld vr9, sp, 9*SZVREG + vld vr10, sp, 10*SZVREG + vld vr11, sp, 11*SZVREG + vld vr12, sp, 12*SZVREG + vld vr13, sp, 13*SZVREG + vld vr14, sp, 14*SZVREG + vld vr15, sp, 15*SZVREG + vld vr16, sp, 16*SZVREG + vld vr17, sp, 17*SZVREG + vld vr18, sp, 18*SZVREG + vld vr19, sp, 19*SZVREG + vld vr20, sp, 20*SZVREG + vld vr21, sp, 21*SZVREG + vld vr22, sp, 22*SZVREG + vld vr23, sp, 23*SZVREG + vld vr24, sp, 24*SZVREG + vld vr25, sp, 25*SZVREG + vld vr26, sp, 26*SZVREG + vld vr27, sp, 27*SZVREG + vld vr28, sp, 28*SZVREG + vld vr29, sp, 29*SZVREG + vld vr30, sp, 30*SZVREG + vld vr31, sp, 31*SZVREG + ADDI sp, sp, FRAME_SIZE_LSX + b .Lfcsr + +.Lfloat1: + /* Restore float registers. */ + FREG_L fa0, sp, 0*SZFREG + FREG_L fa1, sp, 1*SZFREG + FREG_L fa2, sp, 2*SZFREG + FREG_L fa3, sp, 3*SZFREG + FREG_L fa4, sp, 4*SZFREG + FREG_L fa5, sp, 5*SZFREG + FREG_L fa6, sp, 6*SZFREG + FREG_L fa7, sp, 7*SZFREG + FREG_L ft0, sp, 8*SZFREG + FREG_L ft1, sp, 9*SZFREG + FREG_L ft2, sp, 10*SZFREG + FREG_L ft3, sp, 11*SZFREG + FREG_L ft4, sp, 12*SZFREG + FREG_L ft5, sp, 13*SZFREG + FREG_L ft6, sp, 14*SZFREG + FREG_L ft7, sp, 15*SZFREG + FREG_L ft8, sp, 16*SZFREG + FREG_L ft9, sp, 17*SZFREG + FREG_L ft10, sp, 18*SZFREG + FREG_L ft11, sp, 19*SZFREG + FREG_L ft12, sp, 20*SZFREG + FREG_L ft13, sp, 21*SZFREG + FREG_L ft14, sp, 22*SZFREG + FREG_L ft15, sp, 23*SZFREG + ADDI sp, sp, FRAME_SIZE_FLOAT + +.Lfcsr: + /* Restore fcsr0 register. */ + ld.w t0, sp, FRAME_SIZE + 24 + movgr2fcsr fcsr0, t0 #endif /* #ifndef __loongarch_soft_float */ -#include "dl-tlsdesc-dynamic.h" + REG_L ra, sp, 0 * SZREG + REG_L a1, sp, 1 * SZREG + REG_L a2, sp, 2 * SZREG + REG_L a3, sp, 3 * SZREG + REG_L a4, sp, 4 * SZREG + REG_L a5, sp, 5 * SZREG + REG_L a6, sp, 6 * SZREG + REG_L a7, sp, 7 * SZREG + REG_L t3, sp, 8 * SZREG + REG_L t4, sp, 9 * SZREG + REG_L t5, sp, 10 * SZREG + REG_L t6, sp, 11 * SZREG + REG_L t7, sp, 12 * SZREG + REG_L t8, sp, 13 * SZREG + ADDI sp, sp, FRAME_SIZE + + b .Lret + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) #endif /* #ifdef SHARED */ diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h index 45c43a5b52..ff8c69cb93 100644 --- a/sysdeps/loongarch/dl-tlsdesc.h +++ b/sysdeps/loongarch/dl-tlsdesc.h @@ -43,10 +43,6 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); #ifdef SHARED extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); -#ifndef __loongarch_soft_float -extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); -extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); -#endif extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); #endif diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym index 9f80fceca6..213d0b3074 100644 --- a/sysdeps/loongarch/tlsdesc.sym +++ b/sysdeps/loongarch/tlsdesc.sym @@ -4,6 +4,12 @@ #include <link.h> #include <dl-tlsdesc.h> +#define SHARED 1 + +#include <ldsodefs.h> + +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) + -- -- Abuse tls.h macros to derive offsets relative to the thread register. @@ -17,3 +23,6 @@ DTV_COUNTER offsetof(dtv_t, counter) TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED TLS_DTV_OFFSET TLS_DTV_OFFSET SIZE_OF_TCB sizeof(tcbhead_t) +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) +HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX +HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX
We're in freeze for the 2.40 release. This reverts commit 43224b1379d60b1ad98d29ef3d7905d55f828a9f. Signed-off-by: Andreas K. Hüttel <dilfridge@gentoo.org> --- sysdeps/loongarch/dl-machine.h | 7 - sysdeps/loongarch/dl-tlsdesc-dynamic.h | 225 -------------- sysdeps/loongarch/dl-tlsdesc.S | 386 +++++++++++++++++++++++-- sysdeps/loongarch/dl-tlsdesc.h | 4 - sysdeps/loongarch/tlsdesc.sym | 9 + 5 files changed, 373 insertions(+), 258 deletions(-) delete mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h