Message ID | 20240229014328.3559028-1-mengqinggang@loongson.cn |
---|---|
State | New |
Headers | show |
Series | [v2] LoongArch: Add support for TLS Descriptors | expand |
在 2024/2/29 上午9:43, mengqinggang 写道: > This is mostly based on AArch64 and RISC-V implementation. > > Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. > > For _dl_tlsdesc_dynamic function slow path, temporarily save and restore > all vector registers. > --- > Changes v1 -> v2: > - Fix vr24-vr31, xr24-xr31 typo. > - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic. > - Save and restore fcsr0 in _dl_tlsdesc_dynamic. > > v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html > > elf/elf.h | 2 + > sysdeps/loongarch/Makefile | 6 + > sysdeps/loongarch/dl-link.sym | 1 + > sysdeps/loongarch/dl-machine.h | 60 ++- > sysdeps/loongarch/dl-tls.h | 9 +- > sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++ > sysdeps/loongarch/dl-tlsdesc.S | 93 +++++ > sysdeps/loongarch/dl-tlsdesc.h | 53 +++ > sysdeps/loongarch/linkmap.h | 1 + > sysdeps/loongarch/sys/asm.h | 1 + > sysdeps/loongarch/sys/regdef.h | 1 + > sysdeps/loongarch/tlsdesc.c | 39 ++ > sysdeps/loongarch/tlsdesc.sym | 19 + > .../unix/sysv/linux/loongarch/localplt.data | 2 + > 14 files changed, 625 insertions(+), 3 deletions(-) > create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h > create mode 100644 sysdeps/loongarch/dl-tlsdesc.S > create mode 100644 sysdeps/loongarch/dl-tlsdesc.h > create mode 100644 sysdeps/loongarch/tlsdesc.c > create mode 100644 sysdeps/loongarch/tlsdesc.sym > > diff --git a/elf/elf.h b/elf/elf.h > index f2206e5c06..eec24ea049 100644 > --- a/elf/elf.h > +++ b/elf/elf.h > @@ -4237,6 +4237,8 @@ enum > #define R_LARCH_TLS_TPREL32 10 > #define R_LARCH_TLS_TPREL64 11 > #define R_LARCH_IRELATIVE 12 > +#define R_LARCH_TLS_DESC32 13 > +#define R_LARCH_TLS_DESC64 14 > > /* Reserved for future relocs that the dynamic linker must understand. 
*/ > > diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile > index 43d2f583cd..181389e787 100644 > --- a/sysdeps/loongarch/Makefile > +++ b/sysdeps/loongarch/Makefile > @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h > endif > > ifeq ($(subdir),elf) > +sysdep-dl-routines += tlsdesc dl-tlsdesc > gen-as-const-headers += dl-link.sym > endif > > +ifeq ($(subdir),csu) > +gen-as-const-headers += tlsdesc.sym > +endif > + > + > # LoongArch's assembler also needs to know about PIC as it changes the > # definition of some assembler macros. > ASFLAGS-.os += $(pic-ccflag) > diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym > index b534968e30..fd81ef37d5 100644 > --- a/sysdeps/loongarch/dl-link.sym > +++ b/sysdeps/loongarch/dl-link.sym > @@ -1,6 +1,7 @@ > #include <stddef.h> > #include <sysdep.h> > #include <link.h> > +#include <dl-tlsdesc.h> > > DL_SIZEOF_RG sizeof(struct La_loongarch_regs) > DL_SIZEOF_RV sizeof(struct La_loongarch_retval) > diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h > index ab81b82d95..8ca6c224f6 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -25,7 +25,7 @@ > #include <entry.h> > #include <elf/elf.h> > #include <sys/asm.h> > -#include <dl-tls.h> > +#include <dl-tlsdesc.h> > #include <dl-static-tls.h> > #include <dl-machine-rel.h> > > @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], > *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; > break; > > + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: > + { > + struct tlsdesc volatile *td = > + (struct tlsdesc volatile *)addr_field; > + if (! sym) Use sym != NULL instead of ! sym, the same applies to other similar cases. 
> + { > + td->arg = (void*)reloc->r_addend; > + td->entry = _dl_tlsdesc_undefweak; > + } > + else > + { > +# ifndef SHARED > + CHECK_STATIC_TLS (map, sym_map); > +# else > + if (!TRY_STATIC_TLS (map, sym_map)) > + { > + td->arg = _dl_make_tlsdesc_dynamic > + (sym_map, sym->st_value + reloc->r_addend); > +# if !defined __loongarch_soft_float > + if (SUPPORT_LASX) > + td->entry = _dl_tlsdesc_dynamic_lasx; > + else > + if (SUPPORT_LSX) > + td->entry = _dl_tlsdesc_dynamic_lsx; > + else > +# endif > + td->entry = _dl_tlsdesc_dynamic; > + } > + else > +# endif > + { > + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) > + + reloc->r_addend); > + td->entry = _dl_tlsdesc_return; > + } > + } > + break; > + } > + > case R_LARCH_COPY: > { > if (sym == NULL) > @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], > else > *reloc_addr = map->l_mach.plt; > } > + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) Use __glibc_unlikely/likely instead of __builtin_expect, the same applies below. > + { > + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); > + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); > + const ElfW (Sym) *sym = &symtab[symndx]; > + const struct r_found_version *version = NULL; > + > + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) > + { > + const ElfW (Half) *vernum = > + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); > + version = &map->l_versions[vernum[symndx] & 0x7fff]; > + } > + > + /* Always initialize TLS descriptors completely, because lazy > + initialization requires synchronization at every TLS access. 
*/ > + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, > + skip_ifunc); > + } > else > _dl_reloc_bad_type (map, r_type, 1); > } > diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h > index 29924b866d..de593c002d 100644 > --- a/sysdeps/loongarch/dl-tls.h > +++ b/sysdeps/loongarch/dl-tls.h > @@ -16,6 +16,9 @@ > License along with the GNU C Library. If not, see > <https://www.gnu.org/licenses/>. */ > > +#ifndef _DL_TLS_H > +#define _DL_TLS_H > + > /* Type used for the representation of TLS information in the GOT. */ > typedef struct > { > @@ -23,6 +26,8 @@ typedef struct > unsigned long int ti_offset; > } tls_index; > > +extern void *__tls_get_addr (tls_index *ti); > + > /* The thread pointer points to the first static TLS block. */ > #define TLS_TP_OFFSET 0 > > @@ -37,10 +42,10 @@ typedef struct > /* Compute the value for a DTPREL reloc. */ > #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) > > -extern void *__tls_get_addr (tls_index *ti); > - > #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) > #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) > > /* Value used for dtv entries for which the allocation is delayed. */ > #define TLS_DTV_UNALLOCATED ((void *) -1l) > + > +#endif > diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > new file mode 100644 > index 0000000000..0d8c9bb991 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > @@ -0,0 +1,341 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. 
> + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifdef USE_LASX > +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) > +#elif defined USE_LSX > +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) > +#elif !defined __loongarch_soft_float > +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) > +#else > +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) > +#endif > + > +#ifdef SHARED > + /* Handler for dynamic TLS symbols. > + Prototype: > + _dl_tlsdesc_dynamic (tlsdesc *) ; > + > + The second word of the descriptor points to a > + tlsdesc_dynamic_arg structure. > + > + Returns the offset between the thread pointer and the > + object referenced by the argument. > + > + ptrdiff_t > + __attribute__ ((__regparm__ (1))) > + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > + { > + struct tlsdesc_dynamic_arg *td = tdp->arg; > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); > + if (__builtin_expect (td->gen_count <= dtv[0].counter > + && (dtv[td->tlsinfo.ti_module].pointer.val > + != TLS_DTV_UNALLOCATED), > + 1)) > + return dtv[td->tlsinfo.ti_module].pointer.val > + + td->tlsinfo.ti_offset > + - __thread_pointer; > + > + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > + } > + */ > + .hidden _dl_tlsdesc_dynamic > + .global _dl_tlsdesc_dynamic > + .type _dl_tlsdesc_dynamic,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_dynamic: > + /* Save just enough registers to support fast path, if we fall > + into slow path we will save additional registers. 
*/ > + ADDI sp, sp,-24 > + REG_S t0, sp, 0 > + REG_S t1, sp, 8 > + REG_S t2, sp, 16 > + > + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start > + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg > + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count > + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter > + bltu t2, t1, Lslow > + > + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module > + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ > + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) > + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val > + li.d t2, TLS_DTV_UNALLOCATED > + beq t1, t2, Lslow > + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset > + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset > + add.d a0, t1, t2 > +Lret: > + sub.d a0, a0, tp > + REG_L t0, sp, 0 > + REG_L t1, sp, 8 > + REG_L t2, sp, 16 > + ADDI sp, sp, 24 > + RET > + > +Lslow: > + /* This is the slow path. We need to call __tls_get_addr() which > + means we need to save and restore all the register that the > + callee will trash. */ > + > + /* Save the remaining registers that we must treat as caller save. 
*/ > + ADDI sp, sp, -FRAME_SIZE > + REG_S ra, sp, 0 * SZREG > + REG_S a1, sp, 1 * SZREG > + REG_S a2, sp, 2 * SZREG > + REG_S a3, sp, 3 * SZREG > + REG_S a4, sp, 4 * SZREG > + REG_S a5, sp, 5 * SZREG > + REG_S a6, sp, 6 * SZREG > + REG_S a7, sp, 7 * SZREG > + REG_S t4, sp, 8 * SZREG > + REG_S t5, sp, 9 * SZREG > + REG_S t6, sp, 10 * SZREG > + REG_S t7, sp, 11 * SZREG > + REG_S t8, sp, 12 * SZREG > + > +#ifdef USE_LASX > + xvst xr0, sp, 13*SZREG + 0*SZXREG > + xvst xr1, sp, 13*SZREG + 1*SZXREG > + xvst xr2, sp, 13*SZREG + 2*SZXREG > + xvst xr3, sp, 13*SZREG + 3*SZXREG > + xvst xr4, sp, 13*SZREG + 4*SZXREG > + xvst xr5, sp, 13*SZREG + 5*SZXREG > + xvst xr6, sp, 13*SZREG + 6*SZXREG > + xvst xr7, sp, 13*SZREG + 7*SZXREG > + xvst xr8, sp, 13*SZREG + 8*SZXREG > + xvst xr9, sp, 13*SZREG + 9*SZXREG > + xvst xr10, sp, 13*SZREG + 10*SZXREG > + xvst xr11, sp, 13*SZREG + 11*SZXREG > + xvst xr12, sp, 13*SZREG + 12*SZXREG > + xvst xr13, sp, 13*SZREG + 13*SZXREG > + xvst xr14, sp, 13*SZREG + 14*SZXREG > + xvst xr15, sp, 13*SZREG + 15*SZXREG > + xvst xr16, sp, 13*SZREG + 16*SZXREG > + xvst xr17, sp, 13*SZREG + 17*SZXREG > + xvst xr18, sp, 13*SZREG + 18*SZXREG > + xvst xr19, sp, 13*SZREG + 19*SZXREG > + xvst xr20, sp, 13*SZREG + 20*SZXREG > + xvst xr21, sp, 13*SZREG + 21*SZXREG > + xvst xr22, sp, 13*SZREG + 22*SZXREG > + xvst xr23, sp, 13*SZREG + 23*SZXREG > + xvst xr24, sp, 13*SZREG + 24*SZXREG > + xvst xr25, sp, 13*SZREG + 25*SZXREG > + xvst xr26, sp, 13*SZREG + 26*SZXREG > + xvst xr27, sp, 13*SZREG + 27*SZXREG > + xvst xr28, sp, 13*SZREG + 28*SZXREG > + xvst xr29, sp, 13*SZREG + 29*SZXREG > + xvst xr30, sp, 13*SZREG + 30*SZXREG > + xvst xr31, sp, 13*SZREG + 31*SZXREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 32*SZXREG > +#elif defined USE_LSX > + vst vr0, sp, 13*SZREG + 0*SZVREG > + vst vr1, sp, 13*SZREG + 1*SZVREG > + vst vr2, sp, 13*SZREG + 2*SZVREG > + vst vr3, sp, 13*SZREG + 
3*SZVREG > + vst vr4, sp, 13*SZREG + 4*SZVREG > + vst vr5, sp, 13*SZREG + 5*SZVREG > + vst vr6, sp, 13*SZREG + 6*SZVREG > + vst vr7, sp, 13*SZREG + 7*SZVREG > + vst vr8, sp, 13*SZREG + 8*SZVREG > + vst vr9, sp, 13*SZREG + 9*SZVREG > + vst vr10, sp, 13*SZREG + 10*SZVREG > + vst vr11, sp, 13*SZREG + 11*SZVREG > + vst vr12, sp, 13*SZREG + 12*SZVREG > + vst vr13, sp, 13*SZREG + 13*SZVREG > + vst vr14, sp, 13*SZREG + 14*SZVREG > + vst vr15, sp, 13*SZREG + 15*SZVREG > + vst vr16, sp, 13*SZREG + 16*SZVREG > + vst vr17, sp, 13*SZREG + 17*SZVREG > + vst vr18, sp, 13*SZREG + 18*SZVREG > + vst vr19, sp, 13*SZREG + 19*SZVREG > + vst vr20, sp, 13*SZREG + 20*SZVREG > + vst vr21, sp, 13*SZREG + 21*SZVREG > + vst vr22, sp, 13*SZREG + 22*SZVREG > + vst vr23, sp, 13*SZREG + 23*SZVREG > + vst vr24, sp, 13*SZREG + 24*SZVREG > + vst vr25, sp, 13*SZREG + 25*SZVREG > + vst vr26, sp, 13*SZREG + 26*SZVREG > + vst vr27, sp, 13*SZREG + 27*SZVREG > + vst vr28, sp, 13*SZREG + 28*SZVREG > + vst vr29, sp, 13*SZREG + 29*SZVREG > + vst vr30, sp, 13*SZREG + 30*SZVREG > + vst vr31, sp, 13*SZREG + 31*SZVREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 32*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_S fa0, sp, 13*SZREG + 0*SZFREG > + FREG_S fa1, sp, 13*SZREG + 1*SZFREG > + FREG_S fa2, sp, 13*SZREG + 2*SZFREG > + FREG_S fa3, sp, 13*SZREG + 3*SZFREG > + FREG_S fa4, sp, 13*SZREG + 4*SZFREG > + FREG_S fa5, sp, 13*SZREG + 5*SZFREG > + FREG_S fa6, sp, 13*SZREG + 6*SZFREG > + FREG_S fa7, sp, 13*SZREG + 7*SZFREG > + FREG_S ft0, sp, 13*SZREG + 8*SZFREG > + FREG_S ft1, sp, 13*SZREG + 9*SZFREG > + FREG_S ft2, sp, 13*SZREG + 10*SZFREG > + FREG_S ft3, sp, 13*SZREG + 11*SZFREG > + FREG_S ft4, sp, 13*SZREG + 12*SZFREG > + FREG_S ft5, sp, 13*SZREG + 13*SZFREG > + FREG_S ft6, sp, 13*SZREG + 14*SZFREG > + FREG_S ft7, sp, 13*SZREG + 15*SZFREG > + FREG_S ft8, sp, 13*SZREG + 16*SZFREG > + FREG_S ft9, sp, 13*SZREG + 
17*SZFREG > + FREG_S ft10, sp, 13*SZREG + 18*SZFREG > + FREG_S ft11, sp, 13*SZREG + 19*SZFREG > + FREG_S ft12, sp, 13*SZREG + 20*SZFREG > + FREG_S ft13, sp, 13*SZREG + 21*SZFREG > + FREG_S ft14, sp, 13*SZREG + 22*SZFREG > + FREG_S ft15, sp, 13*SZREG + 23*SZFREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 24*SZFREG > +#endif /* #ifdef USE_LASX */ > + > + bl __tls_get_addr > + ADDI a0, a0, -TLS_DTV_OFFSET > + > + REG_L ra, sp, 0 > + REG_L a1, sp, 1 * 8 > + REG_L a2, sp, 2 * 8 > + REG_L a3, sp, 3 * 8 > + REG_L a4, sp, 4 * 8 > + REG_L a5, sp, 5 * 8 > + REG_L a6, sp, 6 * 8 > + REG_L a7, sp, 7 * 8 > + REG_L t4, sp, 8 * 8 > + REG_L t5, sp, 9 * 8 > + REG_L t6, sp, 10 * 8 > + REG_L t7, sp, 11 * 8 > + REG_L t8, sp, 12 * 8 > + > +#ifdef USE_LASX > + xvld xr0, sp, 13*SZREG + 0*SZXREG > + xvld xr1, sp, 13*SZREG + 1*SZXREG > + xvld xr2, sp, 13*SZREG + 2*SZXREG > + xvld xr3, sp, 13*SZREG + 3*SZXREG > + xvld xr4, sp, 13*SZREG + 4*SZXREG > + xvld xr5, sp, 13*SZREG + 5*SZXREG > + xvld xr6, sp, 13*SZREG + 6*SZXREG > + xvld xr7, sp, 13*SZREG + 7*SZXREG > + xvld xr8, sp, 13*SZREG + 8*SZXREG > + xvld xr9, sp, 13*SZREG + 9*SZXREG > + xvld xr10, sp, 13*SZREG + 10*SZXREG > + xvld xr11, sp, 13*SZREG + 11*SZXREG > + xvld xr12, sp, 13*SZREG + 12*SZXREG > + xvld xr13, sp, 13*SZREG + 13*SZXREG > + xvld xr14, sp, 13*SZREG + 14*SZXREG > + xvld xr15, sp, 13*SZREG + 15*SZXREG > + xvld xr16, sp, 13*SZREG + 16*SZXREG > + xvld xr17, sp, 13*SZREG + 17*SZXREG > + xvld xr18, sp, 13*SZREG + 18*SZXREG > + xvld xr19, sp, 13*SZREG + 19*SZXREG > + xvld xr20, sp, 13*SZREG + 20*SZXREG > + xvld xr21, sp, 13*SZREG + 21*SZXREG > + xvld xr22, sp, 13*SZREG + 22*SZXREG > + xvld xr23, sp, 13*SZREG + 23*SZXREG > + xvld xr24, sp, 13*SZREG + 24*SZXREG > + xvld xr25, sp, 13*SZREG + 25*SZXREG > + xvld xr26, sp, 13*SZREG + 26*SZXREG > + xvld xr27, sp, 13*SZREG + 27*SZXREG > + xvld xr28, sp, 13*SZREG + 28*SZXREG > + xvld xr29, sp, 
13*SZREG + 29*SZXREG > + xvld xr30, sp, 13*SZREG + 30*SZXREG > + xvld xr31, sp, 13*SZREG + 31*SZXREG > + REG_L t0, sp, 32*SZXREG > + movgr2fcsr fcsr0, t0 > +#elif defined USE_LSX > + vld vr0, sp, 13*SZREG + 0*SZVREG > + vld vr1, sp, 13*SZREG + 1*SZVREG > + vld vr2, sp, 13*SZREG + 2*SZVREG > + vld vr3, sp, 13*SZREG + 3*SZVREG > + vld vr4, sp, 13*SZREG + 4*SZVREG > + vld vr5, sp, 13*SZREG + 5*SZVREG > + vld vr6, sp, 13*SZREG + 6*SZVREG > + vld vr7, sp, 13*SZREG + 7*SZVREG > + vld vr8, sp, 13*SZREG + 8*SZVREG > + vld vr9, sp, 13*SZREG + 9*SZVREG > + vld vr10, sp, 13*SZREG + 10*SZVREG > + vld vr11, sp, 13*SZREG + 11*SZVREG > + vld vr12, sp, 13*SZREG + 12*SZVREG > + vld vr13, sp, 13*SZREG + 13*SZVREG > + vld vr14, sp, 13*SZREG + 14*SZVREG > + vld vr15, sp, 13*SZREG + 15*SZVREG > + vld vr16, sp, 13*SZREG + 16*SZVREG > + vld vr17, sp, 13*SZREG + 17*SZVREG > + vld vr18, sp, 13*SZREG + 18*SZVREG > + vld vr19, sp, 13*SZREG + 19*SZVREG > + vld vr20, sp, 13*SZREG + 20*SZVREG > + vld vr21, sp, 13*SZREG + 21*SZVREG > + vld vr22, sp, 13*SZREG + 22*SZVREG > + vld vr23, sp, 13*SZREG + 23*SZVREG > + vld vr24, sp, 13*SZREG + 24*SZVREG > + vld vr25, sp, 13*SZREG + 25*SZVREG > + vld vr26, sp, 13*SZREG + 26*SZVREG > + vld vr27, sp, 13*SZREG + 27*SZVREG > + vld vr28, sp, 13*SZREG + 28*SZVREG > + vld vr29, sp, 13*SZREG + 29*SZVREG > + vld vr30, sp, 13*SZREG + 30*SZVREG > + vld vr31, sp, 13*SZREG + 31*SZVREG > + REG_L t0, sp, 32*SZVREG > + movgr2fcsr fcsr0, t0 > +#elif !defined __loongarch_soft_float > + FREG_L fa0, sp, 13*SZREG + 0*SZFREG > + FREG_L fa1, sp, 13*SZREG + 1*SZFREG > + FREG_L fa2, sp, 13*SZREG + 2*SZFREG > + FREG_L fa3, sp, 13*SZREG + 3*SZFREG > + FREG_L fa4, sp, 13*SZREG + 4*SZFREG > + FREG_L fa5, sp, 13*SZREG + 5*SZFREG > + FREG_L fa6, sp, 13*SZREG + 6*SZFREG > + FREG_L fa7, sp, 13*SZREG + 7*SZFREG > + FREG_L ft0, sp, 13*SZREG + 8*SZFREG > + FREG_L ft1, sp, 13*SZREG + 9*SZFREG > + FREG_L ft2, sp, 13*SZREG + 10*SZFREG > + FREG_L ft3, sp, 13*SZREG + 11*SZFREG > + FREG_L ft4, 
sp, 13*SZREG + 12*SZFREG > + FREG_L ft5, sp, 13*SZREG + 13*SZFREG > + FREG_L ft6, sp, 13*SZREG + 14*SZFREG > + FREG_L ft7, sp, 13*SZREG + 15*SZFREG > + FREG_L ft8, sp, 13*SZREG + 16*SZFREG > + FREG_L ft9, sp, 13*SZREG + 17*SZFREG > + FREG_L ft10, sp, 13*SZREG + 18*SZFREG > + FREG_L ft11, sp, 13*SZREG + 19*SZFREG > + FREG_L ft12, sp, 13*SZREG + 20*SZFREG > + FREG_L ft13, sp, 13*SZREG + 21*SZFREG > + FREG_L ft14, sp, 13*SZREG + 22*SZFREG > + FREG_L ft15, sp, 13*SZREG + 23*SZFREG > + REG_L t0, sp, 24*SZFREG > + movgr2fcsr fcsr0, t0 > +#endif /* #ifdef USE_LASX */ > + > + ADDI sp, sp, FRAME_SIZE > + b Lret > + cfi_endproc > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > +#endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S > new file mode 100644 > index 0000000000..4a17079169 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.S > @@ -0,0 +1,93 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include <tls.h> > +#include "tlsdesc.h" > + > + .text > + > + /* Compute the thread pointer offset for symbols in the static > + TLS block. 
The offset is the same for all threads. > + Prototype: > + _dl_tlsdesc_return (tlsdesc *); */ > + .hidden _dl_tlsdesc_return > + .global _dl_tlsdesc_return > + .type _dl_tlsdesc_return,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_return: > + REG_L a0, a0, 8 > + RET > + cfi_endproc > + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return > + > + /* Handler for undefined weak TLS symbols. > + Prototype: > + _dl_tlsdesc_undefweak (tlsdesc *); > + > + The second word of the descriptor contains the addend. > + Return the addend minus the thread pointer. This ensures > + that when the caller adds on the thread pointer it gets back > + the addend. */ > + .hidden _dl_tlsdesc_undefweak > + .global _dl_tlsdesc_undefweak > + .type _dl_tlsdesc_undefweak,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_undefweak: > + REG_L a0, a0, 8 > + sub.d a0, a0, tp > + RET > + cfi_endproc > + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak > + > + > +#ifdef SHARED > + > +#if !defined __loongarch_soft_float > + > +#define USE_LASX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx > +#define Lret Lret_lasx > +#define Lslow Lslow_lasx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LASX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > + > +#define USE_LSX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx > +#define Lret Lret_lsx > +#define Lslow Lslow_lsx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LSX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > + > +#endif > + > +#include "dl-tlsdesc-dynamic.h" > + > +#endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h > new file mode 100644 > index 0000000000..988037a714 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.h > @@ -0,0 +1,53 @@ > +/* Thread-local storage descriptor handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. 
> + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _DL_TLSDESC_H > +#define _DL_TLSDESC_H > + > +#include <dl-tls.h> > + > +/* Type used to represent a TLS descriptor in the GOT. */ > +struct tlsdesc > +{ > + ptrdiff_t (*entry) (struct tlsdesc *); > + void *arg; > +}; > + > +/* Type used as the argument in a TLS descriptor for a symbol that > + needs dynamic TLS offsets. 
*/ > +struct tlsdesc_dynamic_arg > +{ > + tls_index tlsinfo; > + size_t gen_count; > +}; > + > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); > + > +# ifdef SHARED > +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); > +#if !defined __loongarch_soft_float > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); > +#endif > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); > +#endif > + > +#endif > diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h > index 4d8737ee7f..9b1773634c 100644 > --- a/sysdeps/loongarch/linkmap.h > +++ b/sysdeps/loongarch/linkmap.h > @@ -19,4 +19,5 @@ > struct link_map_machine > { > ElfW (Addr) plt; /* Address of .plt. */ > + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ > }; > diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h > index 51521a7eb4..23c1d12914 100644 > --- a/sysdeps/loongarch/sys/asm.h > +++ b/sysdeps/loongarch/sys/asm.h > @@ -25,6 +25,7 @@ > /* Macros to handle different pointer/register sizes for 32/64-bit code. */ > #define SZREG 8 > #define SZFREG 8 > +#define SZFCSREG 4 > #define SZVREG 16 > #define SZXREG 32 > #define REG_L ld.d > diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h > index f61ee25b25..80ce3e9c00 100644 > --- a/sysdeps/loongarch/sys/regdef.h > +++ b/sysdeps/loongarch/sys/regdef.h > @@ -97,6 +97,7 @@ > #define fcc5 $fcc5 > #define fcc6 $fcc6 > #define fcc7 $fcc7 > +#define fcsr0 $fcsr0 > > #define vr0 $vr0 > #define vr1 $vr1 > diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c > new file mode 100644 > index 0000000000..a357e7619f > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.c > @@ -0,0 +1,39 @@ > +/* Manage TLS descriptors. AArch64 version. 
> + > + Copyright (C) 2011-2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <ldsodefs.h> > +#include <tls.h> > +#include <dl-tlsdesc.h> > +#include <dl-unmap-segments.h> > +#include <tlsdeschtab.h> > + > +/* Unmap the dynamic object, but also release its TLS descriptor table > + if there is one. */ > + > +void > +_dl_unmap (struct link_map *map) > +{ > + _dl_unmap_segments (map); > + > +#ifdef SHARED > + if (map->l_mach.tlsdesc_table) > + htab_delete (map->l_mach.tlsdesc_table); > +#endif > +} > diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym > new file mode 100644 > index 0000000000..bcab218631 > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.sym > @@ -0,0 +1,19 @@ > +#include <stddef.h> > +#include <sysdep.h> > +#include <tls.h> > +#include <link.h> > +#include <dl-tlsdesc.h> > + > +-- > + > +-- Abuse tls.h macros to derive offsets relative to the thread register. 
> + > +TLSDESC_ARG offsetof(struct tlsdesc, arg) > +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) > +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) > +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) > +TCBHEAD_DTV offsetof(tcbhead_t, dtv) > +DTV_COUNTER offsetof(dtv_t, counter) > +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED > +TLS_DTV_OFFSET TLS_DTV_OFFSET > +SIZE_OF_DTV sizeof(tcbhead_t) > diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data > index 547b1c1b7f..ec32e6d13f 100644 > --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data > +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data > @@ -5,3 +5,5 @@ libc.so: calloc > libc.so: free > libc.so: malloc > libc.so: realloc > +# The dynamic loader needs __tls_get_addr for TLS. > +ld.so: __tls_get_addr
On Wed, Feb 28, 2024 at 5:44 PM mengqinggang <mengqinggang@loongson.cn> wrote: > > This is mostly based on AArch64 and RISC-V implementation. > > Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. > > For _dl_tlsdesc_dynamic function slow path, temporarily save and restore > all vector registers. > --- > Changes v1 -> v2: > - Fix vr24-vr31, xr24-xr31 typo. > - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic. > - Save and restore fcsr0 in _dl_tlsdesc_dynamic. > > v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html > > elf/elf.h | 2 + > sysdeps/loongarch/Makefile | 6 + > sysdeps/loongarch/dl-link.sym | 1 + > sysdeps/loongarch/dl-machine.h | 60 ++- > sysdeps/loongarch/dl-tls.h | 9 +- > sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++ > sysdeps/loongarch/dl-tlsdesc.S | 93 +++++ > sysdeps/loongarch/dl-tlsdesc.h | 53 +++ > sysdeps/loongarch/linkmap.h | 1 + > sysdeps/loongarch/sys/asm.h | 1 + > sysdeps/loongarch/sys/regdef.h | 1 + > sysdeps/loongarch/tlsdesc.c | 39 ++ > sysdeps/loongarch/tlsdesc.sym | 19 + > .../unix/sysv/linux/loongarch/localplt.data | 2 + > 14 files changed, 625 insertions(+), 3 deletions(-) > create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h > create mode 100644 sysdeps/loongarch/dl-tlsdesc.S > create mode 100644 sysdeps/loongarch/dl-tlsdesc.h > create mode 100644 sysdeps/loongarch/tlsdesc.c > create mode 100644 sysdeps/loongarch/tlsdesc.sym > > diff --git a/elf/elf.h b/elf/elf.h > index f2206e5c06..eec24ea049 100644 > --- a/elf/elf.h > +++ b/elf/elf.h > @@ -4237,6 +4237,8 @@ enum > #define R_LARCH_TLS_TPREL32 10 > #define R_LARCH_TLS_TPREL64 11 > #define R_LARCH_IRELATIVE 12 > +#define R_LARCH_TLS_DESC32 13 > +#define R_LARCH_TLS_DESC64 14 > > /* Reserved for future relocs that the dynamic linker must understand. 
*/ > > diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile > index 43d2f583cd..181389e787 100644 > --- a/sysdeps/loongarch/Makefile > +++ b/sysdeps/loongarch/Makefile > @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h > endif > > ifeq ($(subdir),elf) > +sysdep-dl-routines += tlsdesc dl-tlsdesc > gen-as-const-headers += dl-link.sym > endif > > +ifeq ($(subdir),csu) > +gen-as-const-headers += tlsdesc.sym > +endif > + > + > # LoongArch's assembler also needs to know about PIC as it changes the > # definition of some assembler macros. > ASFLAGS-.os += $(pic-ccflag) > diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym > index b534968e30..fd81ef37d5 100644 > --- a/sysdeps/loongarch/dl-link.sym > +++ b/sysdeps/loongarch/dl-link.sym > @@ -1,6 +1,7 @@ > #include <stddef.h> > #include <sysdep.h> > #include <link.h> > +#include <dl-tlsdesc.h> > > DL_SIZEOF_RG sizeof(struct La_loongarch_regs) > DL_SIZEOF_RV sizeof(struct La_loongarch_retval) > diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h > index ab81b82d95..8ca6c224f6 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -25,7 +25,7 @@ > #include <entry.h> > #include <elf/elf.h> > #include <sys/asm.h> > -#include <dl-tls.h> > +#include <dl-tlsdesc.h> > #include <dl-static-tls.h> > #include <dl-machine-rel.h> > > @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], > *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; > break; > > + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: > + { > + struct tlsdesc volatile *td = > + (struct tlsdesc volatile *)addr_field; > + if (! 
sym) > + { > + td->arg = (void*)reloc->r_addend; > + td->entry = _dl_tlsdesc_undefweak; > + } > + else > + { > +# ifndef SHARED > + CHECK_STATIC_TLS (map, sym_map); > +# else > + if (!TRY_STATIC_TLS (map, sym_map)) > + { > + td->arg = _dl_make_tlsdesc_dynamic > + (sym_map, sym->st_value + reloc->r_addend); > +# if !defined __loongarch_soft_float > + if (SUPPORT_LASX) > + td->entry = _dl_tlsdesc_dynamic_lasx; > + else > + if (SUPPORT_LSX) > + td->entry = _dl_tlsdesc_dynamic_lsx; > + else > +# endif > + td->entry = _dl_tlsdesc_dynamic; > + } > + else > +# endif > + { > + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) > + + reloc->r_addend); > + td->entry = _dl_tlsdesc_return; > + } > + } > + break; > + } > + > case R_LARCH_COPY: > { > if (sym == NULL) > @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], > else > *reloc_addr = map->l_mach.plt; > } > + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) > + { > + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); > + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); > + const ElfW (Sym) *sym = &symtab[symndx]; > + const struct r_found_version *version = NULL; > + > + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) > + { > + const ElfW (Half) *vernum = > + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); > + version = &map->l_versions[vernum[symndx] & 0x7fff]; > + } > + > + /* Always initialize TLS descriptors completely, because lazy > + initialization requires synchronization at every TLS access. */ > + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, > + skip_ifunc); > + } > else > _dl_reloc_bad_type (map, r_type, 1); > } > diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h > index 29924b866d..de593c002d 100644 > --- a/sysdeps/loongarch/dl-tls.h > +++ b/sysdeps/loongarch/dl-tls.h > @@ -16,6 +16,9 @@ > License along with the GNU C Library. If not, see > <https://www.gnu.org/licenses/>. 
*/ > > +#ifndef _DL_TLS_H > +#define _DL_TLS_H > + > /* Type used for the representation of TLS information in the GOT. */ > typedef struct > { > @@ -23,6 +26,8 @@ typedef struct > unsigned long int ti_offset; > } tls_index; > > +extern void *__tls_get_addr (tls_index *ti); > + > /* The thread pointer points to the first static TLS block. */ > #define TLS_TP_OFFSET 0 > > @@ -37,10 +42,10 @@ typedef struct > /* Compute the value for a DTPREL reloc. */ > #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) > > -extern void *__tls_get_addr (tls_index *ti); > - > #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) > #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) > > /* Value used for dtv entries for which the allocation is delayed. */ > #define TLS_DTV_UNALLOCATED ((void *) -1l) > + > +#endif > diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > new file mode 100644 > index 0000000000..0d8c9bb991 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > @@ -0,0 +1,341 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#ifdef USE_LASX > +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) > +#elif defined USE_LSX > +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) > +#elif !defined __loongarch_soft_float > +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) > +#else > +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) > +#endif > + > +#ifdef SHARED > + /* Handler for dynamic TLS symbols. > + Prototype: > + _dl_tlsdesc_dynamic (tlsdesc *) ; > + > + The second word of the descriptor points to a > + tlsdesc_dynamic_arg structure. > + > + Returns the offset between the thread pointer and the > + object referenced by the argument. > + > + ptrdiff_t > + __attribute__ ((__regparm__ (1))) > + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > + { > + struct tlsdesc_dynamic_arg *td = tdp->arg; > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); > + if (__builtin_expect (td->gen_count <= dtv[0].counter > + && (dtv[td->tlsinfo.ti_module].pointer.val > + != TLS_DTV_UNALLOCATED), > + 1)) > + return dtv[td->tlsinfo.ti_module].pointer.val > + + td->tlsinfo.ti_offset > + - __thread_pointer; > + > + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > + } > + */ > + .hidden _dl_tlsdesc_dynamic > + .global _dl_tlsdesc_dynamic > + .type _dl_tlsdesc_dynamic,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_dynamic: > + /* Save just enough registers to support fast path, if we fall > + into slow path we will save additional registers. 
*/ > + ADDI sp, sp,-24 > + REG_S t0, sp, 0 > + REG_S t1, sp, 8 > + REG_S t2, sp, 16 > + > + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start > + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg > + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count > + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter > + bltu t2, t1, Lslow > + > + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module > + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ > + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) > + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val > + li.d t2, TLS_DTV_UNALLOCATED > + beq t1, t2, Lslow > + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset > + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset > + add.d a0, t1, t2 > +Lret: > + sub.d a0, a0, tp > + REG_L t0, sp, 0 > + REG_L t1, sp, 8 > + REG_L t2, sp, 16 > + ADDI sp, sp, 24 > + RET > + > +Lslow: > + /* This is the slow path. We need to call __tls_get_addr() which > + means we need to save and restore all the register that the > + callee will trash. */ > + > + /* Save the remaining registers that we must treat as caller save. 
*/ > + ADDI sp, sp, -FRAME_SIZE > + REG_S ra, sp, 0 * SZREG > + REG_S a1, sp, 1 * SZREG > + REG_S a2, sp, 2 * SZREG > + REG_S a3, sp, 3 * SZREG > + REG_S a4, sp, 4 * SZREG > + REG_S a5, sp, 5 * SZREG > + REG_S a6, sp, 6 * SZREG > + REG_S a7, sp, 7 * SZREG > + REG_S t4, sp, 8 * SZREG > + REG_S t5, sp, 9 * SZREG > + REG_S t6, sp, 10 * SZREG > + REG_S t7, sp, 11 * SZREG > + REG_S t8, sp, 12 * SZREG > + > +#ifdef USE_LASX > + xvst xr0, sp, 13*SZREG + 0*SZXREG > + xvst xr1, sp, 13*SZREG + 1*SZXREG > + xvst xr2, sp, 13*SZREG + 2*SZXREG > + xvst xr3, sp, 13*SZREG + 3*SZXREG > + xvst xr4, sp, 13*SZREG + 4*SZXREG > + xvst xr5, sp, 13*SZREG + 5*SZXREG > + xvst xr6, sp, 13*SZREG + 6*SZXREG > + xvst xr7, sp, 13*SZREG + 7*SZXREG > + xvst xr8, sp, 13*SZREG + 8*SZXREG > + xvst xr9, sp, 13*SZREG + 9*SZXREG > + xvst xr10, sp, 13*SZREG + 10*SZXREG > + xvst xr11, sp, 13*SZREG + 11*SZXREG > + xvst xr12, sp, 13*SZREG + 12*SZXREG > + xvst xr13, sp, 13*SZREG + 13*SZXREG > + xvst xr14, sp, 13*SZREG + 14*SZXREG > + xvst xr15, sp, 13*SZREG + 15*SZXREG > + xvst xr16, sp, 13*SZREG + 16*SZXREG > + xvst xr17, sp, 13*SZREG + 17*SZXREG > + xvst xr18, sp, 13*SZREG + 18*SZXREG > + xvst xr19, sp, 13*SZREG + 19*SZXREG > + xvst xr20, sp, 13*SZREG + 20*SZXREG > + xvst xr21, sp, 13*SZREG + 21*SZXREG > + xvst xr22, sp, 13*SZREG + 22*SZXREG > + xvst xr23, sp, 13*SZREG + 23*SZXREG > + xvst xr24, sp, 13*SZREG + 24*SZXREG > + xvst xr25, sp, 13*SZREG + 25*SZXREG > + xvst xr26, sp, 13*SZREG + 26*SZXREG > + xvst xr27, sp, 13*SZREG + 27*SZXREG > + xvst xr28, sp, 13*SZREG + 28*SZXREG > + xvst xr29, sp, 13*SZREG + 29*SZXREG > + xvst xr30, sp, 13*SZREG + 30*SZXREG > + xvst xr31, sp, 13*SZREG + 31*SZXREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 32*SZXREG > +#elif defined USE_LSX > + vst vr0, sp, 13*SZREG + 0*SZVREG > + vst vr1, sp, 13*SZREG + 1*SZVREG > + vst vr2, sp, 13*SZREG + 2*SZVREG > + vst vr3, sp, 13*SZREG + 
3*SZVREG > + vst vr4, sp, 13*SZREG + 4*SZVREG > + vst vr5, sp, 13*SZREG + 5*SZVREG > + vst vr6, sp, 13*SZREG + 6*SZVREG > + vst vr7, sp, 13*SZREG + 7*SZVREG > + vst vr8, sp, 13*SZREG + 8*SZVREG > + vst vr9, sp, 13*SZREG + 9*SZVREG > + vst vr10, sp, 13*SZREG + 10*SZVREG > + vst vr11, sp, 13*SZREG + 11*SZVREG > + vst vr12, sp, 13*SZREG + 12*SZVREG > + vst vr13, sp, 13*SZREG + 13*SZVREG > + vst vr14, sp, 13*SZREG + 14*SZVREG > + vst vr15, sp, 13*SZREG + 15*SZVREG > + vst vr16, sp, 13*SZREG + 16*SZVREG > + vst vr17, sp, 13*SZREG + 17*SZVREG > + vst vr18, sp, 13*SZREG + 18*SZVREG > + vst vr19, sp, 13*SZREG + 19*SZVREG > + vst vr20, sp, 13*SZREG + 20*SZVREG > + vst vr21, sp, 13*SZREG + 21*SZVREG > + vst vr22, sp, 13*SZREG + 22*SZVREG > + vst vr23, sp, 13*SZREG + 23*SZVREG > + vst vr24, sp, 13*SZREG + 24*SZVREG > + vst vr25, sp, 13*SZREG + 25*SZVREG > + vst vr26, sp, 13*SZREG + 26*SZVREG > + vst vr27, sp, 13*SZREG + 27*SZVREG > + vst vr28, sp, 13*SZREG + 28*SZVREG > + vst vr29, sp, 13*SZREG + 29*SZVREG > + vst vr30, sp, 13*SZREG + 30*SZVREG > + vst vr31, sp, 13*SZREG + 31*SZVREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 32*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_S fa0, sp, 13*SZREG + 0*SZFREG > + FREG_S fa1, sp, 13*SZREG + 1*SZFREG > + FREG_S fa2, sp, 13*SZREG + 2*SZFREG > + FREG_S fa3, sp, 13*SZREG + 3*SZFREG > + FREG_S fa4, sp, 13*SZREG + 4*SZFREG > + FREG_S fa5, sp, 13*SZREG + 5*SZFREG > + FREG_S fa6, sp, 13*SZREG + 6*SZFREG > + FREG_S fa7, sp, 13*SZREG + 7*SZFREG > + FREG_S ft0, sp, 13*SZREG + 8*SZFREG > + FREG_S ft1, sp, 13*SZREG + 9*SZFREG > + FREG_S ft2, sp, 13*SZREG + 10*SZFREG > + FREG_S ft3, sp, 13*SZREG + 11*SZFREG > + FREG_S ft4, sp, 13*SZREG + 12*SZFREG > + FREG_S ft5, sp, 13*SZREG + 13*SZFREG > + FREG_S ft6, sp, 13*SZREG + 14*SZFREG > + FREG_S ft7, sp, 13*SZREG + 15*SZFREG > + FREG_S ft8, sp, 13*SZREG + 16*SZFREG > + FREG_S ft9, sp, 13*SZREG + 
17*SZFREG > + FREG_S ft10, sp, 13*SZREG + 18*SZFREG > + FREG_S ft11, sp, 13*SZREG + 19*SZFREG > + FREG_S ft12, sp, 13*SZREG + 20*SZFREG > + FREG_S ft13, sp, 13*SZREG + 21*SZFREG > + FREG_S ft14, sp, 13*SZREG + 22*SZFREG > + FREG_S ft15, sp, 13*SZREG + 23*SZFREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 24*SZFREG > +#endif /* #ifdef USE_LASX */ > + > + bl __tls_get_addr > + ADDI a0, a0, -TLS_DTV_OFFSET > + > + REG_L ra, sp, 0 > + REG_L a1, sp, 1 * 8 > + REG_L a2, sp, 2 * 8 > + REG_L a3, sp, 3 * 8 > + REG_L a4, sp, 4 * 8 > + REG_L a5, sp, 5 * 8 > + REG_L a6, sp, 6 * 8 > + REG_L a7, sp, 7 * 8 > + REG_L t4, sp, 8 * 8 > + REG_L t5, sp, 9 * 8 > + REG_L t6, sp, 10 * 8 > + REG_L t7, sp, 11 * 8 > + REG_L t8, sp, 12 * 8 > + > +#ifdef USE_LASX > + xvld xr0, sp, 13*SZREG + 0*SZXREG > + xvld xr1, sp, 13*SZREG + 1*SZXREG > + xvld xr2, sp, 13*SZREG + 2*SZXREG > + xvld xr3, sp, 13*SZREG + 3*SZXREG > + xvld xr4, sp, 13*SZREG + 4*SZXREG > + xvld xr5, sp, 13*SZREG + 5*SZXREG > + xvld xr6, sp, 13*SZREG + 6*SZXREG > + xvld xr7, sp, 13*SZREG + 7*SZXREG > + xvld xr8, sp, 13*SZREG + 8*SZXREG > + xvld xr9, sp, 13*SZREG + 9*SZXREG > + xvld xr10, sp, 13*SZREG + 10*SZXREG > + xvld xr11, sp, 13*SZREG + 11*SZXREG > + xvld xr12, sp, 13*SZREG + 12*SZXREG > + xvld xr13, sp, 13*SZREG + 13*SZXREG > + xvld xr14, sp, 13*SZREG + 14*SZXREG > + xvld xr15, sp, 13*SZREG + 15*SZXREG > + xvld xr16, sp, 13*SZREG + 16*SZXREG > + xvld xr17, sp, 13*SZREG + 17*SZXREG > + xvld xr18, sp, 13*SZREG + 18*SZXREG > + xvld xr19, sp, 13*SZREG + 19*SZXREG > + xvld xr20, sp, 13*SZREG + 20*SZXREG > + xvld xr21, sp, 13*SZREG + 21*SZXREG > + xvld xr22, sp, 13*SZREG + 22*SZXREG > + xvld xr23, sp, 13*SZREG + 23*SZXREG > + xvld xr24, sp, 13*SZREG + 24*SZXREG > + xvld xr25, sp, 13*SZREG + 25*SZXREG > + xvld xr26, sp, 13*SZREG + 26*SZXREG > + xvld xr27, sp, 13*SZREG + 27*SZXREG > + xvld xr28, sp, 13*SZREG + 28*SZXREG > + xvld xr29, sp, 
13*SZREG + 29*SZXREG > + xvld xr30, sp, 13*SZREG + 30*SZXREG > + xvld xr31, sp, 13*SZREG + 31*SZXREG > + REG_L t0, sp, 32*SZXREG > + movgr2fcsr fcsr0, t0 > +#elif defined USE_LSX > + vld vr0, sp, 13*SZREG + 0*SZVREG > + vld vr1, sp, 13*SZREG + 1*SZVREG > + vld vr2, sp, 13*SZREG + 2*SZVREG > + vld vr3, sp, 13*SZREG + 3*SZVREG > + vld vr4, sp, 13*SZREG + 4*SZVREG > + vld vr5, sp, 13*SZREG + 5*SZVREG > + vld vr6, sp, 13*SZREG + 6*SZVREG > + vld vr7, sp, 13*SZREG + 7*SZVREG > + vld vr8, sp, 13*SZREG + 8*SZVREG > + vld vr9, sp, 13*SZREG + 9*SZVREG > + vld vr10, sp, 13*SZREG + 10*SZVREG > + vld vr11, sp, 13*SZREG + 11*SZVREG > + vld vr12, sp, 13*SZREG + 12*SZVREG > + vld vr13, sp, 13*SZREG + 13*SZVREG > + vld vr14, sp, 13*SZREG + 14*SZVREG > + vld vr15, sp, 13*SZREG + 15*SZVREG > + vld vr16, sp, 13*SZREG + 16*SZVREG > + vld vr17, sp, 13*SZREG + 17*SZVREG > + vld vr18, sp, 13*SZREG + 18*SZVREG > + vld vr19, sp, 13*SZREG + 19*SZVREG > + vld vr20, sp, 13*SZREG + 20*SZVREG > + vld vr21, sp, 13*SZREG + 21*SZVREG > + vld vr22, sp, 13*SZREG + 22*SZVREG > + vld vr23, sp, 13*SZREG + 23*SZVREG > + vld vr24, sp, 13*SZREG + 24*SZVREG > + vld vr25, sp, 13*SZREG + 25*SZVREG > + vld vr26, sp, 13*SZREG + 26*SZVREG > + vld vr27, sp, 13*SZREG + 27*SZVREG > + vld vr28, sp, 13*SZREG + 28*SZVREG > + vld vr29, sp, 13*SZREG + 29*SZVREG > + vld vr30, sp, 13*SZREG + 30*SZVREG > + vld vr31, sp, 13*SZREG + 31*SZVREG > + REG_L t0, sp, 32*SZVREG > + movgr2fcsr fcsr0, t0 > +#elif !defined __loongarch_soft_float > + FREG_L fa0, sp, 13*SZREG + 0*SZFREG > + FREG_L fa1, sp, 13*SZREG + 1*SZFREG > + FREG_L fa2, sp, 13*SZREG + 2*SZFREG > + FREG_L fa3, sp, 13*SZREG + 3*SZFREG > + FREG_L fa4, sp, 13*SZREG + 4*SZFREG > + FREG_L fa5, sp, 13*SZREG + 5*SZFREG > + FREG_L fa6, sp, 13*SZREG + 6*SZFREG > + FREG_L fa7, sp, 13*SZREG + 7*SZFREG > + FREG_L ft0, sp, 13*SZREG + 8*SZFREG > + FREG_L ft1, sp, 13*SZREG + 9*SZFREG > + FREG_L ft2, sp, 13*SZREG + 10*SZFREG > + FREG_L ft3, sp, 13*SZREG + 11*SZFREG > + FREG_L ft4, 
sp, 13*SZREG + 12*SZFREG > + FREG_L ft5, sp, 13*SZREG + 13*SZFREG > + FREG_L ft6, sp, 13*SZREG + 14*SZFREG > + FREG_L ft7, sp, 13*SZREG + 15*SZFREG > + FREG_L ft8, sp, 13*SZREG + 16*SZFREG > + FREG_L ft9, sp, 13*SZREG + 17*SZFREG > + FREG_L ft10, sp, 13*SZREG + 18*SZFREG > + FREG_L ft11, sp, 13*SZREG + 19*SZFREG > + FREG_L ft12, sp, 13*SZREG + 20*SZFREG > + FREG_L ft13, sp, 13*SZREG + 21*SZFREG > + FREG_L ft14, sp, 13*SZREG + 22*SZFREG > + FREG_L ft15, sp, 13*SZREG + 23*SZFREG > + REG_L t0, sp, 24*SZFREG > + movgr2fcsr fcsr0, t0 > +#endif /* #ifdef USE_LASX */ > + > + ADDI sp, sp, FRAME_SIZE > + b Lret > + cfi_endproc > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > +#endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S > new file mode 100644 > index 0000000000..4a17079169 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.S > @@ -0,0 +1,93 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <sysdep.h> > +#include <tls.h> > +#include "tlsdesc.h" > + > + .text > + > + /* Compute the thread pointer offset for symbols in the static > + TLS block. 
The offset is the same for all threads. > + Prototype: > + _dl_tlsdesc_return (tlsdesc *); */ > + .hidden _dl_tlsdesc_return > + .global _dl_tlsdesc_return > + .type _dl_tlsdesc_return,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_return: > + REG_L a0, a0, 8 > + RET > + cfi_endproc > + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return > + > + /* Handler for undefined weak TLS symbols. > + Prototype: > + _dl_tlsdesc_undefweak (tlsdesc *); > + > + The second word of the descriptor contains the addend. > + Return the addend minus the thread pointer. This ensures > + that when the caller adds on the thread pointer it gets back > + the addend. */ > + .hidden _dl_tlsdesc_undefweak > + .global _dl_tlsdesc_undefweak > + .type _dl_tlsdesc_undefweak,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_undefweak: > + REG_L a0, a0, 8 > + sub.d a0, a0, tp > + RET > + cfi_endproc > + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak > + > + > +#ifdef SHARED > + > +#if !defined __loongarch_soft_float > + > +#define USE_LASX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx > +#define Lret Lret_lasx > +#define Lslow Lslow_lasx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LASX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > + > +#define USE_LSX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx > +#define Lret Lret_lsx > +#define Lslow Lslow_lsx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LSX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > + > +#endif > + > +#include "dl-tlsdesc-dynamic.h" > + > +#endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h > new file mode 100644 > index 0000000000..988037a714 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.h > @@ -0,0 +1,53 @@ > +/* Thread-local storage descriptor handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. 
> + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _DL_TLSDESC_H > +#define _DL_TLSDESC_H > + > +#include <dl-tls.h> > + > +/* Type used to represent a TLS descriptor in the GOT. */ > +struct tlsdesc > +{ > + ptrdiff_t (*entry) (struct tlsdesc *); > + void *arg; > +}; > + > +/* Type used as the argument in a TLS descriptor for a symbol that > + needs dynamic TLS offsets. 
*/ > +struct tlsdesc_dynamic_arg > +{ > + tls_index tlsinfo; > + size_t gen_count; > +}; > + > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); > + > +# ifdef SHARED > +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); > +#if !defined __loongarch_soft_float > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); > +#endif > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); > +#endif > + > +#endif > diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h > index 4d8737ee7f..9b1773634c 100644 > --- a/sysdeps/loongarch/linkmap.h > +++ b/sysdeps/loongarch/linkmap.h > @@ -19,4 +19,5 @@ > struct link_map_machine > { > ElfW (Addr) plt; /* Address of .plt. */ > + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ > }; > diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h > index 51521a7eb4..23c1d12914 100644 > --- a/sysdeps/loongarch/sys/asm.h > +++ b/sysdeps/loongarch/sys/asm.h > @@ -25,6 +25,7 @@ > /* Macros to handle different pointer/register sizes for 32/64-bit code. */ > #define SZREG 8 > #define SZFREG 8 > +#define SZFCSREG 4 > #define SZVREG 16 > #define SZXREG 32 > #define REG_L ld.d > diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h > index f61ee25b25..80ce3e9c00 100644 > --- a/sysdeps/loongarch/sys/regdef.h > +++ b/sysdeps/loongarch/sys/regdef.h > @@ -97,6 +97,7 @@ > #define fcc5 $fcc5 > #define fcc6 $fcc6 > #define fcc7 $fcc7 > +#define fcsr0 $fcsr0 > > #define vr0 $vr0 > #define vr1 $vr1 > diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c > new file mode 100644 > index 0000000000..a357e7619f > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.c > @@ -0,0 +1,39 @@ > +/* Manage TLS descriptors. AArch64 version. 
Change it to "LoongArch version". > + >
On 28/02/24 22:43, mengqinggang wrote: > This is mostly based on AArch64 and RISC-V implementations. > > Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. > > For _dl_tlsdesc_dynamic function slow path, temporarily save and restore > all vector registers. > --- > Changes v1 -> v2: > - Fix vr24-vr31, xr24-xr31 typo. > - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic. > - Save and restore fcsr0 in _dl_tlsdesc_dynamic. > > v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html Patch looks ok, some comments below. > > elf/elf.h | 2 + > sysdeps/loongarch/Makefile | 6 + > sysdeps/loongarch/dl-link.sym | 1 + > sysdeps/loongarch/dl-machine.h | 60 ++- > sysdeps/loongarch/dl-tls.h | 9 +- > sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++ > sysdeps/loongarch/dl-tlsdesc.S | 93 +++++ > sysdeps/loongarch/dl-tlsdesc.h | 53 +++ > sysdeps/loongarch/linkmap.h | 1 + > sysdeps/loongarch/sys/asm.h | 1 + > sysdeps/loongarch/sys/regdef.h | 1 + > sysdeps/loongarch/tlsdesc.c | 39 ++ > sysdeps/loongarch/tlsdesc.sym | 19 + > .../unix/sysv/linux/loongarch/localplt.data | 2 + > 14 files changed, 625 insertions(+), 3 deletions(-) > create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h > create mode 100644 sysdeps/loongarch/dl-tlsdesc.S > create mode 100644 sysdeps/loongarch/dl-tlsdesc.h > create mode 100644 sysdeps/loongarch/tlsdesc.c > create mode 100644 sysdeps/loongarch/tlsdesc.sym > > diff --git a/elf/elf.h b/elf/elf.h > index f2206e5c06..eec24ea049 100644 > --- a/elf/elf.h > +++ b/elf/elf.h > @@ -4237,6 +4237,8 @@ enum > #define R_LARCH_TLS_TPREL32 10 > #define R_LARCH_TLS_TPREL64 11 > #define R_LARCH_IRELATIVE 12 > +#define R_LARCH_TLS_DESC32 13 > +#define R_LARCH_TLS_DESC64 14 > > /* Reserved for future relocs that the dynamic linker must understand.
*/ > > diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile > index 43d2f583cd..181389e787 100644 > --- a/sysdeps/loongarch/Makefile > +++ b/sysdeps/loongarch/Makefile > @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h > endif > > ifeq ($(subdir),elf) > +sysdep-dl-routines += tlsdesc dl-tlsdesc > gen-as-const-headers += dl-link.sym > endif > > +ifeq ($(subdir),csu) > +gen-as-const-headers += tlsdesc.sym > +endif > + > + > # LoongArch's assembler also needs to know about PIC as it changes the > # definition of some assembler macros. > ASFLAGS-.os += $(pic-ccflag) > diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym > index b534968e30..fd81ef37d5 100644 > --- a/sysdeps/loongarch/dl-link.sym > +++ b/sysdeps/loongarch/dl-link.sym > @@ -1,6 +1,7 @@ > #include <stddef.h> > #include <sysdep.h> > #include <link.h> > +#include <dl-tlsdesc.h> > > DL_SIZEOF_RG sizeof(struct La_loongarch_regs) > DL_SIZEOF_RV sizeof(struct La_loongarch_retval) > diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h > index ab81b82d95..8ca6c224f6 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -25,7 +25,7 @@ > #include <entry.h> > #include <elf/elf.h> > #include <sys/asm.h> > -#include <dl-tls.h> > +#include <dl-tlsdesc.h> > #include <dl-static-tls.h> > #include <dl-machine-rel.h> > > @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], > *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; > break; > > + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: > + { > + struct tlsdesc volatile *td = > + (struct tlsdesc volatile *)addr_field; > + if (! 
sym) > + { > + td->arg = (void*)reloc->r_addend; > + td->entry = _dl_tlsdesc_undefweak; > + } > + else > + { > +# ifndef SHARED > + CHECK_STATIC_TLS (map, sym_map); > +# else > + if (!TRY_STATIC_TLS (map, sym_map)) > + { > + td->arg = _dl_make_tlsdesc_dynamic > + (sym_map, sym->st_value + reloc->r_addend); > +# if !defined __loongarch_soft_float > + if (SUPPORT_LASX) > + td->entry = _dl_tlsdesc_dynamic_lasx; > + else > + if (SUPPORT_LSX) > + td->entry = _dl_tlsdesc_dynamic_lsx; > + else > +# endif > + td->entry = _dl_tlsdesc_dynamic; > + } > + else > +# endif > + { > + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) > + + reloc->r_addend); > + td->entry = _dl_tlsdesc_return; > + } > + } > + break; > + } > + > case R_LARCH_COPY: > { > if (sym == NULL) > @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], > else > *reloc_addr = map->l_mach.plt; > } > + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) Use __glibc_likely here. > + { > + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); > + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); > + const ElfW (Sym) *sym = &symtab[symndx]; > + const struct r_found_version *version = NULL; > + > + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) > + { > + const ElfW (Half) *vernum = > + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); > + version = &map->l_versions[vernum[symndx] & 0x7fff]; > + } > + > + /* Always initialize TLS descriptors completely, because lazy > + initialization requires synchronization at every TLS access. */ > + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, > + skip_ifunc); > + } > else > _dl_reloc_bad_type (map, r_type, 1); > } > diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h > index 29924b866d..de593c002d 100644 > --- a/sysdeps/loongarch/dl-tls.h > +++ b/sysdeps/loongarch/dl-tls.h > @@ -16,6 +16,9 @@ > License along with the GNU C Library. 
If not, see > <https://www.gnu.org/licenses/>. */ > > +#ifndef _DL_TLS_H > +#define _DL_TLS_H > + > /* Type used for the representation of TLS information in the GOT. */ > typedef struct > { > @@ -23,6 +26,8 @@ typedef struct > unsigned long int ti_offset; > } tls_index; > > +extern void *__tls_get_addr (tls_index *ti); > + > /* The thread pointer points to the first static TLS block. */ > #define TLS_TP_OFFSET 0 > > @@ -37,10 +42,10 @@ typedef struct > /* Compute the value for a DTPREL reloc. */ > #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) > > -extern void *__tls_get_addr (tls_index *ti); > - Why move the function prototype? > #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) > #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) > > /* Value used for dtv entries for which the allocation is delayed. */ > #define TLS_DTV_UNALLOCATED ((void *) -1l) > + > +#endif > diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > new file mode 100644 > index 0000000000..0d8c9bb991 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h > @@ -0,0 +1,341 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. Update Copyright years to 2024. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details.
> + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifdef USE_LASX > +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) > +#elif defined USE_LSX > +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) > +#elif !defined __loongarch_soft_float > +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) > +#else > +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) > +#endif I don't have a strong opinion, but another option that might be simpler is to provide only one _dl_tlsdesc_dynamic implementation and check the required save/restore of vector registers based on the hwcap value. > + > +#ifdef SHARED > + /* Handler for dynamic TLS symbols. > + Prototype: > + _dl_tlsdesc_dynamic (tlsdesc *) ; > + > + The second word of the descriptor points to a > + tlsdesc_dynamic_arg structure. > + > + Returns the offset between the thread pointer and the > + object referenced by the argument. > + > + ptrdiff_t > + __attribute__ ((__regparm__ (1))) Does this attribute really make sense for LoongArch? > + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > + { > + struct tlsdesc_dynamic_arg *td = tdp->arg; > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); > + if (__builtin_expect (td->gen_count <= dtv[0].counter Use __glibc_likely or just remove the __builtin_expect for clarity.
> + && (dtv[td->tlsinfo.ti_module].pointer.val > + != TLS_DTV_UNALLOCATED), > + 1)) > + return dtv[td->tlsinfo.ti_module].pointer.val > + + td->tlsinfo.ti_offset > + - __thread_pointer; > + > + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > + } > + */ > + .hidden _dl_tlsdesc_dynamic > + .global _dl_tlsdesc_dynamic > + .type _dl_tlsdesc_dynamic,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_dynamic: > + /* Save just enough registers to support fast path, if we fall > + into slow path we will save additional registers. */ > + ADDI sp, sp,-24 > + REG_S t0, sp, 0 > + REG_S t1, sp, 8 > + REG_S t2, sp, 16 > + > + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start > + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg > + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count > + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter > + bltu t2, t1, Lslow > + > + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module > + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ > + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) > + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val > + li.d t2, TLS_DTV_UNALLOCATED > + beq t1, t2, Lslow > + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset > + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset > + add.d a0, t1, t2 > +Lret: > + sub.d a0, a0, tp > + REG_L t0, sp, 0 > + REG_L t1, sp, 8 > + REG_L t2, sp, 16 > + ADDI sp, sp, 24 > + RET > + > +Lslow: > + /* This is the slow path. We need to call __tls_get_addr() which > + means we need to save and restore all the register that the > + callee will trash. */ > + > + /* Save the remaining registers that we must treat as caller save. 
*/ > + ADDI sp, sp, -FRAME_SIZE > + REG_S ra, sp, 0 * SZREG > + REG_S a1, sp, 1 * SZREG > + REG_S a2, sp, 2 * SZREG > + REG_S a3, sp, 3 * SZREG > + REG_S a4, sp, 4 * SZREG > + REG_S a5, sp, 5 * SZREG > + REG_S a6, sp, 6 * SZREG > + REG_S a7, sp, 7 * SZREG > + REG_S t4, sp, 8 * SZREG > + REG_S t5, sp, 9 * SZREG > + REG_S t6, sp, 10 * SZREG > + REG_S t7, sp, 11 * SZREG > + REG_S t8, sp, 12 * SZREG > + > +#ifdef USE_LASX > + xvst xr0, sp, 13*SZREG + 0*SZXREG > + xvst xr1, sp, 13*SZREG + 1*SZXREG > + xvst xr2, sp, 13*SZREG + 2*SZXREG > + xvst xr3, sp, 13*SZREG + 3*SZXREG > + xvst xr4, sp, 13*SZREG + 4*SZXREG > + xvst xr5, sp, 13*SZREG + 5*SZXREG > + xvst xr6, sp, 13*SZREG + 6*SZXREG > + xvst xr7, sp, 13*SZREG + 7*SZXREG > + xvst xr8, sp, 13*SZREG + 8*SZXREG > + xvst xr9, sp, 13*SZREG + 9*SZXREG > + xvst xr10, sp, 13*SZREG + 10*SZXREG > + xvst xr11, sp, 13*SZREG + 11*SZXREG > + xvst xr12, sp, 13*SZREG + 12*SZXREG > + xvst xr13, sp, 13*SZREG + 13*SZXREG > + xvst xr14, sp, 13*SZREG + 14*SZXREG > + xvst xr15, sp, 13*SZREG + 15*SZXREG > + xvst xr16, sp, 13*SZREG + 16*SZXREG > + xvst xr17, sp, 13*SZREG + 17*SZXREG > + xvst xr18, sp, 13*SZREG + 18*SZXREG > + xvst xr19, sp, 13*SZREG + 19*SZXREG > + xvst xr20, sp, 13*SZREG + 20*SZXREG > + xvst xr21, sp, 13*SZREG + 21*SZXREG > + xvst xr22, sp, 13*SZREG + 22*SZXREG > + xvst xr23, sp, 13*SZREG + 23*SZXREG > + xvst xr24, sp, 13*SZREG + 24*SZXREG > + xvst xr25, sp, 13*SZREG + 25*SZXREG > + xvst xr26, sp, 13*SZREG + 26*SZXREG > + xvst xr27, sp, 13*SZREG + 27*SZXREG > + xvst xr28, sp, 13*SZREG + 28*SZXREG > + xvst xr29, sp, 13*SZREG + 29*SZXREG > + xvst xr30, sp, 13*SZREG + 30*SZXREG > + xvst xr31, sp, 13*SZREG + 31*SZXREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 32*SZXREG > +#elif defined USE_LSX > + vst vr0, sp, 13*SZREG + 0*SZVREG > + vst vr1, sp, 13*SZREG + 1*SZVREG > + vst vr2, sp, 13*SZREG + 2*SZVREG > + vst vr3, sp, 13*SZREG + 
3*SZVREG > + vst vr4, sp, 13*SZREG + 4*SZVREG > + vst vr5, sp, 13*SZREG + 5*SZVREG > + vst vr6, sp, 13*SZREG + 6*SZVREG > + vst vr7, sp, 13*SZREG + 7*SZVREG > + vst vr8, sp, 13*SZREG + 8*SZVREG > + vst vr9, sp, 13*SZREG + 9*SZVREG > + vst vr10, sp, 13*SZREG + 10*SZVREG > + vst vr11, sp, 13*SZREG + 11*SZVREG > + vst vr12, sp, 13*SZREG + 12*SZVREG > + vst vr13, sp, 13*SZREG + 13*SZVREG > + vst vr14, sp, 13*SZREG + 14*SZVREG > + vst vr15, sp, 13*SZREG + 15*SZVREG > + vst vr16, sp, 13*SZREG + 16*SZVREG > + vst vr17, sp, 13*SZREG + 17*SZVREG > + vst vr18, sp, 13*SZREG + 18*SZVREG > + vst vr19, sp, 13*SZREG + 19*SZVREG > + vst vr20, sp, 13*SZREG + 20*SZVREG > + vst vr21, sp, 13*SZREG + 21*SZVREG > + vst vr22, sp, 13*SZREG + 22*SZVREG > + vst vr23, sp, 13*SZREG + 23*SZVREG > + vst vr24, sp, 13*SZREG + 24*SZVREG > + vst vr25, sp, 13*SZREG + 25*SZVREG > + vst vr26, sp, 13*SZREG + 26*SZVREG > + vst vr27, sp, 13*SZREG + 27*SZVREG > + vst vr28, sp, 13*SZREG + 28*SZVREG > + vst vr29, sp, 13*SZREG + 29*SZVREG > + vst vr30, sp, 13*SZREG + 30*SZVREG > + vst vr31, sp, 13*SZREG + 31*SZVREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 32*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_S fa0, sp, 13*SZREG + 0*SZFREG > + FREG_S fa1, sp, 13*SZREG + 1*SZFREG > + FREG_S fa2, sp, 13*SZREG + 2*SZFREG > + FREG_S fa3, sp, 13*SZREG + 3*SZFREG > + FREG_S fa4, sp, 13*SZREG + 4*SZFREG > + FREG_S fa5, sp, 13*SZREG + 5*SZFREG > + FREG_S fa6, sp, 13*SZREG + 6*SZFREG > + FREG_S fa7, sp, 13*SZREG + 7*SZFREG > + FREG_S ft0, sp, 13*SZREG + 8*SZFREG > + FREG_S ft1, sp, 13*SZREG + 9*SZFREG > + FREG_S ft2, sp, 13*SZREG + 10*SZFREG > + FREG_S ft3, sp, 13*SZREG + 11*SZFREG > + FREG_S ft4, sp, 13*SZREG + 12*SZFREG > + FREG_S ft5, sp, 13*SZREG + 13*SZFREG > + FREG_S ft6, sp, 13*SZREG + 14*SZFREG > + FREG_S ft7, sp, 13*SZREG + 15*SZFREG > + FREG_S ft8, sp, 13*SZREG + 16*SZFREG > + FREG_S ft9, sp, 13*SZREG + 
17*SZFREG > + FREG_S ft10, sp, 13*SZREG + 18*SZFREG > + FREG_S ft11, sp, 13*SZREG + 19*SZFREG > + FREG_S ft12, sp, 13*SZREG + 20*SZFREG > + FREG_S ft13, sp, 13*SZREG + 21*SZFREG > + FREG_S ft14, sp, 13*SZREG + 22*SZFREG > + FREG_S ft15, sp, 13*SZREG + 23*SZFREG > + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of > + # some fields in fcsr0 > + movfcsr2gr t0, fcsr0 > + REG_S t0, sp, 24*SZFREG > +#endif /* #ifdef USE_LASX */ > + > + bl __tls_get_addr > + ADDI a0, a0, -TLS_DTV_OFFSET > + > + REG_L ra, sp, 0 > + REG_L a1, sp, 1 * 8 > + REG_L a2, sp, 2 * 8 > + REG_L a3, sp, 3 * 8 > + REG_L a4, sp, 4 * 8 > + REG_L a5, sp, 5 * 8 > + REG_L a6, sp, 6 * 8 > + REG_L a7, sp, 7 * 8 > + REG_L t4, sp, 8 * 8 > + REG_L t5, sp, 9 * 8 > + REG_L t6, sp, 10 * 8 > + REG_L t7, sp, 11 * 8 > + REG_L t8, sp, 12 * 8 > + > +#ifdef USE_LASX > + xvld xr0, sp, 13*SZREG + 0*SZXREG > + xvld xr1, sp, 13*SZREG + 1*SZXREG > + xvld xr2, sp, 13*SZREG + 2*SZXREG > + xvld xr3, sp, 13*SZREG + 3*SZXREG > + xvld xr4, sp, 13*SZREG + 4*SZXREG > + xvld xr5, sp, 13*SZREG + 5*SZXREG > + xvld xr6, sp, 13*SZREG + 6*SZXREG > + xvld xr7, sp, 13*SZREG + 7*SZXREG > + xvld xr8, sp, 13*SZREG + 8*SZXREG > + xvld xr9, sp, 13*SZREG + 9*SZXREG > + xvld xr10, sp, 13*SZREG + 10*SZXREG > + xvld xr11, sp, 13*SZREG + 11*SZXREG > + xvld xr12, sp, 13*SZREG + 12*SZXREG > + xvld xr13, sp, 13*SZREG + 13*SZXREG > + xvld xr14, sp, 13*SZREG + 14*SZXREG > + xvld xr15, sp, 13*SZREG + 15*SZXREG > + xvld xr16, sp, 13*SZREG + 16*SZXREG > + xvld xr17, sp, 13*SZREG + 17*SZXREG > + xvld xr18, sp, 13*SZREG + 18*SZXREG > + xvld xr19, sp, 13*SZREG + 19*SZXREG > + xvld xr20, sp, 13*SZREG + 20*SZXREG > + xvld xr21, sp, 13*SZREG + 21*SZXREG > + xvld xr22, sp, 13*SZREG + 22*SZXREG > + xvld xr23, sp, 13*SZREG + 23*SZXREG > + xvld xr24, sp, 13*SZREG + 24*SZXREG > + xvld xr25, sp, 13*SZREG + 25*SZXREG > + xvld xr26, sp, 13*SZREG + 26*SZXREG > + xvld xr27, sp, 13*SZREG + 27*SZXREG > + xvld xr28, sp, 13*SZREG + 28*SZXREG > + xvld xr29, sp, 
13*SZREG + 29*SZXREG > + xvld xr30, sp, 13*SZREG + 30*SZXREG > + xvld xr31, sp, 13*SZREG + 31*SZXREG > + REG_L t0, sp, 32*SZXREG > + movgr2fcsr fcsr0, t0 > +#elif defined USE_LSX > + vld vr0, sp, 13*SZREG + 0*SZVREG > + vld vr1, sp, 13*SZREG + 1*SZVREG > + vld vr2, sp, 13*SZREG + 2*SZVREG > + vld vr3, sp, 13*SZREG + 3*SZVREG > + vld vr4, sp, 13*SZREG + 4*SZVREG > + vld vr5, sp, 13*SZREG + 5*SZVREG > + vld vr6, sp, 13*SZREG + 6*SZVREG > + vld vr7, sp, 13*SZREG + 7*SZVREG > + vld vr8, sp, 13*SZREG + 8*SZVREG > + vld vr9, sp, 13*SZREG + 9*SZVREG > + vld vr10, sp, 13*SZREG + 10*SZVREG > + vld vr11, sp, 13*SZREG + 11*SZVREG > + vld vr12, sp, 13*SZREG + 12*SZVREG > + vld vr13, sp, 13*SZREG + 13*SZVREG > + vld vr14, sp, 13*SZREG + 14*SZVREG > + vld vr15, sp, 13*SZREG + 15*SZVREG > + vld vr16, sp, 13*SZREG + 16*SZVREG > + vld vr17, sp, 13*SZREG + 17*SZVREG > + vld vr18, sp, 13*SZREG + 18*SZVREG > + vld vr19, sp, 13*SZREG + 19*SZVREG > + vld vr20, sp, 13*SZREG + 20*SZVREG > + vld vr21, sp, 13*SZREG + 21*SZVREG > + vld vr22, sp, 13*SZREG + 22*SZVREG > + vld vr23, sp, 13*SZREG + 23*SZVREG > + vld vr24, sp, 13*SZREG + 24*SZVREG > + vld vr25, sp, 13*SZREG + 25*SZVREG > + vld vr26, sp, 13*SZREG + 26*SZVREG > + vld vr27, sp, 13*SZREG + 27*SZVREG > + vld vr28, sp, 13*SZREG + 28*SZVREG > + vld vr29, sp, 13*SZREG + 29*SZVREG > + vld vr30, sp, 13*SZREG + 30*SZVREG > + vld vr31, sp, 13*SZREG + 31*SZVREG > + REG_L t0, sp, 32*SZVREG > + movgr2fcsr fcsr0, t0 > +#elif !defined __loongarch_soft_float > + FREG_L fa0, sp, 13*SZREG + 0*SZFREG > + FREG_L fa1, sp, 13*SZREG + 1*SZFREG > + FREG_L fa2, sp, 13*SZREG + 2*SZFREG > + FREG_L fa3, sp, 13*SZREG + 3*SZFREG > + FREG_L fa4, sp, 13*SZREG + 4*SZFREG > + FREG_L fa5, sp, 13*SZREG + 5*SZFREG > + FREG_L fa6, sp, 13*SZREG + 6*SZFREG > + FREG_L fa7, sp, 13*SZREG + 7*SZFREG > + FREG_L ft0, sp, 13*SZREG + 8*SZFREG > + FREG_L ft1, sp, 13*SZREG + 9*SZFREG > + FREG_L ft2, sp, 13*SZREG + 10*SZFREG > + FREG_L ft3, sp, 13*SZREG + 11*SZFREG > + FREG_L ft4, 
sp, 13*SZREG + 12*SZFREG > + FREG_L ft5, sp, 13*SZREG + 13*SZFREG > + FREG_L ft6, sp, 13*SZREG + 14*SZFREG > + FREG_L ft7, sp, 13*SZREG + 15*SZFREG > + FREG_L ft8, sp, 13*SZREG + 16*SZFREG > + FREG_L ft9, sp, 13*SZREG + 17*SZFREG > + FREG_L ft10, sp, 13*SZREG + 18*SZFREG > + FREG_L ft11, sp, 13*SZREG + 19*SZFREG > + FREG_L ft12, sp, 13*SZREG + 20*SZFREG > + FREG_L ft13, sp, 13*SZREG + 21*SZFREG > + FREG_L ft14, sp, 13*SZREG + 22*SZFREG > + FREG_L ft15, sp, 13*SZREG + 23*SZFREG > + REG_L t0, sp, 24*SZFREG > + movgr2fcsr fcsr0, t0 > +#endif /* #ifdef USE_LASX */ > + > + ADDI sp, sp, FRAME_SIZE > + b Lret > + cfi_endproc > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > +#endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S > new file mode 100644 > index 0000000000..4a17079169 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.S > @@ -0,0 +1,93 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. Update Copyright years to 2024. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#include <sysdep.h> > +#include <tls.h> > +#include "tlsdesc.h" > + > + .text > + > + /* Compute the thread pointer offset for symbols in the static > + TLS block. The offset is the same for all threads. > + Prototype: > + _dl_tlsdesc_return (tlsdesc *); */ > + .hidden _dl_tlsdesc_return > + .global _dl_tlsdesc_return > + .type _dl_tlsdesc_return,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_return: > + REG_L a0, a0, 8 > + RET > + cfi_endproc > + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return > + > + /* Handler for undefined weak TLS symbols. > + Prototype: > + _dl_tlsdesc_undefweak (tlsdesc *); > + > + The second word of the descriptor contains the addend. > + Return the addend minus the thread pointer. This ensures > + that when the caller adds on the thread pointer it gets back > + the addend. */ > + .hidden _dl_tlsdesc_undefweak > + .global _dl_tlsdesc_undefweak > + .type _dl_tlsdesc_undefweak,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_undefweak: > + REG_L a0, a0, 8 > + sub.d a0, a0, tp > + RET > + cfi_endproc > + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak > + > + > +#ifdef SHARED > + > +#if !defined __loongarch_soft_float > + > +#define USE_LASX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx > +#define Lret Lret_lasx > +#define Lslow Lslow_lasx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LASX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > + > +#define USE_LSX > +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx > +#define Lret Lret_lsx > +#define Lslow Lslow_lsx > +#include "dl-tlsdesc-dynamic.h" > +#undef FRAME_SIZE > +#undef USE_LSX > +#undef _dl_tlsdesc_dynamic > +#undef Lret > +#undef Lslow > + > +#endif > + > +#include "dl-tlsdesc-dynamic.h" > + > +#endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h > new file mode 100644 > index 0000000000..988037a714 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.h > 
@@ -0,0 +1,53 @@ > +/* Thread-local storage descriptor handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _DL_TLSDESC_H > +#define _DL_TLSDESC_H > + > +#include <dl-tls.h> > + > +/* Type used to represent a TLS descriptor in the GOT. */ > +struct tlsdesc > +{ > + ptrdiff_t (*entry) (struct tlsdesc *); > + void *arg; > +}; > + > +/* Type used as the argument in a TLS descriptor for a symbol that > + needs dynamic TLS offsets. */ > +struct tlsdesc_dynamic_arg > +{ > + tls_index tlsinfo; > + size_t gen_count; > +}; > + > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); > + > +# ifdef SHARED > +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); > +#if !defined __loongarch_soft_float Minor style, usually for single tests we use '#ifndef' and add attribute_hidden at the end of prototype. 
> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); > +#endif > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); > +#endif > + > +#endif > diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h > index 4d8737ee7f..9b1773634c 100644 > --- a/sysdeps/loongarch/linkmap.h > +++ b/sysdeps/loongarch/linkmap.h > @@ -19,4 +19,5 @@ > struct link_map_machine > { > ElfW (Addr) plt; /* Address of .plt. */ > + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ > }; > diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h > index 51521a7eb4..23c1d12914 100644 > --- a/sysdeps/loongarch/sys/asm.h > +++ b/sysdeps/loongarch/sys/asm.h > @@ -25,6 +25,7 @@ > /* Macros to handle different pointer/register sizes for 32/64-bit code. */ > #define SZREG 8 > #define SZFREG 8 > +#define SZFCSREG 4 > #define SZVREG 16 > #define SZXREG 32 > #define REG_L ld.d > diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h > index f61ee25b25..80ce3e9c00 100644 > --- a/sysdeps/loongarch/sys/regdef.h > +++ b/sysdeps/loongarch/sys/regdef.h > @@ -97,6 +97,7 @@ > #define fcc5 $fcc5 > #define fcc6 $fcc6 > #define fcc7 $fcc7 > +#define fcsr0 $fcsr0 > > #define vr0 $vr0 > #define vr1 $vr1 > diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c > new file mode 100644 > index 0000000000..a357e7619f > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.c > @@ -0,0 +1,39 @@ > +/* Manage TLS descriptors. AArch64 version. > + > + Copyright (C) 2011-2023 Free Software Foundation, Inc. Update Copyright years to 2024 and remove the 'AArch64'. > + > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <ldsodefs.h> > +#include <tls.h> > +#include <dl-tlsdesc.h> > +#include <dl-unmap-segments.h> > +#include <tlsdeschtab.h> > + > +/* Unmap the dynamic object, but also release its TLS descriptor table > + if there is one. */ > + > +void > +_dl_unmap (struct link_map *map) > +{ > + _dl_unmap_segments (map); > + > +#ifdef SHARED > + if (map->l_mach.tlsdesc_table) > + htab_delete (map->l_mach.tlsdesc_table); > +#endif > +} > diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym > new file mode 100644 > index 0000000000..bcab218631 > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.sym > @@ -0,0 +1,19 @@ > +#include <stddef.h> > +#include <sysdep.h> > +#include <tls.h> > +#include <link.h> > +#include <dl-tlsdesc.h> > + > +-- > + > +-- Abuse tls.h macros to derive offsets relative to the thread register. 
> + > +TLSDESC_ARG offsetof(struct tlsdesc, arg) > +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) > +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) > +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) > +TCBHEAD_DTV offsetof(tcbhead_t, dtv) > +DTV_COUNTER offsetof(dtv_t, counter) > +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED > +TLS_DTV_OFFSET TLS_DTV_OFFSET > +SIZE_OF_DTV sizeof(tcbhead_t) > diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data > index 547b1c1b7f..ec32e6d13f 100644 > --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data > +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data > @@ -5,3 +5,5 @@ libc.so: calloc > libc.so: free > libc.so: malloc > libc.so: realloc > +# The dynamic loader needs __tls_get_addr for TLS. > +ld.so: __tls_get_addr
Thanks a lot for the review! A v3 version of the patch has been sent: https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html 在 2024/3/4 下午11:42, H.J. Lu 写道: > On Wed, Feb 28, 2024 at 5:44 PM mengqinggang <mengqinggang@loongson.cn> wrote: >> This is mostly based on the AArch64 and RISC-V implementations. >> >> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. >> >> For the _dl_tlsdesc_dynamic function slow path, temporarily save and restore >> all vector registers. >> --- >> Changes v1 -> v2: >> - Fix vr24-vr31, xr24-xr31 typo. >> - Save and restore the maximum-length float or vector registers in _dl_tlsdesc_dynamic. >> - Save and restore fcsr0 in _dl_tlsdesc_dynamic. >> >> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html >> >> elf/elf.h | 2 + >> sysdeps/loongarch/Makefile | 6 + >> sysdeps/loongarch/dl-link.sym | 1 + >> sysdeps/loongarch/dl-machine.h | 60 ++- >> sysdeps/loongarch/dl-tls.h | 9 +- >> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++ >> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++ >> sysdeps/loongarch/dl-tlsdesc.h | 53 +++ >> sysdeps/loongarch/linkmap.h | 1 + >> sysdeps/loongarch/sys/asm.h | 1 + >> sysdeps/loongarch/sys/regdef.h | 1 + >> sysdeps/loongarch/tlsdesc.c | 39 ++ >> sysdeps/loongarch/tlsdesc.sym | 19 + >> .../unix/sysv/linux/loongarch/localplt.data | 2 + >> 14 files changed, 625 insertions(+), 3 deletions(-) >> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h >> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S >> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h >> create mode 100644 sysdeps/loongarch/tlsdesc.c >> create mode 100644 sysdeps/loongarch/tlsdesc.sym >> >> diff --git a/elf/elf.h b/elf/elf.h >> index f2206e5c06..eec24ea049 100644 >> --- a/elf/elf.h >> +++ b/elf/elf.h >> @@ -4237,6 +4237,8 @@ enum >> #define R_LARCH_TLS_TPREL32 10 >> #define R_LARCH_TLS_TPREL64 11 >> #define R_LARCH_IRELATIVE 12 >> +#define R_LARCH_TLS_DESC32 13 >> +#define R_LARCH_TLS_DESC64 14 >> >> /* Reserved
for future relocs that the dynamic linker must understand. */ >> >> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile >> index 43d2f583cd..181389e787 100644 >> --- a/sysdeps/loongarch/Makefile >> +++ b/sysdeps/loongarch/Makefile >> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h >> endif >> >> ifeq ($(subdir),elf) >> +sysdep-dl-routines += tlsdesc dl-tlsdesc >> gen-as-const-headers += dl-link.sym >> endif >> >> +ifeq ($(subdir),csu) >> +gen-as-const-headers += tlsdesc.sym >> +endif >> + >> + >> # LoongArch's assembler also needs to know about PIC as it changes the >> # definition of some assembler macros. >> ASFLAGS-.os += $(pic-ccflag) >> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym >> index b534968e30..fd81ef37d5 100644 >> --- a/sysdeps/loongarch/dl-link.sym >> +++ b/sysdeps/loongarch/dl-link.sym >> @@ -1,6 +1,7 @@ >> #include <stddef.h> >> #include <sysdep.h> >> #include <link.h> >> +#include <dl-tlsdesc.h> >> >> DL_SIZEOF_RG sizeof(struct La_loongarch_regs) >> DL_SIZEOF_RV sizeof(struct La_loongarch_retval) >> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h >> index ab81b82d95..8ca6c224f6 100644 >> --- a/sysdeps/loongarch/dl-machine.h >> +++ b/sysdeps/loongarch/dl-machine.h >> @@ -25,7 +25,7 @@ >> #include <entry.h> >> #include <elf/elf.h> >> #include <sys/asm.h> >> -#include <dl-tls.h> >> +#include <dl-tlsdesc.h> >> #include <dl-static-tls.h> >> #include <dl-machine-rel.h> >> >> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], >> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; >> break; >> >> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: >> + { >> + struct tlsdesc volatile *td = >> + (struct tlsdesc volatile *)addr_field; >> + if (! 
sym) >> + { >> + td->arg = (void*)reloc->r_addend; >> + td->entry = _dl_tlsdesc_undefweak; >> + } >> + else >> + { >> +# ifndef SHARED >> + CHECK_STATIC_TLS (map, sym_map); >> +# else >> + if (!TRY_STATIC_TLS (map, sym_map)) >> + { >> + td->arg = _dl_make_tlsdesc_dynamic >> + (sym_map, sym->st_value + reloc->r_addend); >> +# if !defined __loongarch_soft_float >> + if (SUPPORT_LASX) >> + td->entry = _dl_tlsdesc_dynamic_lasx; >> + else >> + if (SUPPORT_LSX) >> + td->entry = _dl_tlsdesc_dynamic_lsx; >> + else >> +# endif >> + td->entry = _dl_tlsdesc_dynamic; >> + } >> + else >> +# endif >> + { >> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) >> + + reloc->r_addend); >> + td->entry = _dl_tlsdesc_return; >> + } >> + } >> + break; >> + } >> + >> case R_LARCH_COPY: >> { >> if (sym == NULL) >> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], >> else >> *reloc_addr = map->l_mach.plt; >> } >> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) >> + { >> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); >> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); >> + const ElfW (Sym) *sym = &symtab[symndx]; >> + const struct r_found_version *version = NULL; >> + >> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) >> + { >> + const ElfW (Half) *vernum = >> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); >> + version = &map->l_versions[vernum[symndx] & 0x7fff]; >> + } >> + >> + /* Always initialize TLS descriptors completely, because lazy >> + initialization requires synchronization at every TLS access. 
*/ >> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, >> + skip_ifunc); >> + } >> else >> _dl_reloc_bad_type (map, r_type, 1); >> } >> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h >> index 29924b866d..de593c002d 100644 >> --- a/sysdeps/loongarch/dl-tls.h >> +++ b/sysdeps/loongarch/dl-tls.h >> @@ -16,6 +16,9 @@ >> License along with the GNU C Library. If not, see >> <https://www.gnu.org/licenses/>. */ >> >> +#ifndef _DL_TLS_H >> +#define _DL_TLS_H >> + >> /* Type used for the representation of TLS information in the GOT. */ >> typedef struct >> { >> @@ -23,6 +26,8 @@ typedef struct >> unsigned long int ti_offset; >> } tls_index; >> >> +extern void *__tls_get_addr (tls_index *ti); >> + >> /* The thread pointer points to the first static TLS block. */ >> #define TLS_TP_OFFSET 0 >> >> @@ -37,10 +42,10 @@ typedef struct >> /* Compute the value for a DTPREL reloc. */ >> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) >> >> -extern void *__tls_get_addr (tls_index *ti); >> - >> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) >> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) >> >> /* Value used for dtv entries for which the allocation is delayed. */ >> #define TLS_DTV_UNALLOCATED ((void *) -1l) >> + >> +#endif >> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >> new file mode 100644 >> index 0000000000..0d8c9bb991 >> --- /dev/null >> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >> @@ -0,0 +1,341 @@ >> +/* Thread-local storage handling in the ELF dynamic linker. >> + LoongArch version. >> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >> + >> + This file is part of the GNU C Library. 
>> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#ifdef USE_LASX >> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) >> +#elif defined USE_LSX >> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) >> +#elif !defined __loongarch_soft_float >> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) >> +#else >> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) >> +#endif >> + >> +#ifdef SHARED >> + /* Handler for dynamic TLS symbols. >> + Prototype: >> + _dl_tlsdesc_dynamic (tlsdesc *) ; >> + >> + The second word of the descriptor points to a >> + tlsdesc_dynamic_arg structure. >> + >> + Returns the offset between the thread pointer and the >> + object referenced by the argument. 
>> + >> + ptrdiff_t >> + __attribute__ ((__regparm__ (1))) >> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >> + { >> + struct tlsdesc_dynamic_arg *td = tdp->arg; >> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); >> + if (__builtin_expect (td->gen_count <= dtv[0].counter >> + && (dtv[td->tlsinfo.ti_module].pointer.val >> + != TLS_DTV_UNALLOCATED), >> + 1)) >> + return dtv[td->tlsinfo.ti_module].pointer.val >> + + td->tlsinfo.ti_offset >> + - __thread_pointer; >> + >> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >> + } >> + */ >> + .hidden _dl_tlsdesc_dynamic >> + .global _dl_tlsdesc_dynamic >> + .type _dl_tlsdesc_dynamic,%function >> + cfi_startproc >> + .align 2 >> +_dl_tlsdesc_dynamic: >> + /* Save just enough registers to support fast path, if we fall >> + into slow path we will save additional registers. */ >> + ADDI sp, sp,-24 >> + REG_S t0, sp, 0 >> + REG_S t1, sp, 8 >> + REG_S t2, sp, 16 >> + >> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start >> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg >> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count >> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter >> + bltu t2, t1, Lslow >> + >> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module >> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ >> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) >> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val >> + li.d t2, TLS_DTV_UNALLOCATED >> + beq t1, t2, Lslow >> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset >> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset >> + add.d a0, t1, t2 >> +Lret: >> + sub.d a0, a0, tp >> + REG_L t0, sp, 0 >> + REG_L t1, sp, 8 >> + REG_L t2, sp, 16 >> + ADDI sp, sp, 24 >> + RET >> + >> +Lslow: >> + /* This is the slow path. We need to call __tls_get_addr() which >> + means we need to save and restore all the register that the >> + callee will trash. 
*/ >> + >> + /* Save the remaining registers that we must treat as caller save. */ >> + ADDI sp, sp, -FRAME_SIZE >> + REG_S ra, sp, 0 * SZREG >> + REG_S a1, sp, 1 * SZREG >> + REG_S a2, sp, 2 * SZREG >> + REG_S a3, sp, 3 * SZREG >> + REG_S a4, sp, 4 * SZREG >> + REG_S a5, sp, 5 * SZREG >> + REG_S a6, sp, 6 * SZREG >> + REG_S a7, sp, 7 * SZREG >> + REG_S t4, sp, 8 * SZREG >> + REG_S t5, sp, 9 * SZREG >> + REG_S t6, sp, 10 * SZREG >> + REG_S t7, sp, 11 * SZREG >> + REG_S t8, sp, 12 * SZREG >> + >> +#ifdef USE_LASX >> + xvst xr0, sp, 13*SZREG + 0*SZXREG >> + xvst xr1, sp, 13*SZREG + 1*SZXREG >> + xvst xr2, sp, 13*SZREG + 2*SZXREG >> + xvst xr3, sp, 13*SZREG + 3*SZXREG >> + xvst xr4, sp, 13*SZREG + 4*SZXREG >> + xvst xr5, sp, 13*SZREG + 5*SZXREG >> + xvst xr6, sp, 13*SZREG + 6*SZXREG >> + xvst xr7, sp, 13*SZREG + 7*SZXREG >> + xvst xr8, sp, 13*SZREG + 8*SZXREG >> + xvst xr9, sp, 13*SZREG + 9*SZXREG >> + xvst xr10, sp, 13*SZREG + 10*SZXREG >> + xvst xr11, sp, 13*SZREG + 11*SZXREG >> + xvst xr12, sp, 13*SZREG + 12*SZXREG >> + xvst xr13, sp, 13*SZREG + 13*SZXREG >> + xvst xr14, sp, 13*SZREG + 14*SZXREG >> + xvst xr15, sp, 13*SZREG + 15*SZXREG >> + xvst xr16, sp, 13*SZREG + 16*SZXREG >> + xvst xr17, sp, 13*SZREG + 17*SZXREG >> + xvst xr18, sp, 13*SZREG + 18*SZXREG >> + xvst xr19, sp, 13*SZREG + 19*SZXREG >> + xvst xr20, sp, 13*SZREG + 20*SZXREG >> + xvst xr21, sp, 13*SZREG + 21*SZXREG >> + xvst xr22, sp, 13*SZREG + 22*SZXREG >> + xvst xr23, sp, 13*SZREG + 23*SZXREG >> + xvst xr24, sp, 13*SZREG + 24*SZXREG >> + xvst xr25, sp, 13*SZREG + 25*SZXREG >> + xvst xr26, sp, 13*SZREG + 26*SZXREG >> + xvst xr27, sp, 13*SZREG + 27*SZXREG >> + xvst xr28, sp, 13*SZREG + 28*SZXREG >> + xvst xr29, sp, 13*SZREG + 29*SZXREG >> + xvst xr30, sp, 13*SZREG + 30*SZXREG >> + xvst xr31, sp, 13*SZREG + 31*SZXREG >> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >> + # some fields in fcsr0 >> + movfcsr2gr t0, fcsr0 >> + REG_S t0, sp, 32*SZXREG >> +#elif defined USE_LSX >> + vst 
vr0, sp, 13*SZREG + 0*SZVREG >> + vst vr1, sp, 13*SZREG + 1*SZVREG >> + vst vr2, sp, 13*SZREG + 2*SZVREG >> + vst vr3, sp, 13*SZREG + 3*SZVREG >> + vst vr4, sp, 13*SZREG + 4*SZVREG >> + vst vr5, sp, 13*SZREG + 5*SZVREG >> + vst vr6, sp, 13*SZREG + 6*SZVREG >> + vst vr7, sp, 13*SZREG + 7*SZVREG >> + vst vr8, sp, 13*SZREG + 8*SZVREG >> + vst vr9, sp, 13*SZREG + 9*SZVREG >> + vst vr10, sp, 13*SZREG + 10*SZVREG >> + vst vr11, sp, 13*SZREG + 11*SZVREG >> + vst vr12, sp, 13*SZREG + 12*SZVREG >> + vst vr13, sp, 13*SZREG + 13*SZVREG >> + vst vr14, sp, 13*SZREG + 14*SZVREG >> + vst vr15, sp, 13*SZREG + 15*SZVREG >> + vst vr16, sp, 13*SZREG + 16*SZVREG >> + vst vr17, sp, 13*SZREG + 17*SZVREG >> + vst vr18, sp, 13*SZREG + 18*SZVREG >> + vst vr19, sp, 13*SZREG + 19*SZVREG >> + vst vr20, sp, 13*SZREG + 20*SZVREG >> + vst vr21, sp, 13*SZREG + 21*SZVREG >> + vst vr22, sp, 13*SZREG + 22*SZVREG >> + vst vr23, sp, 13*SZREG + 23*SZVREG >> + vst vr24, sp, 13*SZREG + 24*SZVREG >> + vst vr25, sp, 13*SZREG + 25*SZVREG >> + vst vr26, sp, 13*SZREG + 26*SZVREG >> + vst vr27, sp, 13*SZREG + 27*SZVREG >> + vst vr28, sp, 13*SZREG + 28*SZVREG >> + vst vr29, sp, 13*SZREG + 29*SZVREG >> + vst vr30, sp, 13*SZREG + 30*SZVREG >> + vst vr31, sp, 13*SZREG + 31*SZVREG >> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >> + # some fields in fcsr0 >> + movfcsr2gr t0, fcsr0 >> + REG_S t0, sp, 32*SZVREG >> +#elif !defined __loongarch_soft_float >> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG >> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG >> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG >> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG >> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG >> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG >> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG >> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG >> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG >> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG >> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG >> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG >> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG >> + FREG_S ft5, 
sp, 13*SZREG + 13*SZFREG >> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG >> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG >> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG >> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG >> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG >> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG >> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG >> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG >> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG >> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG >> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >> + # some fields in fcsr0 >> + movfcsr2gr t0, fcsr0 >> + REG_S t0, sp, 24*SZFREG >> +#endif /* #ifdef USE_LASX */ >> + >> + bl __tls_get_addr >> + ADDI a0, a0, -TLS_DTV_OFFSET >> + >> + REG_L ra, sp, 0 >> + REG_L a1, sp, 1 * 8 >> + REG_L a2, sp, 2 * 8 >> + REG_L a3, sp, 3 * 8 >> + REG_L a4, sp, 4 * 8 >> + REG_L a5, sp, 5 * 8 >> + REG_L a6, sp, 6 * 8 >> + REG_L a7, sp, 7 * 8 >> + REG_L t4, sp, 8 * 8 >> + REG_L t5, sp, 9 * 8 >> + REG_L t6, sp, 10 * 8 >> + REG_L t7, sp, 11 * 8 >> + REG_L t8, sp, 12 * 8 >> + >> +#ifdef USE_LASX >> + xvld xr0, sp, 13*SZREG + 0*SZXREG >> + xvld xr1, sp, 13*SZREG + 1*SZXREG >> + xvld xr2, sp, 13*SZREG + 2*SZXREG >> + xvld xr3, sp, 13*SZREG + 3*SZXREG >> + xvld xr4, sp, 13*SZREG + 4*SZXREG >> + xvld xr5, sp, 13*SZREG + 5*SZXREG >> + xvld xr6, sp, 13*SZREG + 6*SZXREG >> + xvld xr7, sp, 13*SZREG + 7*SZXREG >> + xvld xr8, sp, 13*SZREG + 8*SZXREG >> + xvld xr9, sp, 13*SZREG + 9*SZXREG >> + xvld xr10, sp, 13*SZREG + 10*SZXREG >> + xvld xr11, sp, 13*SZREG + 11*SZXREG >> + xvld xr12, sp, 13*SZREG + 12*SZXREG >> + xvld xr13, sp, 13*SZREG + 13*SZXREG >> + xvld xr14, sp, 13*SZREG + 14*SZXREG >> + xvld xr15, sp, 13*SZREG + 15*SZXREG >> + xvld xr16, sp, 13*SZREG + 16*SZXREG >> + xvld xr17, sp, 13*SZREG + 17*SZXREG >> + xvld xr18, sp, 13*SZREG + 18*SZXREG >> + xvld xr19, sp, 13*SZREG + 19*SZXREG >> + xvld xr20, sp, 13*SZREG + 20*SZXREG >> + xvld xr21, sp, 13*SZREG + 21*SZXREG >> + xvld xr22, sp, 13*SZREG + 22*SZXREG >> + xvld xr23, sp, 
13*SZREG + 23*SZXREG >> + xvld xr24, sp, 13*SZREG + 24*SZXREG >> + xvld xr25, sp, 13*SZREG + 25*SZXREG >> + xvld xr26, sp, 13*SZREG + 26*SZXREG >> + xvld xr27, sp, 13*SZREG + 27*SZXREG >> + xvld xr28, sp, 13*SZREG + 28*SZXREG >> + xvld xr29, sp, 13*SZREG + 29*SZXREG >> + xvld xr30, sp, 13*SZREG + 30*SZXREG >> + xvld xr31, sp, 13*SZREG + 31*SZXREG >> + REG_L t0, sp, 32*SZXREG >> + movgr2fcsr fcsr0, t0 >> +#elif defined USE_LSX >> + vld vr0, sp, 13*SZREG + 0*SZVREG >> + vld vr1, sp, 13*SZREG + 1*SZVREG >> + vld vr2, sp, 13*SZREG + 2*SZVREG >> + vld vr3, sp, 13*SZREG + 3*SZVREG >> + vld vr4, sp, 13*SZREG + 4*SZVREG >> + vld vr5, sp, 13*SZREG + 5*SZVREG >> + vld vr6, sp, 13*SZREG + 6*SZVREG >> + vld vr7, sp, 13*SZREG + 7*SZVREG >> + vld vr8, sp, 13*SZREG + 8*SZVREG >> + vld vr9, sp, 13*SZREG + 9*SZVREG >> + vld vr10, sp, 13*SZREG + 10*SZVREG >> + vld vr11, sp, 13*SZREG + 11*SZVREG >> + vld vr12, sp, 13*SZREG + 12*SZVREG >> + vld vr13, sp, 13*SZREG + 13*SZVREG >> + vld vr14, sp, 13*SZREG + 14*SZVREG >> + vld vr15, sp, 13*SZREG + 15*SZVREG >> + vld vr16, sp, 13*SZREG + 16*SZVREG >> + vld vr17, sp, 13*SZREG + 17*SZVREG >> + vld vr18, sp, 13*SZREG + 18*SZVREG >> + vld vr19, sp, 13*SZREG + 19*SZVREG >> + vld vr20, sp, 13*SZREG + 20*SZVREG >> + vld vr21, sp, 13*SZREG + 21*SZVREG >> + vld vr22, sp, 13*SZREG + 22*SZVREG >> + vld vr23, sp, 13*SZREG + 23*SZVREG >> + vld vr24, sp, 13*SZREG + 24*SZVREG >> + vld vr25, sp, 13*SZREG + 25*SZVREG >> + vld vr26, sp, 13*SZREG + 26*SZVREG >> + vld vr27, sp, 13*SZREG + 27*SZVREG >> + vld vr28, sp, 13*SZREG + 28*SZVREG >> + vld vr29, sp, 13*SZREG + 29*SZVREG >> + vld vr30, sp, 13*SZREG + 30*SZVREG >> + vld vr31, sp, 13*SZREG + 31*SZVREG >> + REG_L t0, sp, 32*SZVREG >> + movgr2fcsr fcsr0, t0 >> +#elif !defined __loongarch_soft_float >> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG >> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG >> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG >> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG >> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG >> + 
FREG_L fa5, sp, 13*SZREG + 5*SZFREG >> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG >> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG >> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG >> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG >> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG >> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG >> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG >> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG >> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG >> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG >> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG >> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG >> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG >> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG >> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG >> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG >> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG >> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG >> + REG_L t0, sp, 24*SZFREG >> + movgr2fcsr fcsr0, t0 >> +#endif /* #ifdef USE_LASX */ >> + >> + ADDI sp, sp, FRAME_SIZE >> + b Lret >> + cfi_endproc >> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >> +#endif /* #ifdef SHARED */ >> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S >> new file mode 100644 >> index 0000000000..4a17079169 >> --- /dev/null >> +++ b/sysdeps/loongarch/dl-tlsdesc.S >> @@ -0,0 +1,93 @@ >> +/* Thread-local storage handling in the ELF dynamic linker. >> + LoongArch version. >> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >> + >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. 
>> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#include <sysdep.h> >> +#include <tls.h> >> +#include "tlsdesc.h" >> + >> + .text >> + >> + /* Compute the thread pointer offset for symbols in the static >> + TLS block. The offset is the same for all threads. >> + Prototype: >> + _dl_tlsdesc_return (tlsdesc *); */ >> + .hidden _dl_tlsdesc_return >> + .global _dl_tlsdesc_return >> + .type _dl_tlsdesc_return,%function >> + cfi_startproc >> + .align 2 >> +_dl_tlsdesc_return: >> + REG_L a0, a0, 8 >> + RET >> + cfi_endproc >> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return >> + >> + /* Handler for undefined weak TLS symbols. >> + Prototype: >> + _dl_tlsdesc_undefweak (tlsdesc *); >> + >> + The second word of the descriptor contains the addend. >> + Return the addend minus the thread pointer. This ensures >> + that when the caller adds on the thread pointer it gets back >> + the addend. 
*/ >> + .hidden _dl_tlsdesc_undefweak >> + .global _dl_tlsdesc_undefweak >> + .type _dl_tlsdesc_undefweak,%function >> + cfi_startproc >> + .align 2 >> +_dl_tlsdesc_undefweak: >> + REG_L a0, a0, 8 >> + sub.d a0, a0, tp >> + RET >> + cfi_endproc >> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak >> + >> + >> +#ifdef SHARED >> + >> +#if !defined __loongarch_soft_float >> + >> +#define USE_LASX >> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx >> +#define Lret Lret_lasx >> +#define Lslow Lslow_lasx >> +#include "dl-tlsdesc-dynamic.h" >> +#undef FRAME_SIZE >> +#undef USE_LASX >> +#undef _dl_tlsdesc_dynamic >> +#undef Lret >> +#undef Lslow >> + >> +#define USE_LSX >> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx >> +#define Lret Lret_lsx >> +#define Lslow Lslow_lsx >> +#include "dl-tlsdesc-dynamic.h" >> +#undef FRAME_SIZE >> +#undef USE_LSX >> +#undef _dl_tlsdesc_dynamic >> +#undef Lret >> +#undef Lslow >> + >> +#endif >> + >> +#include "dl-tlsdesc-dynamic.h" >> + >> +#endif /* #ifdef SHARED */ >> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h >> new file mode 100644 >> index 0000000000..988037a714 >> --- /dev/null >> +++ b/sysdeps/loongarch/dl-tlsdesc.h >> @@ -0,0 +1,53 @@ >> +/* Thread-local storage descriptor handling in the ELF dynamic linker. >> + LoongArch version. >> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >> + >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. 
>> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#ifndef _DL_TLSDESC_H >> +#define _DL_TLSDESC_H >> + >> +#include <dl-tls.h> >> + >> +/* Type used to represent a TLS descriptor in the GOT. */ >> +struct tlsdesc >> +{ >> + ptrdiff_t (*entry) (struct tlsdesc *); >> + void *arg; >> +}; >> + >> +/* Type used as the argument in a TLS descriptor for a symbol that >> + needs dynamic TLS offsets. */ >> +struct tlsdesc_dynamic_arg >> +{ >> + tls_index tlsinfo; >> + size_t gen_count; >> +}; >> + >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); >> + >> +# ifdef SHARED >> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); >> +#if !defined __loongarch_soft_float >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); >> +#endif >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); >> +#endif >> + >> +#endif >> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h >> index 4d8737ee7f..9b1773634c 100644 >> --- a/sysdeps/loongarch/linkmap.h >> +++ b/sysdeps/loongarch/linkmap.h >> @@ -19,4 +19,5 @@ >> struct link_map_machine >> { >> ElfW (Addr) plt; /* Address of .plt. */ >> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ >> }; >> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h >> index 51521a7eb4..23c1d12914 100644 >> --- a/sysdeps/loongarch/sys/asm.h >> +++ b/sysdeps/loongarch/sys/asm.h >> @@ -25,6 +25,7 @@ >> /* Macros to handle different pointer/register sizes for 32/64-bit code. 
*/ >> #define SZREG 8 >> #define SZFREG 8 >> +#define SZFCSREG 4 >> #define SZVREG 16 >> #define SZXREG 32 >> #define REG_L ld.d >> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h >> index f61ee25b25..80ce3e9c00 100644 >> --- a/sysdeps/loongarch/sys/regdef.h >> +++ b/sysdeps/loongarch/sys/regdef.h >> @@ -97,6 +97,7 @@ >> #define fcc5 $fcc5 >> #define fcc6 $fcc6 >> #define fcc7 $fcc7 >> +#define fcsr0 $fcsr0 >> >> #define vr0 $vr0 >> #define vr1 $vr1 >> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c >> new file mode 100644 >> index 0000000000..a357e7619f >> --- /dev/null >> +++ b/sysdeps/loongarch/tlsdesc.c >> @@ -0,0 +1,39 @@ >> +/* Manage TLS descriptors. AArch64 version. > Change it to "LoongArch version". >> + >> >
Thanks a lot for the review! A new v3 version patch has been sent. https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html And some reply below. 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道: > > On 28/02/24 22:43, mengqinggang wrote: >> This is mostly based on AArch64 and RISC-V implementation. >> >> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. >> >> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore >> all vector registers. >> --- >> Changes v1 -> v2: >> - Fix vr24-vr31, xr24-xr31 typo. >> - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic. >> - Save and restore fcsr0 in _dl_tlsdesc_dynamic. >> >> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html > Patch looks ok, some comments below. > >> elf/elf.h | 2 + >> sysdeps/loongarch/Makefile | 6 + >> sysdeps/loongarch/dl-link.sym | 1 + >> sysdeps/loongarch/dl-machine.h | 60 ++- >> sysdeps/loongarch/dl-tls.h | 9 +- >> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++ >> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++ >> sysdeps/loongarch/dl-tlsdesc.h | 53 +++ >> sysdeps/loongarch/linkmap.h | 1 + >> sysdeps/loongarch/sys/asm.h | 1 + >> sysdeps/loongarch/sys/regdef.h | 1 + >> sysdeps/loongarch/tlsdesc.c | 39 ++ >> sysdeps/loongarch/tlsdesc.sym | 19 + >> .../unix/sysv/linux/loongarch/localplt.data | 2 + >> 14 files changed, 625 insertions(+), 3 deletions(-) >> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h >> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S >> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h >> create mode 100644 sysdeps/loongarch/tlsdesc.c >> create mode 100644 sysdeps/loongarch/tlsdesc.sym >> >> diff --git a/elf/elf.h b/elf/elf.h >> index f2206e5c06..eec24ea049 100644 >> --- a/elf/elf.h >> +++ b/elf/elf.h >> @@ -4237,6 +4237,8 @@ enum >> #define R_LARCH_TLS_TPREL32 10 >> #define R_LARCH_TLS_TPREL64 11 >> #define R_LARCH_IRELATIVE 12 >> +#define R_LARCH_TLS_DESC32 13 >> +#define 
R_LARCH_TLS_DESC64 14 >> >> /* Reserved for future relocs that the dynamic linker must understand. */ >> >> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile >> index 43d2f583cd..181389e787 100644 >> --- a/sysdeps/loongarch/Makefile >> +++ b/sysdeps/loongarch/Makefile >> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h >> endif >> >> ifeq ($(subdir),elf) >> +sysdep-dl-routines += tlsdesc dl-tlsdesc >> gen-as-const-headers += dl-link.sym >> endif >> >> +ifeq ($(subdir),csu) >> +gen-as-const-headers += tlsdesc.sym >> +endif >> + >> + >> # LoongArch's assembler also needs to know about PIC as it changes the >> # definition of some assembler macros. >> ASFLAGS-.os += $(pic-ccflag) >> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym >> index b534968e30..fd81ef37d5 100644 >> --- a/sysdeps/loongarch/dl-link.sym >> +++ b/sysdeps/loongarch/dl-link.sym >> @@ -1,6 +1,7 @@ >> #include <stddef.h> >> #include <sysdep.h> >> #include <link.h> >> +#include <dl-tlsdesc.h> >> >> DL_SIZEOF_RG sizeof(struct La_loongarch_regs) >> DL_SIZEOF_RV sizeof(struct La_loongarch_retval) >> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h >> index ab81b82d95..8ca6c224f6 100644 >> --- a/sysdeps/loongarch/dl-machine.h >> +++ b/sysdeps/loongarch/dl-machine.h >> @@ -25,7 +25,7 @@ >> #include <entry.h> >> #include <elf/elf.h> >> #include <sys/asm.h> >> -#include <dl-tls.h> >> +#include <dl-tlsdesc.h> >> #include <dl-static-tls.h> >> #include <dl-machine-rel.h> >> >> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], >> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; >> break; >> >> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: >> + { >> + struct tlsdesc volatile *td = >> + (struct tlsdesc volatile *)addr_field; >> + if (! 
sym) >> + { >> + td->arg = (void*)reloc->r_addend; >> + td->entry = _dl_tlsdesc_undefweak; >> + } >> + else >> + { >> +# ifndef SHARED >> + CHECK_STATIC_TLS (map, sym_map); >> +# else >> + if (!TRY_STATIC_TLS (map, sym_map)) >> + { >> + td->arg = _dl_make_tlsdesc_dynamic >> + (sym_map, sym->st_value + reloc->r_addend); >> +# if !defined __loongarch_soft_float >> + if (SUPPORT_LASX) >> + td->entry = _dl_tlsdesc_dynamic_lasx; >> + else >> + if (SUPPORT_LSX) >> + td->entry = _dl_tlsdesc_dynamic_lsx; >> + else >> +# endif >> + td->entry = _dl_tlsdesc_dynamic; >> + } >> + else >> +# endif >> + { >> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) >> + + reloc->r_addend); >> + td->entry = _dl_tlsdesc_return; >> + } >> + } >> + break; >> + } >> + >> case R_LARCH_COPY: >> { >> if (sym == NULL) >> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], >> else >> *reloc_addr = map->l_mach.plt; >> } >> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) > Use __glibc_likely here. > >> + { >> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); >> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); >> + const ElfW (Sym) *sym = &symtab[symndx]; >> + const struct r_found_version *version = NULL; >> + >> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) >> + { >> + const ElfW (Half) *vernum = >> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); >> + version = &map->l_versions[vernum[symndx] & 0x7fff]; >> + } >> + >> + /* Always initialize TLS descriptors completely, because lazy >> + initialization requires synchronization at every TLS access. 
*/ >> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, >> + skip_ifunc); >> + } >> else >> _dl_reloc_bad_type (map, r_type, 1); >> } >> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h >> index 29924b866d..de593c002d 100644 >> --- a/sysdeps/loongarch/dl-tls.h >> +++ b/sysdeps/loongarch/dl-tls.h >> @@ -16,6 +16,9 @@ >> License along with the GNU C Library. If not, see >> <https://www.gnu.org/licenses/>. */ >> >> +#ifndef _DL_TLS_H >> +#define _DL_TLS_H >> + >> /* Type used for the representation of TLS information in the GOT. */ >> typedef struct >> { >> @@ -23,6 +26,8 @@ typedef struct >> unsigned long int ti_offset; >> } tls_index; >> >> +extern void *__tls_get_addr (tls_index *ti); >> + >> /* The thread pointer points to the first static TLS block. */ >> #define TLS_TP_OFFSET 0 >> >> @@ -37,10 +42,10 @@ typedef struct >> /* Compute the value for a DTPREL reloc. */ >> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) >> >> -extern void *__tls_get_addr (tls_index *ti); >> - > Why move the function prototype? Maybe just want to take it out of a bunch of macros. > >> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) >> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) >> >> /* Value used for dtv entries for which the allocation is delayed. */ >> #define TLS_DTV_UNALLOCATED ((void *) -1l) >> + >> +#endif >> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >> new file mode 100644 >> index 0000000000..0d8c9bb991 >> --- /dev/null >> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >> @@ -0,0 +1,341 @@ >> +/* Thread-local storage handling in the ELF dynamic linker. >> + LoongArch version. >> + Copyright (C) 2011-2023 Free Software Foundation, Inc. > Update Copyright years to 2024. > >> + >> + This file is part of the GNU C Library. 
>> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#ifdef USE_LASX >> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) >> +#elif defined USE_LSX >> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) >> +#elif !defined __loongarch_soft_float >> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) >> +#else >> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) >> +#endif > I don't have a strong opinion, but another option that might be simpler is > to provide only one _dl_tlsdesc_dynamic implementation and check the > required save/restore of vector registers based on the hwcap value. The v3 patch provides only one _dl_tlsdesc_dynamic implementation. >> + >> +#ifdef SHARED >> + /* Handler for dynamic TLS symbols. >> + Prototype: >> + _dl_tlsdesc_dynamic (tlsdesc *) ; >> + >> + The second word of the descriptor points to a >> + tlsdesc_dynamic_arg structure. >> + >> + Returns the offset between the thread pointer and the >> + object referenced by the argument. >> + >> + ptrdiff_t >> + __attribute__ ((__regparm__ (1))) > Does this attribute really make sense for loongarch? This line has been deleted.
> >> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >> + { >> + struct tlsdesc_dynamic_arg *td = tdp->arg; >> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); >> + if (__builtin_expect (td->gen_count <= dtv[0].counter > Use __glibc_unlikely or just remove the __builtin_expect for clarity. > >> + && (dtv[td->tlsinfo.ti_module].pointer.val >> + != TLS_DTV_UNALLOCATED), >> + 1)) >> + return dtv[td->tlsinfo.ti_module].pointer.val >> + + td->tlsinfo.ti_offset >> + - __thread_pointer; >> + >> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >> + } >> + */ >> + .hidden _dl_tlsdesc_dynamic >> + .global _dl_tlsdesc_dynamic >> + .type _dl_tlsdesc_dynamic,%function >> + cfi_startproc >> + .align 2 >> +_dl_tlsdesc_dynamic: >> + /* Save just enough registers to support fast path, if we fall >> + into slow path we will save additional registers. */ >> + ADDI sp, sp,-24 >> + REG_S t0, sp, 0 >> + REG_S t1, sp, 8 >> + REG_S t2, sp, 16 >> + >> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start >> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg >> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count >> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter >> + bltu t2, t1, Lslow >> + >> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module >> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ >> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) >> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val >> + li.d t2, TLS_DTV_UNALLOCATED >> + beq t1, t2, Lslow >> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset >> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset >> + add.d a0, t1, t2 >> +Lret: >> + sub.d a0, a0, tp >> + REG_L t0, sp, 0 >> + REG_L t1, sp, 8 >> + REG_L t2, sp, 16 >> + ADDI sp, sp, 24 >> + RET >> + >> +Lslow: >> + /* This is the slow path. We need to call __tls_get_addr() which >> + means we need to save and restore all the register that the >> + callee will trash. 
*/ >> + >> + /* Save the remaining registers that we must treat as caller save. */ >> + ADDI sp, sp, -FRAME_SIZE >> + REG_S ra, sp, 0 * SZREG >> + REG_S a1, sp, 1 * SZREG >> + REG_S a2, sp, 2 * SZREG >> + REG_S a3, sp, 3 * SZREG >> + REG_S a4, sp, 4 * SZREG >> + REG_S a5, sp, 5 * SZREG >> + REG_S a6, sp, 6 * SZREG >> + REG_S a7, sp, 7 * SZREG >> + REG_S t4, sp, 8 * SZREG >> + REG_S t5, sp, 9 * SZREG >> + REG_S t6, sp, 10 * SZREG >> + REG_S t7, sp, 11 * SZREG >> + REG_S t8, sp, 12 * SZREG >> + >> +#ifdef USE_LASX >> + xvst xr0, sp, 13*SZREG + 0*SZXREG >> + xvst xr1, sp, 13*SZREG + 1*SZXREG >> + xvst xr2, sp, 13*SZREG + 2*SZXREG >> + xvst xr3, sp, 13*SZREG + 3*SZXREG >> + xvst xr4, sp, 13*SZREG + 4*SZXREG >> + xvst xr5, sp, 13*SZREG + 5*SZXREG >> + xvst xr6, sp, 13*SZREG + 6*SZXREG >> + xvst xr7, sp, 13*SZREG + 7*SZXREG >> + xvst xr8, sp, 13*SZREG + 8*SZXREG >> + xvst xr9, sp, 13*SZREG + 9*SZXREG >> + xvst xr10, sp, 13*SZREG + 10*SZXREG >> + xvst xr11, sp, 13*SZREG + 11*SZXREG >> + xvst xr12, sp, 13*SZREG + 12*SZXREG >> + xvst xr13, sp, 13*SZREG + 13*SZXREG >> + xvst xr14, sp, 13*SZREG + 14*SZXREG >> + xvst xr15, sp, 13*SZREG + 15*SZXREG >> + xvst xr16, sp, 13*SZREG + 16*SZXREG >> + xvst xr17, sp, 13*SZREG + 17*SZXREG >> + xvst xr18, sp, 13*SZREG + 18*SZXREG >> + xvst xr19, sp, 13*SZREG + 19*SZXREG >> + xvst xr20, sp, 13*SZREG + 20*SZXREG >> + xvst xr21, sp, 13*SZREG + 21*SZXREG >> + xvst xr22, sp, 13*SZREG + 22*SZXREG >> + xvst xr23, sp, 13*SZREG + 23*SZXREG >> + xvst xr24, sp, 13*SZREG + 24*SZXREG >> + xvst xr25, sp, 13*SZREG + 25*SZXREG >> + xvst xr26, sp, 13*SZREG + 26*SZXREG >> + xvst xr27, sp, 13*SZREG + 27*SZXREG >> + xvst xr28, sp, 13*SZREG + 28*SZXREG >> + xvst xr29, sp, 13*SZREG + 29*SZXREG >> + xvst xr30, sp, 13*SZREG + 30*SZXREG >> + xvst xr31, sp, 13*SZREG + 31*SZXREG >> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >> + # some fields in fcsr0 >> + movfcsr2gr t0, fcsr0 >> + REG_S t0, sp, 32*SZXREG >> +#elif defined USE_LSX >> + vst 
vr0, sp, 13*SZREG + 0*SZVREG >> + vst vr1, sp, 13*SZREG + 1*SZVREG >> + vst vr2, sp, 13*SZREG + 2*SZVREG >> + vst vr3, sp, 13*SZREG + 3*SZVREG >> + vst vr4, sp, 13*SZREG + 4*SZVREG >> + vst vr5, sp, 13*SZREG + 5*SZVREG >> + vst vr6, sp, 13*SZREG + 6*SZVREG >> + vst vr7, sp, 13*SZREG + 7*SZVREG >> + vst vr8, sp, 13*SZREG + 8*SZVREG >> + vst vr9, sp, 13*SZREG + 9*SZVREG >> + vst vr10, sp, 13*SZREG + 10*SZVREG >> + vst vr11, sp, 13*SZREG + 11*SZVREG >> + vst vr12, sp, 13*SZREG + 12*SZVREG >> + vst vr13, sp, 13*SZREG + 13*SZVREG >> + vst vr14, sp, 13*SZREG + 14*SZVREG >> + vst vr15, sp, 13*SZREG + 15*SZVREG >> + vst vr16, sp, 13*SZREG + 16*SZVREG >> + vst vr17, sp, 13*SZREG + 17*SZVREG >> + vst vr18, sp, 13*SZREG + 18*SZVREG >> + vst vr19, sp, 13*SZREG + 19*SZVREG >> + vst vr20, sp, 13*SZREG + 20*SZVREG >> + vst vr21, sp, 13*SZREG + 21*SZVREG >> + vst vr22, sp, 13*SZREG + 22*SZVREG >> + vst vr23, sp, 13*SZREG + 23*SZVREG >> + vst vr24, sp, 13*SZREG + 24*SZVREG >> + vst vr25, sp, 13*SZREG + 25*SZVREG >> + vst vr26, sp, 13*SZREG + 26*SZVREG >> + vst vr27, sp, 13*SZREG + 27*SZVREG >> + vst vr28, sp, 13*SZREG + 28*SZVREG >> + vst vr29, sp, 13*SZREG + 29*SZVREG >> + vst vr30, sp, 13*SZREG + 30*SZVREG >> + vst vr31, sp, 13*SZREG + 31*SZVREG >> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >> + # some fields in fcsr0 >> + movfcsr2gr t0, fcsr0 >> + REG_S t0, sp, 32*SZVREG >> +#elif !defined __loongarch_soft_float >> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG >> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG >> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG >> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG >> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG >> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG >> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG >> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG >> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG >> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG >> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG >> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG >> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG >> + FREG_S ft5, 
sp, 13*SZREG + 13*SZFREG >> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG >> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG >> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG >> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG >> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG >> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG >> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG >> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG >> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG >> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG >> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >> + # some fields in fcsr0 >> + movfcsr2gr t0, fcsr0 >> + REG_S t0, sp, 24*SZFREG >> +#endif /* #ifdef USE_LASX */ >> + >> + bl __tls_get_addr >> + ADDI a0, a0, -TLS_DTV_OFFSET >> + >> + REG_L ra, sp, 0 >> + REG_L a1, sp, 1 * 8 >> + REG_L a2, sp, 2 * 8 >> + REG_L a3, sp, 3 * 8 >> + REG_L a4, sp, 4 * 8 >> + REG_L a5, sp, 5 * 8 >> + REG_L a6, sp, 6 * 8 >> + REG_L a7, sp, 7 * 8 >> + REG_L t4, sp, 8 * 8 >> + REG_L t5, sp, 9 * 8 >> + REG_L t6, sp, 10 * 8 >> + REG_L t7, sp, 11 * 8 >> + REG_L t8, sp, 12 * 8 >> + >> +#ifdef USE_LASX >> + xvld xr0, sp, 13*SZREG + 0*SZXREG >> + xvld xr1, sp, 13*SZREG + 1*SZXREG >> + xvld xr2, sp, 13*SZREG + 2*SZXREG >> + xvld xr3, sp, 13*SZREG + 3*SZXREG >> + xvld xr4, sp, 13*SZREG + 4*SZXREG >> + xvld xr5, sp, 13*SZREG + 5*SZXREG >> + xvld xr6, sp, 13*SZREG + 6*SZXREG >> + xvld xr7, sp, 13*SZREG + 7*SZXREG >> + xvld xr8, sp, 13*SZREG + 8*SZXREG >> + xvld xr9, sp, 13*SZREG + 9*SZXREG >> + xvld xr10, sp, 13*SZREG + 10*SZXREG >> + xvld xr11, sp, 13*SZREG + 11*SZXREG >> + xvld xr12, sp, 13*SZREG + 12*SZXREG >> + xvld xr13, sp, 13*SZREG + 13*SZXREG >> + xvld xr14, sp, 13*SZREG + 14*SZXREG >> + xvld xr15, sp, 13*SZREG + 15*SZXREG >> + xvld xr16, sp, 13*SZREG + 16*SZXREG >> + xvld xr17, sp, 13*SZREG + 17*SZXREG >> + xvld xr18, sp, 13*SZREG + 18*SZXREG >> + xvld xr19, sp, 13*SZREG + 19*SZXREG >> + xvld xr20, sp, 13*SZREG + 20*SZXREG >> + xvld xr21, sp, 13*SZREG + 21*SZXREG >> + xvld xr22, sp, 13*SZREG + 22*SZXREG >> + xvld xr23, sp, 
13*SZREG + 23*SZXREG >> + xvld xr24, sp, 13*SZREG + 24*SZXREG >> + xvld xr25, sp, 13*SZREG + 25*SZXREG >> + xvld xr26, sp, 13*SZREG + 26*SZXREG >> + xvld xr27, sp, 13*SZREG + 27*SZXREG >> + xvld xr28, sp, 13*SZREG + 28*SZXREG >> + xvld xr29, sp, 13*SZREG + 29*SZXREG >> + xvld xr30, sp, 13*SZREG + 30*SZXREG >> + xvld xr31, sp, 13*SZREG + 31*SZXREG >> + REG_L t0, sp, 32*SZXREG >> + movgr2fcsr fcsr0, t0 >> +#elif defined USE_LSX >> + vld vr0, sp, 13*SZREG + 0*SZVREG >> + vld vr1, sp, 13*SZREG + 1*SZVREG >> + vld vr2, sp, 13*SZREG + 2*SZVREG >> + vld vr3, sp, 13*SZREG + 3*SZVREG >> + vld vr4, sp, 13*SZREG + 4*SZVREG >> + vld vr5, sp, 13*SZREG + 5*SZVREG >> + vld vr6, sp, 13*SZREG + 6*SZVREG >> + vld vr7, sp, 13*SZREG + 7*SZVREG >> + vld vr8, sp, 13*SZREG + 8*SZVREG >> + vld vr9, sp, 13*SZREG + 9*SZVREG >> + vld vr10, sp, 13*SZREG + 10*SZVREG >> + vld vr11, sp, 13*SZREG + 11*SZVREG >> + vld vr12, sp, 13*SZREG + 12*SZVREG >> + vld vr13, sp, 13*SZREG + 13*SZVREG >> + vld vr14, sp, 13*SZREG + 14*SZVREG >> + vld vr15, sp, 13*SZREG + 15*SZVREG >> + vld vr16, sp, 13*SZREG + 16*SZVREG >> + vld vr17, sp, 13*SZREG + 17*SZVREG >> + vld vr18, sp, 13*SZREG + 18*SZVREG >> + vld vr19, sp, 13*SZREG + 19*SZVREG >> + vld vr20, sp, 13*SZREG + 20*SZVREG >> + vld vr21, sp, 13*SZREG + 21*SZVREG >> + vld vr22, sp, 13*SZREG + 22*SZVREG >> + vld vr23, sp, 13*SZREG + 23*SZVREG >> + vld vr24, sp, 13*SZREG + 24*SZVREG >> + vld vr25, sp, 13*SZREG + 25*SZVREG >> + vld vr26, sp, 13*SZREG + 26*SZVREG >> + vld vr27, sp, 13*SZREG + 27*SZVREG >> + vld vr28, sp, 13*SZREG + 28*SZVREG >> + vld vr29, sp, 13*SZREG + 29*SZVREG >> + vld vr30, sp, 13*SZREG + 30*SZVREG >> + vld vr31, sp, 13*SZREG + 31*SZVREG >> + REG_L t0, sp, 32*SZVREG >> + movgr2fcsr fcsr0, t0 >> +#elif !defined __loongarch_soft_float >> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG >> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG >> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG >> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG >> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG >> + 
FREG_L fa5, sp, 13*SZREG + 5*SZFREG >> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG >> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG >> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG >> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG >> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG >> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG >> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG >> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG >> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG >> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG >> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG >> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG >> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG >> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG >> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG >> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG >> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG >> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG >> + REG_L t0, sp, 24*SZFREG >> + movgr2fcsr fcsr0, t0 >> +#endif /* #ifdef USE_LASX */ >> + >> + ADDI sp, sp, FRAME_SIZE >> + b Lret >> + cfi_endproc >> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >> +#endif /* #ifdef SHARED */ >> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S >> new file mode 100644 >> index 0000000000..4a17079169 >> --- /dev/null >> +++ b/sysdeps/loongarch/dl-tlsdesc.S >> @@ -0,0 +1,93 @@ >> +/* Thread-local storage handling in the ELF dynamic linker. >> + LoongArch version. >> + Copyright (C) 2011-2023 Free Software Foundation, Inc. > Update Copyright years to 2024. > >> + >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#include <sysdep.h> >> +#include <tls.h> >> +#include "tlsdesc.h" >> + >> + .text >> + >> + /* Compute the thread pointer offset for symbols in the static >> + TLS block. The offset is the same for all threads. >> + Prototype: >> + _dl_tlsdesc_return (tlsdesc *); */ >> + .hidden _dl_tlsdesc_return >> + .global _dl_tlsdesc_return >> + .type _dl_tlsdesc_return,%function >> + cfi_startproc >> + .align 2 >> +_dl_tlsdesc_return: >> + REG_L a0, a0, 8 >> + RET >> + cfi_endproc >> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return >> + >> + /* Handler for undefined weak TLS symbols. >> + Prototype: >> + _dl_tlsdesc_undefweak (tlsdesc *); >> + >> + The second word of the descriptor contains the addend. >> + Return the addend minus the thread pointer. This ensures >> + that when the caller adds on the thread pointer it gets back >> + the addend. 
*/ >> + .hidden _dl_tlsdesc_undefweak >> + .global _dl_tlsdesc_undefweak >> + .type _dl_tlsdesc_undefweak,%function >> + cfi_startproc >> + .align 2 >> +_dl_tlsdesc_undefweak: >> + REG_L a0, a0, 8 >> + sub.d a0, a0, tp >> + RET >> + cfi_endproc >> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak >> + >> + >> +#ifdef SHARED >> + >> +#if !defined __loongarch_soft_float >> + >> +#define USE_LASX >> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx >> +#define Lret Lret_lasx >> +#define Lslow Lslow_lasx >> +#include "dl-tlsdesc-dynamic.h" >> +#undef FRAME_SIZE >> +#undef USE_LASX >> +#undef _dl_tlsdesc_dynamic >> +#undef Lret >> +#undef Lslow >> + >> +#define USE_LSX >> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx >> +#define Lret Lret_lsx >> +#define Lslow Lslow_lsx >> +#include "dl-tlsdesc-dynamic.h" >> +#undef FRAME_SIZE >> +#undef USE_LSX >> +#undef _dl_tlsdesc_dynamic >> +#undef Lret >> +#undef Lslow >> + >> +#endif >> + >> +#include "dl-tlsdesc-dynamic.h" >> + >> +#endif /* #ifdef SHARED */ >> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h >> new file mode 100644 >> index 0000000000..988037a714 >> --- /dev/null >> +++ b/sysdeps/loongarch/dl-tlsdesc.h >> @@ -0,0 +1,53 @@ >> +/* Thread-local storage descriptor handling in the ELF dynamic linker. >> + LoongArch version. >> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >> + >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. 
>> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#ifndef _DL_TLSDESC_H >> +#define _DL_TLSDESC_H >> + >> +#include <dl-tls.h> >> + >> +/* Type used to represent a TLS descriptor in the GOT. */ >> +struct tlsdesc >> +{ >> + ptrdiff_t (*entry) (struct tlsdesc *); >> + void *arg; >> +}; >> + >> +/* Type used as the argument in a TLS descriptor for a symbol that >> + needs dynamic TLS offsets. */ >> +struct tlsdesc_dynamic_arg >> +{ >> + tls_index tlsinfo; >> + size_t gen_count; >> +}; >> + >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); >> + >> +# ifdef SHARED >> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); >> +#if !defined __loongarch_soft_float > Minor style, usually for single tests we use '#ifndef' and add > attribute_hidden at the end of prototype. > >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); >> +#endif >> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); >> +#endif >> + >> +#endif >> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h >> index 4d8737ee7f..9b1773634c 100644 >> --- a/sysdeps/loongarch/linkmap.h >> +++ b/sysdeps/loongarch/linkmap.h >> @@ -19,4 +19,5 @@ >> struct link_map_machine >> { >> ElfW (Addr) plt; /* Address of .plt. */ >> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ >> }; >> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h >> index 51521a7eb4..23c1d12914 100644 >> --- a/sysdeps/loongarch/sys/asm.h >> +++ b/sysdeps/loongarch/sys/asm.h >> @@ -25,6 +25,7 @@ >> /* Macros to handle different pointer/register sizes for 32/64-bit code. 
*/ >> #define SZREG 8 >> #define SZFREG 8 >> +#define SZFCSREG 4 >> #define SZVREG 16 >> #define SZXREG 32 >> #define REG_L ld.d >> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h >> index f61ee25b25..80ce3e9c00 100644 >> --- a/sysdeps/loongarch/sys/regdef.h >> +++ b/sysdeps/loongarch/sys/regdef.h >> @@ -97,6 +97,7 @@ >> #define fcc5 $fcc5 >> #define fcc6 $fcc6 >> #define fcc7 $fcc7 >> +#define fcsr0 $fcsr0 >> >> #define vr0 $vr0 >> #define vr1 $vr1 >> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c >> new file mode 100644 >> index 0000000000..a357e7619f >> --- /dev/null >> +++ b/sysdeps/loongarch/tlsdesc.c >> @@ -0,0 +1,39 @@ >> +/* Manage TLS descriptors. AArch64 version. >> + >> + Copyright (C) 2011-2023 Free Software Foundation, Inc. > Update Copyright years to 2024 and remove the 'AArch64'. > > >> + >> + This file is part of the GNU C Library. >> + >> + The GNU C Library is free software; you can redistribute it and/or >> + modify it under the terms of the GNU Lesser General Public >> + License as published by the Free Software Foundation; either >> + version 2.1 of the License, or (at your option) any later version. >> + >> + The GNU C Library is distributed in the hope that it will be useful, >> + but WITHOUT ANY WARRANTY; without even the implied warranty of >> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + Lesser General Public License for more details. >> + >> + You should have received a copy of the GNU Lesser General Public >> + License along with the GNU C Library; if not, see >> + <https://www.gnu.org/licenses/>. */ >> + >> +#include <ldsodefs.h> >> +#include <tls.h> >> +#include <dl-tlsdesc.h> >> +#include <dl-unmap-segments.h> >> +#include <tlsdeschtab.h> >> + >> +/* Unmap the dynamic object, but also release its TLS descriptor table >> + if there is one. 
*/ >> + >> +void >> +_dl_unmap (struct link_map *map) >> +{ >> + _dl_unmap_segments (map); >> + >> +#ifdef SHARED >> + if (map->l_mach.tlsdesc_table) >> + htab_delete (map->l_mach.tlsdesc_table); >> +#endif >> +} >> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym >> new file mode 100644 >> index 0000000000..bcab218631 >> --- /dev/null >> +++ b/sysdeps/loongarch/tlsdesc.sym >> @@ -0,0 +1,19 @@ >> +#include <stddef.h> >> +#include <sysdep.h> >> +#include <tls.h> >> +#include <link.h> >> +#include <dl-tlsdesc.h> >> + >> +-- >> + >> +-- Abuse tls.h macros to derive offsets relative to the thread register. >> + >> +TLSDESC_ARG offsetof(struct tlsdesc, arg) >> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) >> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) >> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) >> +TCBHEAD_DTV offsetof(tcbhead_t, dtv) >> +DTV_COUNTER offsetof(dtv_t, counter) >> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED >> +TLS_DTV_OFFSET TLS_DTV_OFFSET >> +SIZE_OF_DTV sizeof(tcbhead_t) >> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data >> index 547b1c1b7f..ec32e6d13f 100644 >> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data >> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data >> @@ -5,3 +5,5 @@ libc.so: calloc >> libc.so: free >> libc.so: malloc >> libc.so: realloc >> +# The dynamic loader needs __tls_get_addr for TLS. >> +ld.so: __tls_get_addr
On 08/03/24 04:53, mengqinggang wrote: > Thanks a lot for the review! A new v3 version patch has been sent. > https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html > > > And some replies below. > > From the gcc enablement patch it seems that you are using the aarch64 ABI naming, -mtls-dialect={desc,trad}. So I would suggest checking that there is no regression with my patch to enable TLS descriptor tests for -mtls-dialect=desc [1]. You might also extend the testing to check for incorrect save/restore of vector registers, as I did to check for arm32 ones [2] (check the sysdeps/arm/tst-gnu2-tls2.h). Also, I think this patch should be pushed only after gcc enablement is installed. [1] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-4-adhemerval.zanella@linaro.org/ [2] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-3-adhemerval.zanella@linaro.org/ > 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道: >> >> On 28/02/24 22:43, mengqinggang wrote: >>> This is mostly based on AArch64 and RISC-V implementation. >>> >>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. >>> >>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore >>> all vector registers. >>> --- >>> Changes v1 -> v2: >>> - Fix vr24-vr31, xr24-xr31 typo. >>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic. >>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic. >>> >>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html >> Patch looks ok, some comments below.
>> >>> elf/elf.h | 2 + >>> sysdeps/loongarch/Makefile | 6 + >>> sysdeps/loongarch/dl-link.sym | 1 + >>> sysdeps/loongarch/dl-machine.h | 60 ++- >>> sysdeps/loongarch/dl-tls.h | 9 +- >>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++ >>> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++ >>> sysdeps/loongarch/dl-tlsdesc.h | 53 +++ >>> sysdeps/loongarch/linkmap.h | 1 + >>> sysdeps/loongarch/sys/asm.h | 1 + >>> sysdeps/loongarch/sys/regdef.h | 1 + >>> sysdeps/loongarch/tlsdesc.c | 39 ++ >>> sysdeps/loongarch/tlsdesc.sym | 19 + >>> .../unix/sysv/linux/loongarch/localplt.data | 2 + >>> 14 files changed, 625 insertions(+), 3 deletions(-) >>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h >>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S >>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h >>> create mode 100644 sysdeps/loongarch/tlsdesc.c >>> create mode 100644 sysdeps/loongarch/tlsdesc.sym >>> >>> diff --git a/elf/elf.h b/elf/elf.h >>> index f2206e5c06..eec24ea049 100644 >>> --- a/elf/elf.h >>> +++ b/elf/elf.h >>> @@ -4237,6 +4237,8 @@ enum >>> #define R_LARCH_TLS_TPREL32 10 >>> #define R_LARCH_TLS_TPREL64 11 >>> #define R_LARCH_IRELATIVE 12 >>> +#define R_LARCH_TLS_DESC32 13 >>> +#define R_LARCH_TLS_DESC64 14 >>> /* Reserved for future relocs that the dynamic linker must understand. */ >>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile >>> index 43d2f583cd..181389e787 100644 >>> --- a/sysdeps/loongarch/Makefile >>> +++ b/sysdeps/loongarch/Makefile >>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h >>> endif >>> ifeq ($(subdir),elf) >>> +sysdep-dl-routines += tlsdesc dl-tlsdesc >>> gen-as-const-headers += dl-link.sym >>> endif >>> +ifeq ($(subdir),csu) >>> +gen-as-const-headers += tlsdesc.sym >>> +endif >>> + >>> + >>> # LoongArch's assembler also needs to know about PIC as it changes the >>> # definition of some assembler macros. 
>>> ASFLAGS-.os += $(pic-ccflag) >>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym >>> index b534968e30..fd81ef37d5 100644 >>> --- a/sysdeps/loongarch/dl-link.sym >>> +++ b/sysdeps/loongarch/dl-link.sym >>> @@ -1,6 +1,7 @@ >>> #include <stddef.h> >>> #include <sysdep.h> >>> #include <link.h> >>> +#include <dl-tlsdesc.h> >>> DL_SIZEOF_RG sizeof(struct La_loongarch_regs) >>> DL_SIZEOF_RV sizeof(struct La_loongarch_retval) >>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h >>> index ab81b82d95..8ca6c224f6 100644 >>> --- a/sysdeps/loongarch/dl-machine.h >>> +++ b/sysdeps/loongarch/dl-machine.h >>> @@ -25,7 +25,7 @@ >>> #include <entry.h> >>> #include <elf/elf.h> >>> #include <sys/asm.h> >>> -#include <dl-tls.h> >>> +#include <dl-tlsdesc.h> >>> #include <dl-static-tls.h> >>> #include <dl-machine-rel.h> >>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], >>> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; >>> break; >>> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: >>> + { >>> + struct tlsdesc volatile *td = >>> + (struct tlsdesc volatile *)addr_field; >>> + if (! 
sym) >>> + { >>> + td->arg = (void*)reloc->r_addend; >>> + td->entry = _dl_tlsdesc_undefweak; >>> + } >>> + else >>> + { >>> +# ifndef SHARED >>> + CHECK_STATIC_TLS (map, sym_map); >>> +# else >>> + if (!TRY_STATIC_TLS (map, sym_map)) >>> + { >>> + td->arg = _dl_make_tlsdesc_dynamic >>> + (sym_map, sym->st_value + reloc->r_addend); >>> +# if !defined __loongarch_soft_float >>> + if (SUPPORT_LASX) >>> + td->entry = _dl_tlsdesc_dynamic_lasx; >>> + else >>> + if (SUPPORT_LSX) >>> + td->entry = _dl_tlsdesc_dynamic_lsx; >>> + else >>> +# endif >>> + td->entry = _dl_tlsdesc_dynamic; >>> + } >>> + else >>> +# endif >>> + { >>> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) >>> + + reloc->r_addend); >>> + td->entry = _dl_tlsdesc_return; >>> + } >>> + } >>> + break; >>> + } >>> + >>> case R_LARCH_COPY: >>> { >>> if (sym == NULL) >>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], >>> else >>> *reloc_addr = map->l_mach.plt; >>> } >>> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) >> Use __glibc_likely here. >> >>> + { >>> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); >>> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); >>> + const ElfW (Sym) *sym = &symtab[symndx]; >>> + const struct r_found_version *version = NULL; >>> + >>> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) >>> + { >>> + const ElfW (Half) *vernum = >>> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); >>> + version = &map->l_versions[vernum[symndx] & 0x7fff]; >>> + } >>> + >>> + /* Always initialize TLS descriptors completely, because lazy >>> + initialization requires synchronization at every TLS access. 
*/ >>> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, >>> + skip_ifunc); >>> + } >>> else >>> _dl_reloc_bad_type (map, r_type, 1); >>> } >>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h >>> index 29924b866d..de593c002d 100644 >>> --- a/sysdeps/loongarch/dl-tls.h >>> +++ b/sysdeps/loongarch/dl-tls.h >>> @@ -16,6 +16,9 @@ >>> License along with the GNU C Library. If not, see >>> <https://www.gnu.org/licenses/>. */ >>> +#ifndef _DL_TLS_H >>> +#define _DL_TLS_H >>> + >>> /* Type used for the representation of TLS information in the GOT. */ >>> typedef struct >>> { >>> @@ -23,6 +26,8 @@ typedef struct >>> unsigned long int ti_offset; >>> } tls_index; >>> +extern void *__tls_get_addr (tls_index *ti); >>> + >>> /* The thread pointer points to the first static TLS block. */ >>> #define TLS_TP_OFFSET 0 >>> @@ -37,10 +42,10 @@ typedef struct >>> /* Compute the value for a DTPREL reloc. */ >>> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) >>> -extern void *__tls_get_addr (tls_index *ti); >>> - >> Why move the function prototype? > > > Maybe just want to take it out of a bunch of macros. > > >> >>> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) >>> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) >>> /* Value used for dtv entries for which the allocation is delayed. */ >>> #define TLS_DTV_UNALLOCATED ((void *) -1l) >>> + >>> +#endif >>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >>> new file mode 100644 >>> index 0000000000..0d8c9bb991 >>> --- /dev/null >>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >>> @@ -0,0 +1,341 @@ >>> +/* Thread-local storage handling in the ELF dynamic linker. >>> + LoongArch version. >>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >> Update Copyright years to 2024. >> >>> + >>> + This file is part of the GNU C Library. 
>>> + >>> + The GNU C Library is free software; you can redistribute it and/or >>> + modify it under the terms of the GNU Lesser General Public >>> + License as published by the Free Software Foundation; either >>> + version 2.1 of the License, or (at your option) any later version. >>> + >>> + The GNU C Library is distributed in the hope that it will be useful, >>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> + Lesser General Public License for more details. >>> + >>> + You should have received a copy of the GNU Lesser General Public >>> + License along with the GNU C Library; if not, see >>> + <https://www.gnu.org/licenses/>. */ >>> + >>> +#ifdef USE_LASX >>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) >>> +#elif defined USE_LSX >>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) >>> +#elif !defined __loongarch_soft_float >>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) >>> +#else >>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) >>> +#endif >> I don't have a strong opinion, but another option that might be simpler is to provide only one _dl_tlsdesc_dynamic implementation and check the >> required save/restore of vector registers based on the hwcap value. > > > The v3 patch provides only one _dl_tlsdesc_dynamic implementation. > > >>> + >>> +#ifdef SHARED >>> + /* Handler for dynamic TLS symbols. >>> + Prototype: >>> + _dl_tlsdesc_dynamic (tlsdesc *) ; >>> + >>> + The second word of the descriptor points to a >>> + tlsdesc_dynamic_arg structure. >>> + >>> + Returns the offset between the thread pointer and the >>> + object referenced by the argument. >>> + >>> + ptrdiff_t >>> + __attribute__ ((__regparm__ (1))) >> Does this attribute really make sense for loongarch? > > > This line has been deleted.
> > >> >>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >>> + { >>> + struct tlsdesc_dynamic_arg *td = tdp->arg; >>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); >>> + if (__builtin_expect (td->gen_count <= dtv[0].counter >> Use __glibc_unlikely or just remove the __builtin_expect for clarity. >> >>> + && (dtv[td->tlsinfo.ti_module].pointer.val >>> + != TLS_DTV_UNALLOCATED), >>> + 1)) >>> + return dtv[td->tlsinfo.ti_module].pointer.val >>> + + td->tlsinfo.ti_offset >>> + - __thread_pointer; >>> + >>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >>> + } >>> + */ >>> + .hidden _dl_tlsdesc_dynamic >>> + .global _dl_tlsdesc_dynamic >>> + .type _dl_tlsdesc_dynamic,%function >>> + cfi_startproc >>> + .align 2 >>> +_dl_tlsdesc_dynamic: >>> + /* Save just enough registers to support fast path, if we fall >>> + into slow path we will save additional registers. */ >>> + ADDI sp, sp,-24 >>> + REG_S t0, sp, 0 >>> + REG_S t1, sp, 8 >>> + REG_S t2, sp, 16 >>> + >>> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start >>> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg >>> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count >>> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter >>> + bltu t2, t1, Lslow >>> + >>> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module >>> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ >>> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) >>> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val >>> + li.d t2, TLS_DTV_UNALLOCATED >>> + beq t1, t2, Lslow >>> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset >>> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset >>> + add.d a0, t1, t2 >>> +Lret: >>> + sub.d a0, a0, tp >>> + REG_L t0, sp, 0 >>> + REG_L t1, sp, 8 >>> + REG_L t2, sp, 16 >>> + ADDI sp, sp, 24 >>> + RET >>> + >>> +Lslow: >>> + /* This is the slow path. 
We need to call __tls_get_addr() which >>> + means we need to save and restore all the register that the >>> + callee will trash. */ >>> + >>> + /* Save the remaining registers that we must treat as caller save. */ >>> + ADDI sp, sp, -FRAME_SIZE >>> + REG_S ra, sp, 0 * SZREG >>> + REG_S a1, sp, 1 * SZREG >>> + REG_S a2, sp, 2 * SZREG >>> + REG_S a3, sp, 3 * SZREG >>> + REG_S a4, sp, 4 * SZREG >>> + REG_S a5, sp, 5 * SZREG >>> + REG_S a6, sp, 6 * SZREG >>> + REG_S a7, sp, 7 * SZREG >>> + REG_S t4, sp, 8 * SZREG >>> + REG_S t5, sp, 9 * SZREG >>> + REG_S t6, sp, 10 * SZREG >>> + REG_S t7, sp, 11 * SZREG >>> + REG_S t8, sp, 12 * SZREG >>> + >>> +#ifdef USE_LASX >>> + xvst xr0, sp, 13*SZREG + 0*SZXREG >>> + xvst xr1, sp, 13*SZREG + 1*SZXREG >>> + xvst xr2, sp, 13*SZREG + 2*SZXREG >>> + xvst xr3, sp, 13*SZREG + 3*SZXREG >>> + xvst xr4, sp, 13*SZREG + 4*SZXREG >>> + xvst xr5, sp, 13*SZREG + 5*SZXREG >>> + xvst xr6, sp, 13*SZREG + 6*SZXREG >>> + xvst xr7, sp, 13*SZREG + 7*SZXREG >>> + xvst xr8, sp, 13*SZREG + 8*SZXREG >>> + xvst xr9, sp, 13*SZREG + 9*SZXREG >>> + xvst xr10, sp, 13*SZREG + 10*SZXREG >>> + xvst xr11, sp, 13*SZREG + 11*SZXREG >>> + xvst xr12, sp, 13*SZREG + 12*SZXREG >>> + xvst xr13, sp, 13*SZREG + 13*SZXREG >>> + xvst xr14, sp, 13*SZREG + 14*SZXREG >>> + xvst xr15, sp, 13*SZREG + 15*SZXREG >>> + xvst xr16, sp, 13*SZREG + 16*SZXREG >>> + xvst xr17, sp, 13*SZREG + 17*SZXREG >>> + xvst xr18, sp, 13*SZREG + 18*SZXREG >>> + xvst xr19, sp, 13*SZREG + 19*SZXREG >>> + xvst xr20, sp, 13*SZREG + 20*SZXREG >>> + xvst xr21, sp, 13*SZREG + 21*SZXREG >>> + xvst xr22, sp, 13*SZREG + 22*SZXREG >>> + xvst xr23, sp, 13*SZREG + 23*SZXREG >>> + xvst xr24, sp, 13*SZREG + 24*SZXREG >>> + xvst xr25, sp, 13*SZREG + 25*SZXREG >>> + xvst xr26, sp, 13*SZREG + 26*SZXREG >>> + xvst xr27, sp, 13*SZREG + 27*SZXREG >>> + xvst xr28, sp, 13*SZREG + 28*SZXREG >>> + xvst xr29, sp, 13*SZREG + 29*SZXREG >>> + xvst xr30, sp, 13*SZREG + 30*SZXREG >>> + xvst xr31, sp, 13*SZREG + 31*SZXREG >>> + # 
Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >>> + # some fields in fcsr0 >>> + movfcsr2gr t0, fcsr0 >>> + REG_S t0, sp, 32*SZXREG >>> +#elif defined USE_LSX >>> + vst vr0, sp, 13*SZREG + 0*SZVREG >>> + vst vr1, sp, 13*SZREG + 1*SZVREG >>> + vst vr2, sp, 13*SZREG + 2*SZVREG >>> + vst vr3, sp, 13*SZREG + 3*SZVREG >>> + vst vr4, sp, 13*SZREG + 4*SZVREG >>> + vst vr5, sp, 13*SZREG + 5*SZVREG >>> + vst vr6, sp, 13*SZREG + 6*SZVREG >>> + vst vr7, sp, 13*SZREG + 7*SZVREG >>> + vst vr8, sp, 13*SZREG + 8*SZVREG >>> + vst vr9, sp, 13*SZREG + 9*SZVREG >>> + vst vr10, sp, 13*SZREG + 10*SZVREG >>> + vst vr11, sp, 13*SZREG + 11*SZVREG >>> + vst vr12, sp, 13*SZREG + 12*SZVREG >>> + vst vr13, sp, 13*SZREG + 13*SZVREG >>> + vst vr14, sp, 13*SZREG + 14*SZVREG >>> + vst vr15, sp, 13*SZREG + 15*SZVREG >>> + vst vr16, sp, 13*SZREG + 16*SZVREG >>> + vst vr17, sp, 13*SZREG + 17*SZVREG >>> + vst vr18, sp, 13*SZREG + 18*SZVREG >>> + vst vr19, sp, 13*SZREG + 19*SZVREG >>> + vst vr20, sp, 13*SZREG + 20*SZVREG >>> + vst vr21, sp, 13*SZREG + 21*SZVREG >>> + vst vr22, sp, 13*SZREG + 22*SZVREG >>> + vst vr23, sp, 13*SZREG + 23*SZVREG >>> + vst vr24, sp, 13*SZREG + 24*SZVREG >>> + vst vr25, sp, 13*SZREG + 25*SZVREG >>> + vst vr26, sp, 13*SZREG + 26*SZVREG >>> + vst vr27, sp, 13*SZREG + 27*SZVREG >>> + vst vr28, sp, 13*SZREG + 28*SZVREG >>> + vst vr29, sp, 13*SZREG + 29*SZVREG >>> + vst vr30, sp, 13*SZREG + 30*SZVREG >>> + vst vr31, sp, 13*SZREG + 31*SZVREG >>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >>> + # some fields in fcsr0 >>> + movfcsr2gr t0, fcsr0 >>> + REG_S t0, sp, 32*SZVREG >>> +#elif !defined __loongarch_soft_float >>> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG >>> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG >>> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG >>> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG >>> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG >>> + FREG_S fa5, sp, 13*SZREG + 5*SZFREG >>> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG >>> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG 
>>> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG >>> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG >>> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG >>> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG >>> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG >>> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG >>> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG >>> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG >>> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG >>> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG >>> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG >>> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG >>> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG >>> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG >>> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG >>> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG >>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >>> + # some fields in fcsr0 >>> + movfcsr2gr t0, fcsr0 >>> + REG_S t0, sp, 24*SZFREG >>> +#endif /* #ifdef USE_LASX */ >>> + >>> + bl __tls_get_addr >>> + ADDI a0, a0, -TLS_DTV_OFFSET >>> + >>> + REG_L ra, sp, 0 >>> + REG_L a1, sp, 1 * 8 >>> + REG_L a2, sp, 2 * 8 >>> + REG_L a3, sp, 3 * 8 >>> + REG_L a4, sp, 4 * 8 >>> + REG_L a5, sp, 5 * 8 >>> + REG_L a6, sp, 6 * 8 >>> + REG_L a7, sp, 7 * 8 >>> + REG_L t4, sp, 8 * 8 >>> + REG_L t5, sp, 9 * 8 >>> + REG_L t6, sp, 10 * 8 >>> + REG_L t7, sp, 11 * 8 >>> + REG_L t8, sp, 12 * 8 >>> + >>> +#ifdef USE_LASX >>> + xvld xr0, sp, 13*SZREG + 0*SZXREG >>> + xvld xr1, sp, 13*SZREG + 1*SZXREG >>> + xvld xr2, sp, 13*SZREG + 2*SZXREG >>> + xvld xr3, sp, 13*SZREG + 3*SZXREG >>> + xvld xr4, sp, 13*SZREG + 4*SZXREG >>> + xvld xr5, sp, 13*SZREG + 5*SZXREG >>> + xvld xr6, sp, 13*SZREG + 6*SZXREG >>> + xvld xr7, sp, 13*SZREG + 7*SZXREG >>> + xvld xr8, sp, 13*SZREG + 8*SZXREG >>> + xvld xr9, sp, 13*SZREG + 9*SZXREG >>> + xvld xr10, sp, 13*SZREG + 10*SZXREG >>> + xvld xr11, sp, 13*SZREG + 11*SZXREG >>> + xvld xr12, sp, 13*SZREG + 12*SZXREG >>> + xvld xr13, sp, 13*SZREG + 13*SZXREG >>> + xvld xr14, sp, 13*SZREG + 14*SZXREG >>> + xvld xr15, sp, 13*SZREG + 15*SZXREG >>> + xvld xr16, sp, 13*SZREG + 
16*SZXREG >>> + xvld xr17, sp, 13*SZREG + 17*SZXREG >>> + xvld xr18, sp, 13*SZREG + 18*SZXREG >>> + xvld xr19, sp, 13*SZREG + 19*SZXREG >>> + xvld xr20, sp, 13*SZREG + 20*SZXREG >>> + xvld xr21, sp, 13*SZREG + 21*SZXREG >>> + xvld xr22, sp, 13*SZREG + 22*SZXREG >>> + xvld xr23, sp, 13*SZREG + 23*SZXREG >>> + xvld xr24, sp, 13*SZREG + 24*SZXREG >>> + xvld xr25, sp, 13*SZREG + 25*SZXREG >>> + xvld xr26, sp, 13*SZREG + 26*SZXREG >>> + xvld xr27, sp, 13*SZREG + 27*SZXREG >>> + xvld xr28, sp, 13*SZREG + 28*SZXREG >>> + xvld xr29, sp, 13*SZREG + 29*SZXREG >>> + xvld xr30, sp, 13*SZREG + 30*SZXREG >>> + xvld xr31, sp, 13*SZREG + 31*SZXREG >>> + REG_L t0, sp, 32*SZXREG >>> + movgr2fcsr fcsr0, t0 >>> +#elif defined USE_LSX >>> + vld vr0, sp, 13*SZREG + 0*SZVREG >>> + vld vr1, sp, 13*SZREG + 1*SZVREG >>> + vld vr2, sp, 13*SZREG + 2*SZVREG >>> + vld vr3, sp, 13*SZREG + 3*SZVREG >>> + vld vr4, sp, 13*SZREG + 4*SZVREG >>> + vld vr5, sp, 13*SZREG + 5*SZVREG >>> + vld vr6, sp, 13*SZREG + 6*SZVREG >>> + vld vr7, sp, 13*SZREG + 7*SZVREG >>> + vld vr8, sp, 13*SZREG + 8*SZVREG >>> + vld vr9, sp, 13*SZREG + 9*SZVREG >>> + vld vr10, sp, 13*SZREG + 10*SZVREG >>> + vld vr11, sp, 13*SZREG + 11*SZVREG >>> + vld vr12, sp, 13*SZREG + 12*SZVREG >>> + vld vr13, sp, 13*SZREG + 13*SZVREG >>> + vld vr14, sp, 13*SZREG + 14*SZVREG >>> + vld vr15, sp, 13*SZREG + 15*SZVREG >>> + vld vr16, sp, 13*SZREG + 16*SZVREG >>> + vld vr17, sp, 13*SZREG + 17*SZVREG >>> + vld vr18, sp, 13*SZREG + 18*SZVREG >>> + vld vr19, sp, 13*SZREG + 19*SZVREG >>> + vld vr20, sp, 13*SZREG + 20*SZVREG >>> + vld vr21, sp, 13*SZREG + 21*SZVREG >>> + vld vr22, sp, 13*SZREG + 22*SZVREG >>> + vld vr23, sp, 13*SZREG + 23*SZVREG >>> + vld vr24, sp, 13*SZREG + 24*SZVREG >>> + vld vr25, sp, 13*SZREG + 25*SZVREG >>> + vld vr26, sp, 13*SZREG + 26*SZVREG >>> + vld vr27, sp, 13*SZREG + 27*SZVREG >>> + vld vr28, sp, 13*SZREG + 28*SZVREG >>> + vld vr29, sp, 13*SZREG + 29*SZVREG >>> + vld vr30, sp, 13*SZREG + 30*SZVREG >>> + vld vr31, sp, 
13*SZREG + 31*SZVREG >>> + REG_L t0, sp, 32*SZVREG >>> + movgr2fcsr fcsr0, t0 >>> +#elif !defined __loongarch_soft_float >>> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG >>> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG >>> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG >>> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG >>> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG >>> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG >>> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG >>> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG >>> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG >>> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG >>> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG >>> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG >>> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG >>> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG >>> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG >>> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG >>> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG >>> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG >>> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG >>> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG >>> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG >>> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG >>> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG >>> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG >>> + REG_L t0, sp, 24*SZFREG >>> + movgr2fcsr fcsr0, t0 >>> +#endif /* #ifdef USE_LASX */ >>> + >>> + ADDI sp, sp, FRAME_SIZE >>> + b Lret >>> + cfi_endproc >>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >>> +#endif /* #ifdef SHARED */ >>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S >>> new file mode 100644 >>> index 0000000000..4a17079169 >>> --- /dev/null >>> +++ b/sysdeps/loongarch/dl-tlsdesc.S >>> @@ -0,0 +1,93 @@ >>> +/* Thread-local storage handling in the ELF dynamic linker. >>> + LoongArch version. >>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >> Update Copyright years to 2024. >> >>> + >>> + This file is part of the GNU C Library. 
>>> + >>> + The GNU C Library is free software; you can redistribute it and/or >>> + modify it under the terms of the GNU Lesser General Public >>> + License as published by the Free Software Foundation; either >>> + version 2.1 of the License, or (at your option) any later version. >>> + >>> + The GNU C Library is distributed in the hope that it will be useful, >>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> + Lesser General Public License for more details. >>> + >>> + You should have received a copy of the GNU Lesser General Public >>> + License along with the GNU C Library; if not, see >>> + <https://www.gnu.org/licenses/>. */ >>> + >>> +#include <sysdep.h> >>> +#include <tls.h> >>> +#include "tlsdesc.h" >>> + >>> + .text >>> + >>> + /* Compute the thread pointer offset for symbols in the static >>> + TLS block. The offset is the same for all threads. >>> + Prototype: >>> + _dl_tlsdesc_return (tlsdesc *); */ >>> + .hidden _dl_tlsdesc_return >>> + .global _dl_tlsdesc_return >>> + .type _dl_tlsdesc_return,%function >>> + cfi_startproc >>> + .align 2 >>> +_dl_tlsdesc_return: >>> + REG_L a0, a0, 8 >>> + RET >>> + cfi_endproc >>> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return >>> + >>> + /* Handler for undefined weak TLS symbols. >>> + Prototype: >>> + _dl_tlsdesc_undefweak (tlsdesc *); >>> + >>> + The second word of the descriptor contains the addend. >>> + Return the addend minus the thread pointer. This ensures >>> + that when the caller adds on the thread pointer it gets back >>> + the addend. 
*/ >>> + .hidden _dl_tlsdesc_undefweak >>> + .global _dl_tlsdesc_undefweak >>> + .type _dl_tlsdesc_undefweak,%function >>> + cfi_startproc >>> + .align 2 >>> +_dl_tlsdesc_undefweak: >>> + REG_L a0, a0, 8 >>> + sub.d a0, a0, tp >>> + RET >>> + cfi_endproc >>> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak >>> + >>> + >>> +#ifdef SHARED >>> + >>> +#if !defined __loongarch_soft_float >>> + >>> +#define USE_LASX >>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx >>> +#define Lret Lret_lasx >>> +#define Lslow Lslow_lasx >>> +#include "dl-tlsdesc-dynamic.h" >>> +#undef FRAME_SIZE >>> +#undef USE_LASX >>> +#undef _dl_tlsdesc_dynamic >>> +#undef Lret >>> +#undef Lslow >>> + >>> +#define USE_LSX >>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx >>> +#define Lret Lret_lsx >>> +#define Lslow Lslow_lsx >>> +#include "dl-tlsdesc-dynamic.h" >>> +#undef FRAME_SIZE >>> +#undef USE_LSX >>> +#undef _dl_tlsdesc_dynamic >>> +#undef Lret >>> +#undef Lslow >>> + >>> +#endif >>> + >>> +#include "dl-tlsdesc-dynamic.h" >>> + >>> +#endif /* #ifdef SHARED */ >>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h >>> new file mode 100644 >>> index 0000000000..988037a714 >>> --- /dev/null >>> +++ b/sysdeps/loongarch/dl-tlsdesc.h >>> @@ -0,0 +1,53 @@ >>> +/* Thread-local storage descriptor handling in the ELF dynamic linker. >>> + LoongArch version. >>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >>> + >>> + This file is part of the GNU C Library. >>> + >>> + The GNU C Library is free software; you can redistribute it and/or >>> + modify it under the terms of the GNU Lesser General Public >>> + License as published by the Free Software Foundation; either >>> + version 2.1 of the License, or (at your option) any later version. >>> + >>> + The GNU C Library is distributed in the hope that it will be useful, >>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU >>> + Lesser General Public License for more details. >>> + >>> + You should have received a copy of the GNU Lesser General Public >>> + License along with the GNU C Library; if not, see >>> + <https://www.gnu.org/licenses/>. */ >>> + >>> +#ifndef _DL_TLSDESC_H >>> +#define _DL_TLSDESC_H >>> + >>> +#include <dl-tls.h> >>> + >>> +/* Type used to represent a TLS descriptor in the GOT. */ >>> +struct tlsdesc >>> +{ >>> + ptrdiff_t (*entry) (struct tlsdesc *); >>> + void *arg; >>> +}; >>> + >>> +/* Type used as the argument in a TLS descriptor for a symbol that >>> + needs dynamic TLS offsets. */ >>> +struct tlsdesc_dynamic_arg >>> +{ >>> + tls_index tlsinfo; >>> + size_t gen_count; >>> +}; >>> + >>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); >>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); >>> + >>> +# ifdef SHARED >>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); >>> +#if !defined __loongarch_soft_float >> Minor style, usually for single tests we use '#ifndef' and add >> attribute_hidden at the end of prototype. >> >>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); >>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); >>> +#endif >>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); >>> +#endif >>> + >>> +#endif >>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h >>> index 4d8737ee7f..9b1773634c 100644 >>> --- a/sysdeps/loongarch/linkmap.h >>> +++ b/sysdeps/loongarch/linkmap.h >>> @@ -19,4 +19,5 @@ >>> struct link_map_machine >>> { >>> ElfW (Addr) plt; /* Address of .plt. */ >>> + void *tlsdesc_table; /* Address of TLS descriptor hash table. 
*/ >>> }; >>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h >>> index 51521a7eb4..23c1d12914 100644 >>> --- a/sysdeps/loongarch/sys/asm.h >>> +++ b/sysdeps/loongarch/sys/asm.h >>> @@ -25,6 +25,7 @@ >>> /* Macros to handle different pointer/register sizes for 32/64-bit code. */ >>> #define SZREG 8 >>> #define SZFREG 8 >>> +#define SZFCSREG 4 >>> #define SZVREG 16 >>> #define SZXREG 32 >>> #define REG_L ld.d >>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h >>> index f61ee25b25..80ce3e9c00 100644 >>> --- a/sysdeps/loongarch/sys/regdef.h >>> +++ b/sysdeps/loongarch/sys/regdef.h >>> @@ -97,6 +97,7 @@ >>> #define fcc5 $fcc5 >>> #define fcc6 $fcc6 >>> #define fcc7 $fcc7 >>> +#define fcsr0 $fcsr0 >>> #define vr0 $vr0 >>> #define vr1 $vr1 >>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c >>> new file mode 100644 >>> index 0000000000..a357e7619f >>> --- /dev/null >>> +++ b/sysdeps/loongarch/tlsdesc.c >>> @@ -0,0 +1,39 @@ >>> +/* Manage TLS descriptors. AArch64 version. >>> + >>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >> Update Copyright years to 2024 and remove the 'AArch64'. >> >> >>> + >>> + This file is part of the GNU C Library. >>> + >>> + The GNU C Library is free software; you can redistribute it and/or >>> + modify it under the terms of the GNU Lesser General Public >>> + License as published by the Free Software Foundation; either >>> + version 2.1 of the License, or (at your option) any later version. >>> + >>> + The GNU C Library is distributed in the hope that it will be useful, >>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>> + Lesser General Public License for more details. >>> + >>> + You should have received a copy of the GNU Lesser General Public >>> + License along with the GNU C Library; if not, see >>> + <https://www.gnu.org/licenses/>. 
*/ >>> + >>> +#include <ldsodefs.h> >>> +#include <tls.h> >>> +#include <dl-tlsdesc.h> >>> +#include <dl-unmap-segments.h> >>> +#include <tlsdeschtab.h> >>> + >>> +/* Unmap the dynamic object, but also release its TLS descriptor table >>> + if there is one. */ >>> + >>> +void >>> +_dl_unmap (struct link_map *map) >>> +{ >>> + _dl_unmap_segments (map); >>> + >>> +#ifdef SHARED >>> + if (map->l_mach.tlsdesc_table) >>> + htab_delete (map->l_mach.tlsdesc_table); >>> +#endif >>> +} >>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym >>> new file mode 100644 >>> index 0000000000..bcab218631 >>> --- /dev/null >>> +++ b/sysdeps/loongarch/tlsdesc.sym >>> @@ -0,0 +1,19 @@ >>> +#include <stddef.h> >>> +#include <sysdep.h> >>> +#include <tls.h> >>> +#include <link.h> >>> +#include <dl-tlsdesc.h> >>> + >>> +-- >>> + >>> +-- Abuse tls.h macros to derive offsets relative to the thread register. >>> + >>> +TLSDESC_ARG offsetof(struct tlsdesc, arg) >>> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) >>> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) >>> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) >>> +TCBHEAD_DTV offsetof(tcbhead_t, dtv) >>> +DTV_COUNTER offsetof(dtv_t, counter) >>> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED >>> +TLS_DTV_OFFSET TLS_DTV_OFFSET >>> +SIZE_OF_DTV sizeof(tcbhead_t) >>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data >>> index 547b1c1b7f..ec32e6d13f 100644 >>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data >>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data >>> @@ -5,3 +5,5 @@ libc.so: calloc >>> libc.so: free >>> libc.so: malloc >>> libc.so: realloc >>> +# The dynamic loader needs __tls_get_addr for TLS. >>> +ld.so: __tls_get_addr >
Thanks, I will first complete the gcc patch as soon as possible. 在 2024/3/8 下午10:10, Adhemerval Zanella Netto 写道: > > On 08/03/24 04:53, mengqinggang wrote: >> Thanks a lot for the review! A new v3 version patch has been sent. >> https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html >> >> >> Some replies are below. >> >> > From the gcc enablement patch it seems that you are using the aarch64 > ABI naming, -mtls-dialect={desc,trad}. So I would suggest to check if > there is no regression with my patch to enable TLS descriptor for > -mtls=desc [1]. > > You might also extend the testing to check for possible wrong vector register > save/restore, as I did to check for arm32 ones [2] (check > the sysdeps/arm/tst-gnu2-tls2.h). > > Also, I think this patch should be pushed only after gcc enablement > is installed. > > > [1] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-4-adhemerval.zanella@linaro.org/ > [2] https://patchwork.sourceware.org/project/glibc/patch/20240229194642.3398122-3-adhemerval.zanella@linaro.org/ > >> 在 2024/3/6 上午3:29, Adhemerval Zanella Netto 写道: >>> On 28/02/24 22:43, mengqinggang wrote: >>>> This is mostly based on AArch64 and RISC-V implementation. >>>> >>>> Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. >>>> >>>> For _dl_tlsdesc_dynamic function slow path, temporarily save and restore >>>> all vector registers. >>>> --- >>>> Changes v1 -> v2: >>>> - Fix vr24-vr31, xr24-xr31 typo. >>>> - Save and restore max length float or vector registers in _dl_tlsdesc_dynamic. >>>> - Save and restore fcsr0 in _dl_tlsdesc_dynamic. >>>> >>>> v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html >>> Patch looks ok, some comments below. 
>>> >>>> elf/elf.h | 2 + >>>> sysdeps/loongarch/Makefile | 6 + >>>> sysdeps/loongarch/dl-link.sym | 1 + >>>> sysdeps/loongarch/dl-machine.h | 60 ++- >>>> sysdeps/loongarch/dl-tls.h | 9 +- >>>> sysdeps/loongarch/dl-tlsdesc-dynamic.h | 341 ++++++++++++++++++ >>>> sysdeps/loongarch/dl-tlsdesc.S | 93 +++++ >>>> sysdeps/loongarch/dl-tlsdesc.h | 53 +++ >>>> sysdeps/loongarch/linkmap.h | 1 + >>>> sysdeps/loongarch/sys/asm.h | 1 + >>>> sysdeps/loongarch/sys/regdef.h | 1 + >>>> sysdeps/loongarch/tlsdesc.c | 39 ++ >>>> sysdeps/loongarch/tlsdesc.sym | 19 + >>>> .../unix/sysv/linux/loongarch/localplt.data | 2 + >>>> 14 files changed, 625 insertions(+), 3 deletions(-) >>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h >>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.S >>>> create mode 100644 sysdeps/loongarch/dl-tlsdesc.h >>>> create mode 100644 sysdeps/loongarch/tlsdesc.c >>>> create mode 100644 sysdeps/loongarch/tlsdesc.sym >>>> >>>> diff --git a/elf/elf.h b/elf/elf.h >>>> index f2206e5c06..eec24ea049 100644 >>>> --- a/elf/elf.h >>>> +++ b/elf/elf.h >>>> @@ -4237,6 +4237,8 @@ enum >>>> #define R_LARCH_TLS_TPREL32 10 >>>> #define R_LARCH_TLS_TPREL64 11 >>>> #define R_LARCH_IRELATIVE 12 >>>> +#define R_LARCH_TLS_DESC32 13 >>>> +#define R_LARCH_TLS_DESC64 14 >>>> /* Reserved for future relocs that the dynamic linker must understand. */ >>>> diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile >>>> index 43d2f583cd..181389e787 100644 >>>> --- a/sysdeps/loongarch/Makefile >>>> +++ b/sysdeps/loongarch/Makefile >>>> @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h >>>> endif >>>> ifeq ($(subdir),elf) >>>> +sysdep-dl-routines += tlsdesc dl-tlsdesc >>>> gen-as-const-headers += dl-link.sym >>>> endif >>>> +ifeq ($(subdir),csu) >>>> +gen-as-const-headers += tlsdesc.sym >>>> +endif >>>> + >>>> + >>>> # LoongArch's assembler also needs to know about PIC as it changes the >>>> # definition of some assembler macros. 
>>>> ASFLAGS-.os += $(pic-ccflag) >>>> diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym >>>> index b534968e30..fd81ef37d5 100644 >>>> --- a/sysdeps/loongarch/dl-link.sym >>>> +++ b/sysdeps/loongarch/dl-link.sym >>>> @@ -1,6 +1,7 @@ >>>> #include <stddef.h> >>>> #include <sysdep.h> >>>> #include <link.h> >>>> +#include <dl-tlsdesc.h> >>>> DL_SIZEOF_RG sizeof(struct La_loongarch_regs) >>>> DL_SIZEOF_RV sizeof(struct La_loongarch_retval) >>>> diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h >>>> index ab81b82d95..8ca6c224f6 100644 >>>> --- a/sysdeps/loongarch/dl-machine.h >>>> +++ b/sysdeps/loongarch/dl-machine.h >>>> @@ -25,7 +25,7 @@ >>>> #include <entry.h> >>>> #include <elf/elf.h> >>>> #include <sys/asm.h> >>>> -#include <dl-tls.h> >>>> +#include <dl-tlsdesc.h> >>>> #include <dl-static-tls.h> >>>> #include <dl-machine-rel.h> >>>> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], >>>> *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; >>>> break; >>>> + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: >>>> + { >>>> + struct tlsdesc volatile *td = >>>> + (struct tlsdesc volatile *)addr_field; >>>> + if (! 
sym) >>>> + { >>>> + td->arg = (void*)reloc->r_addend; >>>> + td->entry = _dl_tlsdesc_undefweak; >>>> + } >>>> + else >>>> + { >>>> +# ifndef SHARED >>>> + CHECK_STATIC_TLS (map, sym_map); >>>> +# else >>>> + if (!TRY_STATIC_TLS (map, sym_map)) >>>> + { >>>> + td->arg = _dl_make_tlsdesc_dynamic >>>> + (sym_map, sym->st_value + reloc->r_addend); >>>> +# if !defined __loongarch_soft_float >>>> + if (SUPPORT_LASX) >>>> + td->entry = _dl_tlsdesc_dynamic_lasx; >>>> + else >>>> + if (SUPPORT_LSX) >>>> + td->entry = _dl_tlsdesc_dynamic_lsx; >>>> + else >>>> +# endif >>>> + td->entry = _dl_tlsdesc_dynamic; >>>> + } >>>> + else >>>> +# endif >>>> + { >>>> + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) >>>> + + reloc->r_addend); >>>> + td->entry = _dl_tlsdesc_return; >>>> + } >>>> + } >>>> + break; >>>> + } >>>> + >>>> case R_LARCH_COPY: >>>> { >>>> if (sym == NULL) >>>> @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], >>>> else >>>> *reloc_addr = map->l_mach.plt; >>>> } >>>> + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) >>> Use __glibc_likely here. >>> >>>> + { >>>> + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); >>>> + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); >>>> + const ElfW (Sym) *sym = &symtab[symndx]; >>>> + const struct r_found_version *version = NULL; >>>> + >>>> + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) >>>> + { >>>> + const ElfW (Half) *vernum = >>>> + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); >>>> + version = &map->l_versions[vernum[symndx] & 0x7fff]; >>>> + } >>>> + >>>> + /* Always initialize TLS descriptors completely, because lazy >>>> + initialization requires synchronization at every TLS access. 
*/ >>>> + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, >>>> + skip_ifunc); >>>> + } >>>> else >>>> _dl_reloc_bad_type (map, r_type, 1); >>>> } >>>> diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h >>>> index 29924b866d..de593c002d 100644 >>>> --- a/sysdeps/loongarch/dl-tls.h >>>> +++ b/sysdeps/loongarch/dl-tls.h >>>> @@ -16,6 +16,9 @@ >>>> License along with the GNU C Library. If not, see >>>> <https://www.gnu.org/licenses/>. */ >>>> +#ifndef _DL_TLS_H >>>> +#define _DL_TLS_H >>>> + >>>> /* Type used for the representation of TLS information in the GOT. */ >>>> typedef struct >>>> { >>>> @@ -23,6 +26,8 @@ typedef struct >>>> unsigned long int ti_offset; >>>> } tls_index; >>>> +extern void *__tls_get_addr (tls_index *ti); >>>> + >>>> /* The thread pointer points to the first static TLS block. */ >>>> #define TLS_TP_OFFSET 0 >>>> @@ -37,10 +42,10 @@ typedef struct >>>> /* Compute the value for a DTPREL reloc. */ >>>> #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) >>>> -extern void *__tls_get_addr (tls_index *ti); >>>> - >>> Why move the function prototype? >> >> Maybe just want to take it out of a bunch of macros. >> >> >>>> #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) >>>> #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) >>>> /* Value used for dtv entries for which the allocation is delayed. */ >>>> #define TLS_DTV_UNALLOCATED ((void *) -1l) >>>> + >>>> +#endif >>>> diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >>>> new file mode 100644 >>>> index 0000000000..0d8c9bb991 >>>> --- /dev/null >>>> +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h >>>> @@ -0,0 +1,341 @@ >>>> +/* Thread-local storage handling in the ELF dynamic linker. >>>> + LoongArch version. >>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >>> Update Copyright years to 2024. >>> >>>> + >>>> + This file is part of the GNU C Library. 
>>>> + >>>> + The GNU C Library is free software; you can redistribute it and/or >>>> + modify it under the terms of the GNU Lesser General Public >>>> + License as published by the Free Software Foundation; either >>>> + version 2.1 of the License, or (at your option) any later version. >>>> + >>>> + The GNU C Library is distributed in the hope that it will be useful, >>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>>> + Lesser General Public License for more details. >>>> + >>>> + You should have received a copy of the GNU Lesser General Public >>>> + License along with the GNU C Library; if not, see >>>> + <https://www.gnu.org/licenses/>. */ >>>> + >>>> +#ifdef USE_LASX >>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) >>>> +#elif defined USE_LSX >>>> +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) >>>> +#elif !defined __loongarch_soft_float >>>> +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) >>>> +#else >>>> +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) >>>> +#endif >>> I don't have a strong opinion, but another option that might be simpler is >>> to provide only one _dl_tlsdesc_dynamic implementation and check the >>> required save/restore of vector registers based on hwcap value. >> >> The v3 patch provides only one _dl_tlsdesc_dynamic implementation. >> >> >>>> + >>>> +#ifdef SHARED >>>> + /* Handler for dynamic TLS symbols. >>>> + Prototype: >>>> + _dl_tlsdesc_dynamic (tlsdesc *) ; >>>> + >>>> + The second word of the descriptor points to a >>>> + tlsdesc_dynamic_arg structure. >>>> + >>>> + Returns the offset between the thread pointer and the >>>> + object referenced by the argument. >>>> + >>>> + ptrdiff_t >>>> + __attribute__ ((__regparm__ (1))) >>> Does this attribute really make sense for loongarch? >> >> This line has been deleted. 
>> >> >>>> + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) >>>> + { >>>> + struct tlsdesc_dynamic_arg *td = tdp->arg; >>>> + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); >>>> + if (__builtin_expect (td->gen_count <= dtv[0].counter >>> Use __glibc_unlikely or just remove the __builtin_expect for clarity. >>> >>>> + && (dtv[td->tlsinfo.ti_module].pointer.val >>>> + != TLS_DTV_UNALLOCATED), >>>> + 1)) >>>> + return dtv[td->tlsinfo.ti_module].pointer.val >>>> + + td->tlsinfo.ti_offset >>>> + - __thread_pointer; >>>> + >>>> + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; >>>> + } >>>> + */ >>>> + .hidden _dl_tlsdesc_dynamic >>>> + .global _dl_tlsdesc_dynamic >>>> + .type _dl_tlsdesc_dynamic,%function >>>> + cfi_startproc >>>> + .align 2 >>>> +_dl_tlsdesc_dynamic: >>>> + /* Save just enough registers to support fast path, if we fall >>>> + into slow path we will save additional registers. */ >>>> + ADDI sp, sp,-24 >>>> + REG_S t0, sp, 0 >>>> + REG_S t1, sp, 8 >>>> + REG_S t2, sp, 16 >>>> + >>>> + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start >>>> + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg >>>> + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count >>>> + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter >>>> + bltu t2, t1, Lslow >>>> + >>>> + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module >>>> + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ >>>> + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) >>>> + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val >>>> + li.d t2, TLS_DTV_UNALLOCATED >>>> + beq t1, t2, Lslow >>>> + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset >>>> + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset >>>> + add.d a0, t1, t2 >>>> +Lret: >>>> + sub.d a0, a0, tp >>>> + REG_L t0, sp, 0 >>>> + REG_L t1, sp, 8 >>>> + REG_L t2, sp, 16 >>>> + ADDI sp, sp, 24 >>>> + RET >>>> + >>>> +Lslow: >>>> + /* This is the slow path. 
We need to call __tls_get_addr() which >>>> + means we need to save and restore all the register that the >>>> + callee will trash. */ >>>> + >>>> + /* Save the remaining registers that we must treat as caller save. */ >>>> + ADDI sp, sp, -FRAME_SIZE >>>> + REG_S ra, sp, 0 * SZREG >>>> + REG_S a1, sp, 1 * SZREG >>>> + REG_S a2, sp, 2 * SZREG >>>> + REG_S a3, sp, 3 * SZREG >>>> + REG_S a4, sp, 4 * SZREG >>>> + REG_S a5, sp, 5 * SZREG >>>> + REG_S a6, sp, 6 * SZREG >>>> + REG_S a7, sp, 7 * SZREG >>>> + REG_S t4, sp, 8 * SZREG >>>> + REG_S t5, sp, 9 * SZREG >>>> + REG_S t6, sp, 10 * SZREG >>>> + REG_S t7, sp, 11 * SZREG >>>> + REG_S t8, sp, 12 * SZREG >>>> + >>>> +#ifdef USE_LASX >>>> + xvst xr0, sp, 13*SZREG + 0*SZXREG >>>> + xvst xr1, sp, 13*SZREG + 1*SZXREG >>>> + xvst xr2, sp, 13*SZREG + 2*SZXREG >>>> + xvst xr3, sp, 13*SZREG + 3*SZXREG >>>> + xvst xr4, sp, 13*SZREG + 4*SZXREG >>>> + xvst xr5, sp, 13*SZREG + 5*SZXREG >>>> + xvst xr6, sp, 13*SZREG + 6*SZXREG >>>> + xvst xr7, sp, 13*SZREG + 7*SZXREG >>>> + xvst xr8, sp, 13*SZREG + 8*SZXREG >>>> + xvst xr9, sp, 13*SZREG + 9*SZXREG >>>> + xvst xr10, sp, 13*SZREG + 10*SZXREG >>>> + xvst xr11, sp, 13*SZREG + 11*SZXREG >>>> + xvst xr12, sp, 13*SZREG + 12*SZXREG >>>> + xvst xr13, sp, 13*SZREG + 13*SZXREG >>>> + xvst xr14, sp, 13*SZREG + 14*SZXREG >>>> + xvst xr15, sp, 13*SZREG + 15*SZXREG >>>> + xvst xr16, sp, 13*SZREG + 16*SZXREG >>>> + xvst xr17, sp, 13*SZREG + 17*SZXREG >>>> + xvst xr18, sp, 13*SZREG + 18*SZXREG >>>> + xvst xr19, sp, 13*SZREG + 19*SZXREG >>>> + xvst xr20, sp, 13*SZREG + 20*SZXREG >>>> + xvst xr21, sp, 13*SZREG + 21*SZXREG >>>> + xvst xr22, sp, 13*SZREG + 22*SZXREG >>>> + xvst xr23, sp, 13*SZREG + 23*SZXREG >>>> + xvst xr24, sp, 13*SZREG + 24*SZXREG >>>> + xvst xr25, sp, 13*SZREG + 25*SZXREG >>>> + xvst xr26, sp, 13*SZREG + 26*SZXREG >>>> + xvst xr27, sp, 13*SZREG + 27*SZXREG >>>> + xvst xr28, sp, 13*SZREG + 28*SZXREG >>>> + xvst xr29, sp, 13*SZREG + 29*SZXREG >>>> + xvst xr30, sp, 13*SZREG + 30*SZXREG 
>>>> + xvst xr31, sp, 13*SZREG + 31*SZXREG >>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >>>> + # some fields in fcsr0 >>>> + movfcsr2gr t0, fcsr0 >>>> + REG_S t0, sp, 32*SZXREG >>>> +#elif defined USE_LSX >>>> + vst vr0, sp, 13*SZREG + 0*SZVREG >>>> + vst vr1, sp, 13*SZREG + 1*SZVREG >>>> + vst vr2, sp, 13*SZREG + 2*SZVREG >>>> + vst vr3, sp, 13*SZREG + 3*SZVREG >>>> + vst vr4, sp, 13*SZREG + 4*SZVREG >>>> + vst vr5, sp, 13*SZREG + 5*SZVREG >>>> + vst vr6, sp, 13*SZREG + 6*SZVREG >>>> + vst vr7, sp, 13*SZREG + 7*SZVREG >>>> + vst vr8, sp, 13*SZREG + 8*SZVREG >>>> + vst vr9, sp, 13*SZREG + 9*SZVREG >>>> + vst vr10, sp, 13*SZREG + 10*SZVREG >>>> + vst vr11, sp, 13*SZREG + 11*SZVREG >>>> + vst vr12, sp, 13*SZREG + 12*SZVREG >>>> + vst vr13, sp, 13*SZREG + 13*SZVREG >>>> + vst vr14, sp, 13*SZREG + 14*SZVREG >>>> + vst vr15, sp, 13*SZREG + 15*SZVREG >>>> + vst vr16, sp, 13*SZREG + 16*SZVREG >>>> + vst vr17, sp, 13*SZREG + 17*SZVREG >>>> + vst vr18, sp, 13*SZREG + 18*SZVREG >>>> + vst vr19, sp, 13*SZREG + 19*SZVREG >>>> + vst vr20, sp, 13*SZREG + 20*SZVREG >>>> + vst vr21, sp, 13*SZREG + 21*SZVREG >>>> + vst vr22, sp, 13*SZREG + 22*SZVREG >>>> + vst vr23, sp, 13*SZREG + 23*SZVREG >>>> + vst vr24, sp, 13*SZREG + 24*SZVREG >>>> + vst vr25, sp, 13*SZREG + 25*SZVREG >>>> + vst vr26, sp, 13*SZREG + 26*SZVREG >>>> + vst vr27, sp, 13*SZREG + 27*SZVREG >>>> + vst vr28, sp, 13*SZREG + 28*SZVREG >>>> + vst vr29, sp, 13*SZREG + 29*SZVREG >>>> + vst vr30, sp, 13*SZREG + 30*SZVREG >>>> + vst vr31, sp, 13*SZREG + 31*SZVREG >>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >>>> + # some fields in fcsr0 >>>> + movfcsr2gr t0, fcsr0 >>>> + REG_S t0, sp, 32*SZVREG >>>> +#elif !defined __loongarch_soft_float >>>> + FREG_S fa0, sp, 13*SZREG + 0*SZFREG >>>> + FREG_S fa1, sp, 13*SZREG + 1*SZFREG >>>> + FREG_S fa2, sp, 13*SZREG + 2*SZFREG >>>> + FREG_S fa3, sp, 13*SZREG + 3*SZFREG >>>> + FREG_S fa4, sp, 13*SZREG + 4*SZFREG >>>> + FREG_S fa5, sp, 
13*SZREG + 5*SZFREG >>>> + FREG_S fa6, sp, 13*SZREG + 6*SZFREG >>>> + FREG_S fa7, sp, 13*SZREG + 7*SZFREG >>>> + FREG_S ft0, sp, 13*SZREG + 8*SZFREG >>>> + FREG_S ft1, sp, 13*SZREG + 9*SZFREG >>>> + FREG_S ft2, sp, 13*SZREG + 10*SZFREG >>>> + FREG_S ft3, sp, 13*SZREG + 11*SZFREG >>>> + FREG_S ft4, sp, 13*SZREG + 12*SZFREG >>>> + FREG_S ft5, sp, 13*SZREG + 13*SZFREG >>>> + FREG_S ft6, sp, 13*SZREG + 14*SZFREG >>>> + FREG_S ft7, sp, 13*SZREG + 15*SZFREG >>>> + FREG_S ft8, sp, 13*SZREG + 16*SZFREG >>>> + FREG_S ft9, sp, 13*SZREG + 17*SZFREG >>>> + FREG_S ft10, sp, 13*SZREG + 18*SZFREG >>>> + FREG_S ft11, sp, 13*SZREG + 19*SZFREG >>>> + FREG_S ft12, sp, 13*SZREG + 20*SZFREG >>>> + FREG_S ft13, sp, 13*SZREG + 21*SZFREG >>>> + FREG_S ft14, sp, 13*SZREG + 22*SZFREG >>>> + FREG_S ft15, sp, 13*SZREG + 23*SZFREG >>>> + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of >>>> + # some fields in fcsr0 >>>> + movfcsr2gr t0, fcsr0 >>>> + REG_S t0, sp, 24*SZFREG >>>> +#endif /* #ifdef USE_LASX */ >>>> + >>>> + bl __tls_get_addr >>>> + ADDI a0, a0, -TLS_DTV_OFFSET >>>> + >>>> + REG_L ra, sp, 0 >>>> + REG_L a1, sp, 1 * 8 >>>> + REG_L a2, sp, 2 * 8 >>>> + REG_L a3, sp, 3 * 8 >>>> + REG_L a4, sp, 4 * 8 >>>> + REG_L a5, sp, 5 * 8 >>>> + REG_L a6, sp, 6 * 8 >>>> + REG_L a7, sp, 7 * 8 >>>> + REG_L t4, sp, 8 * 8 >>>> + REG_L t5, sp, 9 * 8 >>>> + REG_L t6, sp, 10 * 8 >>>> + REG_L t7, sp, 11 * 8 >>>> + REG_L t8, sp, 12 * 8 >>>> + >>>> +#ifdef USE_LASX >>>> + xvld xr0, sp, 13*SZREG + 0*SZXREG >>>> + xvld xr1, sp, 13*SZREG + 1*SZXREG >>>> + xvld xr2, sp, 13*SZREG + 2*SZXREG >>>> + xvld xr3, sp, 13*SZREG + 3*SZXREG >>>> + xvld xr4, sp, 13*SZREG + 4*SZXREG >>>> + xvld xr5, sp, 13*SZREG + 5*SZXREG >>>> + xvld xr6, sp, 13*SZREG + 6*SZXREG >>>> + xvld xr7, sp, 13*SZREG + 7*SZXREG >>>> + xvld xr8, sp, 13*SZREG + 8*SZXREG >>>> + xvld xr9, sp, 13*SZREG + 9*SZXREG >>>> + xvld xr10, sp, 13*SZREG + 10*SZXREG >>>> + xvld xr11, sp, 13*SZREG + 11*SZXREG >>>> + xvld xr12, sp, 13*SZREG + 
12*SZXREG >>>> + xvld xr13, sp, 13*SZREG + 13*SZXREG >>>> + xvld xr14, sp, 13*SZREG + 14*SZXREG >>>> + xvld xr15, sp, 13*SZREG + 15*SZXREG >>>> + xvld xr16, sp, 13*SZREG + 16*SZXREG >>>> + xvld xr17, sp, 13*SZREG + 17*SZXREG >>>> + xvld xr18, sp, 13*SZREG + 18*SZXREG >>>> + xvld xr19, sp, 13*SZREG + 19*SZXREG >>>> + xvld xr20, sp, 13*SZREG + 20*SZXREG >>>> + xvld xr21, sp, 13*SZREG + 21*SZXREG >>>> + xvld xr22, sp, 13*SZREG + 22*SZXREG >>>> + xvld xr23, sp, 13*SZREG + 23*SZXREG >>>> + xvld xr24, sp, 13*SZREG + 24*SZXREG >>>> + xvld xr25, sp, 13*SZREG + 25*SZXREG >>>> + xvld xr26, sp, 13*SZREG + 26*SZXREG >>>> + xvld xr27, sp, 13*SZREG + 27*SZXREG >>>> + xvld xr28, sp, 13*SZREG + 28*SZXREG >>>> + xvld xr29, sp, 13*SZREG + 29*SZXREG >>>> + xvld xr30, sp, 13*SZREG + 30*SZXREG >>>> + xvld xr31, sp, 13*SZREG + 31*SZXREG >>>> + REG_L t0, sp, 32*SZXREG >>>> + movgr2fcsr fcsr0, t0 >>>> +#elif defined USE_LSX >>>> + vld vr0, sp, 13*SZREG + 0*SZVREG >>>> + vld vr1, sp, 13*SZREG + 1*SZVREG >>>> + vld vr2, sp, 13*SZREG + 2*SZVREG >>>> + vld vr3, sp, 13*SZREG + 3*SZVREG >>>> + vld vr4, sp, 13*SZREG + 4*SZVREG >>>> + vld vr5, sp, 13*SZREG + 5*SZVREG >>>> + vld vr6, sp, 13*SZREG + 6*SZVREG >>>> + vld vr7, sp, 13*SZREG + 7*SZVREG >>>> + vld vr8, sp, 13*SZREG + 8*SZVREG >>>> + vld vr9, sp, 13*SZREG + 9*SZVREG >>>> + vld vr10, sp, 13*SZREG + 10*SZVREG >>>> + vld vr11, sp, 13*SZREG + 11*SZVREG >>>> + vld vr12, sp, 13*SZREG + 12*SZVREG >>>> + vld vr13, sp, 13*SZREG + 13*SZVREG >>>> + vld vr14, sp, 13*SZREG + 14*SZVREG >>>> + vld vr15, sp, 13*SZREG + 15*SZVREG >>>> + vld vr16, sp, 13*SZREG + 16*SZVREG >>>> + vld vr17, sp, 13*SZREG + 17*SZVREG >>>> + vld vr18, sp, 13*SZREG + 18*SZVREG >>>> + vld vr19, sp, 13*SZREG + 19*SZVREG >>>> + vld vr20, sp, 13*SZREG + 20*SZVREG >>>> + vld vr21, sp, 13*SZREG + 21*SZVREG >>>> + vld vr22, sp, 13*SZREG + 22*SZVREG >>>> + vld vr23, sp, 13*SZREG + 23*SZVREG >>>> + vld vr24, sp, 13*SZREG + 24*SZVREG >>>> + vld vr25, sp, 13*SZREG + 25*SZVREG >>>> + vld 
vr26, sp, 13*SZREG + 26*SZVREG >>>> + vld vr27, sp, 13*SZREG + 27*SZVREG >>>> + vld vr28, sp, 13*SZREG + 28*SZVREG >>>> + vld vr29, sp, 13*SZREG + 29*SZVREG >>>> + vld vr30, sp, 13*SZREG + 30*SZVREG >>>> + vld vr31, sp, 13*SZREG + 31*SZVREG >>>> + REG_L t0, sp, 32*SZVREG >>>> + movgr2fcsr fcsr0, t0 >>>> +#elif !defined __loongarch_soft_float >>>> + FREG_L fa0, sp, 13*SZREG + 0*SZFREG >>>> + FREG_L fa1, sp, 13*SZREG + 1*SZFREG >>>> + FREG_L fa2, sp, 13*SZREG + 2*SZFREG >>>> + FREG_L fa3, sp, 13*SZREG + 3*SZFREG >>>> + FREG_L fa4, sp, 13*SZREG + 4*SZFREG >>>> + FREG_L fa5, sp, 13*SZREG + 5*SZFREG >>>> + FREG_L fa6, sp, 13*SZREG + 6*SZFREG >>>> + FREG_L fa7, sp, 13*SZREG + 7*SZFREG >>>> + FREG_L ft0, sp, 13*SZREG + 8*SZFREG >>>> + FREG_L ft1, sp, 13*SZREG + 9*SZFREG >>>> + FREG_L ft2, sp, 13*SZREG + 10*SZFREG >>>> + FREG_L ft3, sp, 13*SZREG + 11*SZFREG >>>> + FREG_L ft4, sp, 13*SZREG + 12*SZFREG >>>> + FREG_L ft5, sp, 13*SZREG + 13*SZFREG >>>> + FREG_L ft6, sp, 13*SZREG + 14*SZFREG >>>> + FREG_L ft7, sp, 13*SZREG + 15*SZFREG >>>> + FREG_L ft8, sp, 13*SZREG + 16*SZFREG >>>> + FREG_L ft9, sp, 13*SZREG + 17*SZFREG >>>> + FREG_L ft10, sp, 13*SZREG + 18*SZFREG >>>> + FREG_L ft11, sp, 13*SZREG + 19*SZFREG >>>> + FREG_L ft12, sp, 13*SZREG + 20*SZFREG >>>> + FREG_L ft13, sp, 13*SZREG + 21*SZFREG >>>> + FREG_L ft14, sp, 13*SZREG + 22*SZFREG >>>> + FREG_L ft15, sp, 13*SZREG + 23*SZFREG >>>> + REG_L t0, sp, 24*SZFREG >>>> + movgr2fcsr fcsr0, t0 >>>> +#endif /* #ifdef USE_LASX */ >>>> + >>>> + ADDI sp, sp, FRAME_SIZE >>>> + b Lret >>>> + cfi_endproc >>>> + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic >>>> +#endif /* #ifdef SHARED */ >>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S >>>> new file mode 100644 >>>> index 0000000000..4a17079169 >>>> --- /dev/null >>>> +++ b/sysdeps/loongarch/dl-tlsdesc.S >>>> @@ -0,0 +1,93 @@ >>>> +/* Thread-local storage handling in the ELF dynamic linker. >>>> + LoongArch version. 
>>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >>> Update Copyright years to 2024. >>> >>>> + >>>> + This file is part of the GNU C Library. >>>> + >>>> + The GNU C Library is free software; you can redistribute it and/or >>>> + modify it under the terms of the GNU Lesser General Public >>>> + License as published by the Free Software Foundation; either >>>> + version 2.1 of the License, or (at your option) any later version. >>>> + >>>> + The GNU C Library is distributed in the hope that it will be useful, >>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>>> + Lesser General Public License for more details. >>>> + >>>> + You should have received a copy of the GNU Lesser General Public >>>> + License along with the GNU C Library; if not, see >>>> + <https://www.gnu.org/licenses/>. */ >>>> + >>>> +#include <sysdep.h> >>>> +#include <tls.h> >>>> +#include "tlsdesc.h" >>>> + >>>> + .text >>>> + >>>> + /* Compute the thread pointer offset for symbols in the static >>>> + TLS block. The offset is the same for all threads. >>>> + Prototype: >>>> + _dl_tlsdesc_return (tlsdesc *); */ >>>> + .hidden _dl_tlsdesc_return >>>> + .global _dl_tlsdesc_return >>>> + .type _dl_tlsdesc_return,%function >>>> + cfi_startproc >>>> + .align 2 >>>> +_dl_tlsdesc_return: >>>> + REG_L a0, a0, 8 >>>> + RET >>>> + cfi_endproc >>>> + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return >>>> + >>>> + /* Handler for undefined weak TLS symbols. >>>> + Prototype: >>>> + _dl_tlsdesc_undefweak (tlsdesc *); >>>> + >>>> + The second word of the descriptor contains the addend. >>>> + Return the addend minus the thread pointer. This ensures >>>> + that when the caller adds on the thread pointer it gets back >>>> + the addend. 
*/ >>>> + .hidden _dl_tlsdesc_undefweak >>>> + .global _dl_tlsdesc_undefweak >>>> + .type _dl_tlsdesc_undefweak,%function >>>> + cfi_startproc >>>> + .align 2 >>>> +_dl_tlsdesc_undefweak: >>>> + REG_L a0, a0, 8 >>>> + sub.d a0, a0, tp >>>> + RET >>>> + cfi_endproc >>>> + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak >>>> + >>>> + >>>> +#ifdef SHARED >>>> + >>>> +#if !defined __loongarch_soft_float >>>> + >>>> +#define USE_LASX >>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx >>>> +#define Lret Lret_lasx >>>> +#define Lslow Lslow_lasx >>>> +#include "dl-tlsdesc-dynamic.h" >>>> +#undef FRAME_SIZE >>>> +#undef USE_LASX >>>> +#undef _dl_tlsdesc_dynamic >>>> +#undef Lret >>>> +#undef Lslow >>>> + >>>> +#define USE_LSX >>>> +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx >>>> +#define Lret Lret_lsx >>>> +#define Lslow Lslow_lsx >>>> +#include "dl-tlsdesc-dynamic.h" >>>> +#undef FRAME_SIZE >>>> +#undef USE_LSX >>>> +#undef _dl_tlsdesc_dynamic >>>> +#undef Lret >>>> +#undef Lslow >>>> + >>>> +#endif >>>> + >>>> +#include "dl-tlsdesc-dynamic.h" >>>> + >>>> +#endif /* #ifdef SHARED */ >>>> diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h >>>> new file mode 100644 >>>> index 0000000000..988037a714 >>>> --- /dev/null >>>> +++ b/sysdeps/loongarch/dl-tlsdesc.h >>>> @@ -0,0 +1,53 @@ >>>> +/* Thread-local storage descriptor handling in the ELF dynamic linker. >>>> + LoongArch version. >>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >>>> + >>>> + This file is part of the GNU C Library. >>>> + >>>> + The GNU C Library is free software; you can redistribute it and/or >>>> + modify it under the terms of the GNU Lesser General Public >>>> + License as published by the Free Software Foundation; either >>>> + version 2.1 of the License, or (at your option) any later version. 
>>>> + >>>> + The GNU C Library is distributed in the hope that it will be useful, >>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>>> + Lesser General Public License for more details. >>>> + >>>> + You should have received a copy of the GNU Lesser General Public >>>> + License along with the GNU C Library; if not, see >>>> + <https://www.gnu.org/licenses/>. */ >>>> + >>>> +#ifndef _DL_TLSDESC_H >>>> +#define _DL_TLSDESC_H >>>> + >>>> +#include <dl-tls.h> >>>> + >>>> +/* Type used to represent a TLS descriptor in the GOT. */ >>>> +struct tlsdesc >>>> +{ >>>> + ptrdiff_t (*entry) (struct tlsdesc *); >>>> + void *arg; >>>> +}; >>>> + >>>> +/* Type used as the argument in a TLS descriptor for a symbol that >>>> + needs dynamic TLS offsets. */ >>>> +struct tlsdesc_dynamic_arg >>>> +{ >>>> + tls_index tlsinfo; >>>> + size_t gen_count; >>>> +}; >>>> + >>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); >>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); >>>> + >>>> +# ifdef SHARED >>>> +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); >>>> +#if !defined __loongarch_soft_float >>> Minor style, usually for single tests we use '#ifndef' and add >>> attribute_hidden at the end of prototype. >>> >>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); >>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); >>>> +#endif >>>> +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); >>>> +#endif >>>> + >>>> +#endif >>>> diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h >>>> index 4d8737ee7f..9b1773634c 100644 >>>> --- a/sysdeps/loongarch/linkmap.h >>>> +++ b/sysdeps/loongarch/linkmap.h >>>> @@ -19,4 +19,5 @@ >>>> struct link_map_machine >>>> { >>>> ElfW (Addr) plt; /* Address of .plt. 
*/ >>>> + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ >>>> }; >>>> diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h >>>> index 51521a7eb4..23c1d12914 100644 >>>> --- a/sysdeps/loongarch/sys/asm.h >>>> +++ b/sysdeps/loongarch/sys/asm.h >>>> @@ -25,6 +25,7 @@ >>>> /* Macros to handle different pointer/register sizes for 32/64-bit code. */ >>>> #define SZREG 8 >>>> #define SZFREG 8 >>>> +#define SZFCSREG 4 >>>> #define SZVREG 16 >>>> #define SZXREG 32 >>>> #define REG_L ld.d >>>> diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h >>>> index f61ee25b25..80ce3e9c00 100644 >>>> --- a/sysdeps/loongarch/sys/regdef.h >>>> +++ b/sysdeps/loongarch/sys/regdef.h >>>> @@ -97,6 +97,7 @@ >>>> #define fcc5 $fcc5 >>>> #define fcc6 $fcc6 >>>> #define fcc7 $fcc7 >>>> +#define fcsr0 $fcsr0 >>>> #define vr0 $vr0 >>>> #define vr1 $vr1 >>>> diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c >>>> new file mode 100644 >>>> index 0000000000..a357e7619f >>>> --- /dev/null >>>> +++ b/sysdeps/loongarch/tlsdesc.c >>>> @@ -0,0 +1,39 @@ >>>> +/* Manage TLS descriptors. AArch64 version. >>>> + >>>> + Copyright (C) 2011-2023 Free Software Foundation, Inc. >>> Update Copyright years to 2024 and remove the 'AArch64'. >>> >>> >>>> + >>>> + This file is part of the GNU C Library. >>>> + >>>> + The GNU C Library is free software; you can redistribute it and/or >>>> + modify it under the terms of the GNU Lesser General Public >>>> + License as published by the Free Software Foundation; either >>>> + version 2.1 of the License, or (at your option) any later version. >>>> + >>>> + The GNU C Library is distributed in the hope that it will be useful, >>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of >>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >>>> + Lesser General Public License for more details. 
>>>> + >>>> + You should have received a copy of the GNU Lesser General Public >>>> + License along with the GNU C Library; if not, see >>>> + <https://www.gnu.org/licenses/>. */ >>>> + >>>> +#include <ldsodefs.h> >>>> +#include <tls.h> >>>> +#include <dl-tlsdesc.h> >>>> +#include <dl-unmap-segments.h> >>>> +#include <tlsdeschtab.h> >>>> + >>>> +/* Unmap the dynamic object, but also release its TLS descriptor table >>>> + if there is one. */ >>>> + >>>> +void >>>> +_dl_unmap (struct link_map *map) >>>> +{ >>>> + _dl_unmap_segments (map); >>>> + >>>> +#ifdef SHARED >>>> + if (map->l_mach.tlsdesc_table) >>>> + htab_delete (map->l_mach.tlsdesc_table); >>>> +#endif >>>> +} >>>> diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym >>>> new file mode 100644 >>>> index 0000000000..bcab218631 >>>> --- /dev/null >>>> +++ b/sysdeps/loongarch/tlsdesc.sym >>>> @@ -0,0 +1,19 @@ >>>> +#include <stddef.h> >>>> +#include <sysdep.h> >>>> +#include <tls.h> >>>> +#include <link.h> >>>> +#include <dl-tlsdesc.h> >>>> + >>>> +-- >>>> + >>>> +-- Abuse tls.h macros to derive offsets relative to the thread register. 
>>>> + >>>> +TLSDESC_ARG offsetof(struct tlsdesc, arg) >>>> +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) >>>> +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) >>>> +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) >>>> +TCBHEAD_DTV offsetof(tcbhead_t, dtv) >>>> +DTV_COUNTER offsetof(dtv_t, counter) >>>> +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED >>>> +TLS_DTV_OFFSET TLS_DTV_OFFSET >>>> +SIZE_OF_DTV sizeof(tcbhead_t) >>>> diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data >>>> index 547b1c1b7f..ec32e6d13f 100644 >>>> --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data >>>> +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data >>>> @@ -5,3 +5,5 @@ libc.so: calloc >>>> libc.so: free >>>> libc.so: malloc >>>> libc.so: realloc >>>> +# The dynamic loader needs __tls_get_addr for TLS. >>>> +ld.so: __tls_get_addr
diff --git a/elf/elf.h b/elf/elf.h index f2206e5c06..eec24ea049 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -4237,6 +4237,8 @@ enum #define R_LARCH_TLS_TPREL32 10 #define R_LARCH_TLS_TPREL64 11 #define R_LARCH_IRELATIVE 12 +#define R_LARCH_TLS_DESC32 13 +#define R_LARCH_TLS_DESC64 14 /* Reserved for future relocs that the dynamic linker must understand. */ diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile index 43d2f583cd..181389e787 100644 --- a/sysdeps/loongarch/Makefile +++ b/sysdeps/loongarch/Makefile @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h endif ifeq ($(subdir),elf) +sysdep-dl-routines += tlsdesc dl-tlsdesc gen-as-const-headers += dl-link.sym endif +ifeq ($(subdir),csu) +gen-as-const-headers += tlsdesc.sym +endif + + # LoongArch's assembler also needs to know about PIC as it changes the # definition of some assembler macros. ASFLAGS-.os += $(pic-ccflag) diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym index b534968e30..fd81ef37d5 100644 --- a/sysdeps/loongarch/dl-link.sym +++ b/sysdeps/loongarch/dl-link.sym @@ -1,6 +1,7 @@ #include <stddef.h> #include <sysdep.h> #include <link.h> +#include <dl-tlsdesc.h> DL_SIZEOF_RG sizeof(struct La_loongarch_regs) DL_SIZEOF_RV sizeof(struct La_loongarch_retval) diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index ab81b82d95..8ca6c224f6 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -25,7 +25,7 @@ #include <entry.h> #include <elf/elf.h> #include <sys/asm.h> -#include <dl-tls.h> +#include <dl-tlsdesc.h> #include <dl-static-tls.h> #include <dl-machine-rel.h> @@ -187,6 +187,45 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; break; + case __WORDSIZE == 64 ? R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: + { + struct tlsdesc volatile *td = + (struct tlsdesc volatile *)addr_field; + if (! 
sym) + { + td->arg = (void*)reloc->r_addend; + td->entry = _dl_tlsdesc_undefweak; + } + else + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (!TRY_STATIC_TLS (map, sym_map)) + { + td->arg = _dl_make_tlsdesc_dynamic + (sym_map, sym->st_value + reloc->r_addend); +# if !defined __loongarch_soft_float + if (SUPPORT_LASX) + td->entry = _dl_tlsdesc_dynamic_lasx; + else + if (SUPPORT_LSX) + td->entry = _dl_tlsdesc_dynamic_lsx; + else +# endif + td->entry = _dl_tlsdesc_dynamic; + } + else +# endif + { + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) + + reloc->r_addend); + td->entry = _dl_tlsdesc_return; + } + } + break; + } + case R_LARCH_COPY: { if (sym == NULL) @@ -255,6 +294,25 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], else *reloc_addr = map->l_mach.plt; } + else if (__builtin_expect (r_type == R_LARCH_TLS_DESC64, 1)) + { + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); + const ElfW (Sym) *sym = &symtab[symndx]; + const struct r_found_version *version = NULL; + + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) + { + const ElfW (Half) *vernum = + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); + version = &map->l_versions[vernum[symndx] & 0x7fff]; + } + + /* Always initialize TLS descriptors completely, because lazy + initialization requires synchronization at every TLS access. */ + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, + skip_ifunc); + } else _dl_reloc_bad_type (map, r_type, 1); } diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h index 29924b866d..de593c002d 100644 --- a/sysdeps/loongarch/dl-tls.h +++ b/sysdeps/loongarch/dl-tls.h @@ -16,6 +16,9 @@ License along with the GNU C Library. If not, see <https://www.gnu.org/licenses/>. */ +#ifndef _DL_TLS_H +#define _DL_TLS_H + /* Type used for the representation of TLS information in the GOT. 
*/ typedef struct { @@ -23,6 +26,8 @@ typedef struct unsigned long int ti_offset; } tls_index; +extern void *__tls_get_addr (tls_index *ti); + /* The thread pointer points to the first static TLS block. */ #define TLS_TP_OFFSET 0 @@ -37,10 +42,10 @@ typedef struct /* Compute the value for a DTPREL reloc. */ #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) -extern void *__tls_get_addr (tls_index *ti); - #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) /* Value used for dtv entries for which the allocation is delayed. */ #define TLS_DTV_UNALLOCATED ((void *) -1l) + +#endif diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h new file mode 100644 index 0000000000..0d8c9bb991 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h @@ -0,0 +1,341 @@ +/* Thread-local storage handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2011-2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#ifdef USE_LASX +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZXREG - SZFCSREG) & ALMASK)) +#elif defined USE_LSX +# define FRAME_SIZE (-((-13 * SZREG - 32 * SZVREG - SZFCSREG) & ALMASK)) +#elif !defined __loongarch_soft_float +# define FRAME_SIZE (-((-13 * SZREG - 24 * SZFREG - SZFCSREG) & ALMASK)) +#else +# define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) +#endif + +#ifdef SHARED + /* Handler for dynamic TLS symbols. + Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. + + ptrdiff_t + __attribute__ ((__regparm__ (1))) + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); + if (__builtin_expect (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } + */ + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: + /* Save just enough registers to support fast path, if we fall + into slow path we will save additional registers. 
*/ + ADDI sp, sp,-24 + REG_S t0, sp, 0 + REG_S t1, sp, 8 + REG_S t2, sp, 16 + + REG_L t0, tp, -SIZE_OF_DTV # dtv(t0) = tp + TCBHEAD_DTV dtv start + REG_L a0, a0, TLSDESC_ARG # td(a0) = tdp->arg + REG_L t1, a0, TLSDESC_GEN_COUNT # t1 = td->gen_count + REG_L t2, t0, DTV_COUNTER # t2 = dtv[0].counter + bltu t2, t1, Lslow + + REG_L t1, a0, TLSDESC_MODID # t1 = td->tlsinfo.ti_module + slli.d t1, t1, 3 + 1 # /* sizeof(dtv_t) == sizeof(void*) * 2 */ + add.d t1, t1, t0 # t1 = dtv + ti_module * sizeof(dtv_t) + REG_L t1, t1, 0 # t1 = dtv[td->tlsinfo.ti_module].pointer.val + li.d t2, TLS_DTV_UNALLOCATED + beq t1, t2, Lslow + REG_L t2, a0, TLSDESC_MODOFF # t2 = td->tlsinfo.ti_offset + # dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset + add.d a0, t1, t2 +Lret: + sub.d a0, a0, tp + REG_L t0, sp, 0 + REG_L t1, sp, 8 + REG_L t2, sp, 16 + ADDI sp, sp, 24 + RET + +Lslow: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the registers that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save. 
*/ + ADDI sp, sp, -FRAME_SIZE + REG_S ra, sp, 0 * SZREG + REG_S a1, sp, 1 * SZREG + REG_S a2, sp, 2 * SZREG + REG_S a3, sp, 3 * SZREG + REG_S a4, sp, 4 * SZREG + REG_S a5, sp, 5 * SZREG + REG_S a6, sp, 6 * SZREG + REG_S a7, sp, 7 * SZREG + REG_S t4, sp, 8 * SZREG + REG_S t5, sp, 9 * SZREG + REG_S t6, sp, 10 * SZREG + REG_S t7, sp, 11 * SZREG + REG_S t8, sp, 12 * SZREG + +#ifdef USE_LASX + xvst xr0, sp, 13*SZREG + 0*SZXREG + xvst xr1, sp, 13*SZREG + 1*SZXREG + xvst xr2, sp, 13*SZREG + 2*SZXREG + xvst xr3, sp, 13*SZREG + 3*SZXREG + xvst xr4, sp, 13*SZREG + 4*SZXREG + xvst xr5, sp, 13*SZREG + 5*SZXREG + xvst xr6, sp, 13*SZREG + 6*SZXREG + xvst xr7, sp, 13*SZREG + 7*SZXREG + xvst xr8, sp, 13*SZREG + 8*SZXREG + xvst xr9, sp, 13*SZREG + 9*SZXREG + xvst xr10, sp, 13*SZREG + 10*SZXREG + xvst xr11, sp, 13*SZREG + 11*SZXREG + xvst xr12, sp, 13*SZREG + 12*SZXREG + xvst xr13, sp, 13*SZREG + 13*SZXREG + xvst xr14, sp, 13*SZREG + 14*SZXREG + xvst xr15, sp, 13*SZREG + 15*SZXREG + xvst xr16, sp, 13*SZREG + 16*SZXREG + xvst xr17, sp, 13*SZREG + 17*SZXREG + xvst xr18, sp, 13*SZREG + 18*SZXREG + xvst xr19, sp, 13*SZREG + 19*SZXREG + xvst xr20, sp, 13*SZREG + 20*SZXREG + xvst xr21, sp, 13*SZREG + 21*SZXREG + xvst xr22, sp, 13*SZREG + 22*SZXREG + xvst xr23, sp, 13*SZREG + 23*SZXREG + xvst xr24, sp, 13*SZREG + 24*SZXREG + xvst xr25, sp, 13*SZREG + 25*SZXREG + xvst xr26, sp, 13*SZREG + 26*SZXREG + xvst xr27, sp, 13*SZREG + 27*SZXREG + xvst xr28, sp, 13*SZREG + 28*SZXREG + xvst xr29, sp, 13*SZREG + 29*SZXREG + xvst xr30, sp, 13*SZREG + 30*SZXREG + xvst xr31, sp, 13*SZREG + 31*SZXREG + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of + # some fields in fcsr0 + movfcsr2gr t0, fcsr0 + REG_S t0, sp, 32*SZXREG +#elif defined USE_LSX + vst vr0, sp, 13*SZREG + 0*SZVREG + vst vr1, sp, 13*SZREG + 1*SZVREG + vst vr2, sp, 13*SZREG + 2*SZVREG + vst vr3, sp, 13*SZREG + 3*SZVREG + vst vr4, sp, 13*SZREG + 4*SZVREG + vst vr5, sp, 13*SZREG + 5*SZVREG + vst vr6, sp, 13*SZREG + 6*SZVREG + vst 
vr7, sp, 13*SZREG + 7*SZVREG + vst vr8, sp, 13*SZREG + 8*SZVREG + vst vr9, sp, 13*SZREG + 9*SZVREG + vst vr10, sp, 13*SZREG + 10*SZVREG + vst vr11, sp, 13*SZREG + 11*SZVREG + vst vr12, sp, 13*SZREG + 12*SZVREG + vst vr13, sp, 13*SZREG + 13*SZVREG + vst vr14, sp, 13*SZREG + 14*SZVREG + vst vr15, sp, 13*SZREG + 15*SZVREG + vst vr16, sp, 13*SZREG + 16*SZVREG + vst vr17, sp, 13*SZREG + 17*SZVREG + vst vr18, sp, 13*SZREG + 18*SZVREG + vst vr19, sp, 13*SZREG + 19*SZVREG + vst vr20, sp, 13*SZREG + 20*SZVREG + vst vr21, sp, 13*SZREG + 21*SZVREG + vst vr22, sp, 13*SZREG + 22*SZVREG + vst vr23, sp, 13*SZREG + 23*SZVREG + vst vr24, sp, 13*SZREG + 24*SZVREG + vst vr25, sp, 13*SZREG + 25*SZVREG + vst vr26, sp, 13*SZREG + 26*SZVREG + vst vr27, sp, 13*SZREG + 27*SZVREG + vst vr28, sp, 13*SZREG + 28*SZVREG + vst vr29, sp, 13*SZREG + 29*SZVREG + vst vr30, sp, 13*SZREG + 30*SZVREG + vst vr31, sp, 13*SZREG + 31*SZVREG + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of + # some fields in fcsr0 + movfcsr2gr t0, fcsr0 + REG_S t0, sp, 32*SZVREG +#elif !defined __loongarch_soft_float + FREG_S fa0, sp, 13*SZREG + 0*SZFREG + FREG_S fa1, sp, 13*SZREG + 1*SZFREG + FREG_S fa2, sp, 13*SZREG + 2*SZFREG + FREG_S fa3, sp, 13*SZREG + 3*SZFREG + FREG_S fa4, sp, 13*SZREG + 4*SZFREG + FREG_S fa5, sp, 13*SZREG + 5*SZFREG + FREG_S fa6, sp, 13*SZREG + 6*SZFREG + FREG_S fa7, sp, 13*SZREG + 7*SZFREG + FREG_S ft0, sp, 13*SZREG + 8*SZFREG + FREG_S ft1, sp, 13*SZREG + 9*SZFREG + FREG_S ft2, sp, 13*SZREG + 10*SZFREG + FREG_S ft3, sp, 13*SZREG + 11*SZFREG + FREG_S ft4, sp, 13*SZREG + 12*SZFREG + FREG_S ft5, sp, 13*SZREG + 13*SZFREG + FREG_S ft6, sp, 13*SZREG + 14*SZFREG + FREG_S ft7, sp, 13*SZREG + 15*SZFREG + FREG_S ft8, sp, 13*SZREG + 16*SZFREG + FREG_S ft9, sp, 13*SZREG + 17*SZFREG + FREG_S ft10, sp, 13*SZREG + 18*SZFREG + FREG_S ft11, sp, 13*SZREG + 19*SZFREG + FREG_S ft12, sp, 13*SZREG + 20*SZFREG + FREG_S ft13, sp, 13*SZREG + 21*SZFREG + FREG_S ft14, sp, 13*SZREG + 22*SZFREG + FREG_S ft15, 
sp, 13*SZREG + 23*SZFREG + # Only one physical fcsr0 register, fcsr1-fcsr3 are aliases of + # some fields in fcsr0 + movfcsr2gr t0, fcsr0 + REG_S t0, sp, 24*SZFREG +#endif /* #ifdef USE_LASX */ + + bl __tls_get_addr + ADDI a0, a0, -TLS_DTV_OFFSET + + REG_L ra, sp, 0 + REG_L a1, sp, 1 * 8 + REG_L a2, sp, 2 * 8 + REG_L a3, sp, 3 * 8 + REG_L a4, sp, 4 * 8 + REG_L a5, sp, 5 * 8 + REG_L a6, sp, 6 * 8 + REG_L a7, sp, 7 * 8 + REG_L t4, sp, 8 * 8 + REG_L t5, sp, 9 * 8 + REG_L t6, sp, 10 * 8 + REG_L t7, sp, 11 * 8 + REG_L t8, sp, 12 * 8 + +#ifdef USE_LASX + xvld xr0, sp, 13*SZREG + 0*SZXREG + xvld xr1, sp, 13*SZREG + 1*SZXREG + xvld xr2, sp, 13*SZREG + 2*SZXREG + xvld xr3, sp, 13*SZREG + 3*SZXREG + xvld xr4, sp, 13*SZREG + 4*SZXREG + xvld xr5, sp, 13*SZREG + 5*SZXREG + xvld xr6, sp, 13*SZREG + 6*SZXREG + xvld xr7, sp, 13*SZREG + 7*SZXREG + xvld xr8, sp, 13*SZREG + 8*SZXREG + xvld xr9, sp, 13*SZREG + 9*SZXREG + xvld xr10, sp, 13*SZREG + 10*SZXREG + xvld xr11, sp, 13*SZREG + 11*SZXREG + xvld xr12, sp, 13*SZREG + 12*SZXREG + xvld xr13, sp, 13*SZREG + 13*SZXREG + xvld xr14, sp, 13*SZREG + 14*SZXREG + xvld xr15, sp, 13*SZREG + 15*SZXREG + xvld xr16, sp, 13*SZREG + 16*SZXREG + xvld xr17, sp, 13*SZREG + 17*SZXREG + xvld xr18, sp, 13*SZREG + 18*SZXREG + xvld xr19, sp, 13*SZREG + 19*SZXREG + xvld xr20, sp, 13*SZREG + 20*SZXREG + xvld xr21, sp, 13*SZREG + 21*SZXREG + xvld xr22, sp, 13*SZREG + 22*SZXREG + xvld xr23, sp, 13*SZREG + 23*SZXREG + xvld xr24, sp, 13*SZREG + 24*SZXREG + xvld xr25, sp, 13*SZREG + 25*SZXREG + xvld xr26, sp, 13*SZREG + 26*SZXREG + xvld xr27, sp, 13*SZREG + 27*SZXREG + xvld xr28, sp, 13*SZREG + 28*SZXREG + xvld xr29, sp, 13*SZREG + 29*SZXREG + xvld xr30, sp, 13*SZREG + 30*SZXREG + xvld xr31, sp, 13*SZREG + 31*SZXREG + REG_L t0, sp, 32*SZXREG + movgr2fcsr fcsr0, t0 +#elif defined USE_LSX + vld vr0, sp, 13*SZREG + 0*SZVREG + vld vr1, sp, 13*SZREG + 1*SZVREG + vld vr2, sp, 13*SZREG + 2*SZVREG + vld vr3, sp, 13*SZREG + 3*SZVREG + vld vr4, sp, 13*SZREG + 4*SZVREG + 
vld vr5, sp, 13*SZREG + 5*SZVREG + vld vr6, sp, 13*SZREG + 6*SZVREG + vld vr7, sp, 13*SZREG + 7*SZVREG + vld vr8, sp, 13*SZREG + 8*SZVREG + vld vr9, sp, 13*SZREG + 9*SZVREG + vld vr10, sp, 13*SZREG + 10*SZVREG + vld vr11, sp, 13*SZREG + 11*SZVREG + vld vr12, sp, 13*SZREG + 12*SZVREG + vld vr13, sp, 13*SZREG + 13*SZVREG + vld vr14, sp, 13*SZREG + 14*SZVREG + vld vr15, sp, 13*SZREG + 15*SZVREG + vld vr16, sp, 13*SZREG + 16*SZVREG + vld vr17, sp, 13*SZREG + 17*SZVREG + vld vr18, sp, 13*SZREG + 18*SZVREG + vld vr19, sp, 13*SZREG + 19*SZVREG + vld vr20, sp, 13*SZREG + 20*SZVREG + vld vr21, sp, 13*SZREG + 21*SZVREG + vld vr22, sp, 13*SZREG + 22*SZVREG + vld vr23, sp, 13*SZREG + 23*SZVREG + vld vr24, sp, 13*SZREG + 24*SZVREG + vld vr25, sp, 13*SZREG + 25*SZVREG + vld vr26, sp, 13*SZREG + 26*SZVREG + vld vr27, sp, 13*SZREG + 27*SZVREG + vld vr28, sp, 13*SZREG + 28*SZVREG + vld vr29, sp, 13*SZREG + 29*SZVREG + vld vr30, sp, 13*SZREG + 30*SZVREG + vld vr31, sp, 13*SZREG + 31*SZVREG + REG_L t0, sp, 32*SZVREG + movgr2fcsr fcsr0, t0 +#elif !defined __loongarch_soft_float + FREG_L fa0, sp, 13*SZREG + 0*SZFREG + FREG_L fa1, sp, 13*SZREG + 1*SZFREG + FREG_L fa2, sp, 13*SZREG + 2*SZFREG + FREG_L fa3, sp, 13*SZREG + 3*SZFREG + FREG_L fa4, sp, 13*SZREG + 4*SZFREG + FREG_L fa5, sp, 13*SZREG + 5*SZFREG + FREG_L fa6, sp, 13*SZREG + 6*SZFREG + FREG_L fa7, sp, 13*SZREG + 7*SZFREG + FREG_L ft0, sp, 13*SZREG + 8*SZFREG + FREG_L ft1, sp, 13*SZREG + 9*SZFREG + FREG_L ft2, sp, 13*SZREG + 10*SZFREG + FREG_L ft3, sp, 13*SZREG + 11*SZFREG + FREG_L ft4, sp, 13*SZREG + 12*SZFREG + FREG_L ft5, sp, 13*SZREG + 13*SZFREG + FREG_L ft6, sp, 13*SZREG + 14*SZFREG + FREG_L ft7, sp, 13*SZREG + 15*SZFREG + FREG_L ft8, sp, 13*SZREG + 16*SZFREG + FREG_L ft9, sp, 13*SZREG + 17*SZFREG + FREG_L ft10, sp, 13*SZREG + 18*SZFREG + FREG_L ft11, sp, 13*SZREG + 19*SZFREG + FREG_L ft12, sp, 13*SZREG + 20*SZFREG + FREG_L ft13, sp, 13*SZREG + 21*SZFREG + FREG_L ft14, sp, 13*SZREG + 22*SZFREG + FREG_L ft15, sp, 13*SZREG + 
23*SZFREG + REG_L t0, sp, 24*SZFREG + movgr2fcsr fcsr0, t0 +#endif /* #ifdef USE_LASX */ + + ADDI sp, sp, FRAME_SIZE + b Lret + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +#endif /* #ifdef SHARED */ diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S new file mode 100644 index 0000000000..4a17079169 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -0,0 +1,93 @@ +/* Thread-local storage handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2011-2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <tls.h> +#include "tlsdesc.h" + + .text + + /* Compute the thread pointer offset for symbols in the static + TLS block. The offset is the same for all threads. + Prototype: + _dl_tlsdesc_return (tlsdesc *); */ + .hidden _dl_tlsdesc_return + .global _dl_tlsdesc_return + .type _dl_tlsdesc_return,%function + cfi_startproc + .align 2 +_dl_tlsdesc_return: + REG_L a0, a0, 8 + RET + cfi_endproc + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return + + /* Handler for undefined weak TLS symbols. + Prototype: + _dl_tlsdesc_undefweak (tlsdesc *); + + The second word of the descriptor contains the addend. + Return the addend minus the thread pointer. 
This ensures + that when the caller adds on the thread pointer it gets back + the addend. */ + .hidden _dl_tlsdesc_undefweak + .global _dl_tlsdesc_undefweak + .type _dl_tlsdesc_undefweak,%function + cfi_startproc + .align 2 +_dl_tlsdesc_undefweak: + REG_L a0, a0, 8 + sub.d a0, a0, tp + RET + cfi_endproc + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + + +#ifdef SHARED + +#if !defined __loongarch_soft_float + +#define USE_LASX +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx +#define Lret Lret_lasx +#define Lslow Lslow_lasx +#include "dl-tlsdesc-dynamic.h" +#undef FRAME_SIZE +#undef USE_LASX +#undef _dl_tlsdesc_dynamic +#undef Lret +#undef Lslow + +#define USE_LSX +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx +#define Lret Lret_lsx +#define Lslow Lslow_lsx +#include "dl-tlsdesc-dynamic.h" +#undef FRAME_SIZE +#undef USE_LSX +#undef _dl_tlsdesc_dynamic +#undef Lret +#undef Lslow + +#endif + +#include "dl-tlsdesc-dynamic.h" + +#endif /* #ifdef SHARED */ diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h new file mode 100644 index 0000000000..988037a714 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc.h @@ -0,0 +1,53 @@ +/* Thread-local storage descriptor handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2011-2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _DL_TLSDESC_H +#define _DL_TLSDESC_H + +#include <dl-tls.h> + +/* Type used to represent a TLS descriptor in the GOT. */ +struct tlsdesc +{ + ptrdiff_t (*entry) (struct tlsdesc *); + void *arg; +}; + +/* Type used as the argument in a TLS descriptor for a symbol that + needs dynamic TLS offsets. */ +struct tlsdesc_dynamic_arg +{ + tls_index tlsinfo; + size_t gen_count; +}; + +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); + +# ifdef SHARED +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); +#if !defined __loongarch_soft_float +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *); +#endif +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); +#endif + +#endif diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h index 4d8737ee7f..9b1773634c 100644 --- a/sysdeps/loongarch/linkmap.h +++ b/sysdeps/loongarch/linkmap.h @@ -19,4 +19,5 @@ struct link_map_machine { ElfW (Addr) plt; /* Address of .plt. */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ }; diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h index 51521a7eb4..23c1d12914 100644 --- a/sysdeps/loongarch/sys/asm.h +++ b/sysdeps/loongarch/sys/asm.h @@ -25,6 +25,7 @@ /* Macros to handle different pointer/register sizes for 32/64-bit code. 
*/ #define SZREG 8 #define SZFREG 8 +#define SZFCSREG 4 #define SZVREG 16 #define SZXREG 32 #define REG_L ld.d diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h index f61ee25b25..80ce3e9c00 100644 --- a/sysdeps/loongarch/sys/regdef.h +++ b/sysdeps/loongarch/sys/regdef.h @@ -97,6 +97,7 @@ #define fcc5 $fcc5 #define fcc6 $fcc6 #define fcc7 $fcc7 +#define fcsr0 $fcsr0 #define vr0 $vr0 #define vr1 $vr1 diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c new file mode 100644 index 0000000000..a357e7619f --- /dev/null +++ b/sysdeps/loongarch/tlsdesc.c @@ -0,0 +1,39 @@ +/* Manage TLS descriptors. LoongArch version. + + Copyright (C) 2011-2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <ldsodefs.h> +#include <tls.h> +#include <dl-tlsdesc.h> +#include <dl-unmap-segments.h> +#include <tlsdeschtab.h> + +/* Unmap the dynamic object, but also release its TLS descriptor table + if there is one. 
*/ + +void +_dl_unmap (struct link_map *map) +{ + _dl_unmap_segments (map); + +#ifdef SHARED + if (map->l_mach.tlsdesc_table) + htab_delete (map->l_mach.tlsdesc_table); +#endif +} diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym new file mode 100644 index 0000000000..bcab218631 --- /dev/null +++ b/sysdeps/loongarch/tlsdesc.sym @@ -0,0 +1,19 @@ +#include <stddef.h> +#include <sysdep.h> +#include <tls.h> +#include <link.h> +#include <dl-tlsdesc.h> + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. + +TLSDESC_ARG offsetof(struct tlsdesc, arg) +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) +TCBHEAD_DTV offsetof(tcbhead_t, dtv) +DTV_COUNTER offsetof(dtv_t, counter) +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED +TLS_DTV_OFFSET TLS_DTV_OFFSET +SIZE_OF_DTV sizeof(tcbhead_t) diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data index 547b1c1b7f..ec32e6d13f 100644 --- a/sysdeps/unix/sysv/linux/loongarch/localplt.data +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data @@ -5,3 +5,5 @@ libc.so: calloc libc.so: free libc.so: malloc libc.so: realloc +# The dynamic loader needs __tls_get_addr for TLS. +ld.so: __tls_get_addr