Message ID | 20240331073618.3677346-1-mengqinggang@loongson.cn |
---|---|
State | New |
Headers | show |
Series | [v4] LoongArch: Add support for TLS Descriptors | expand |
On 31/03/24 04:36, mengqinggang wrote: > This is mostly based on AArch64 and RISC-V implementation. > > Add R_LARCH_TLS_DESC32 and R_LARCH_TLS_DESC64 relocations. > > For _dl_tlsdesc_dynamic function slow path, temporarily save and restore > all vector registers. Some comments below. > --- > Changes v3 -> v4: > - Add register save/restore test case. > > Changes v2 -> v3: > - Remove _dl_tlsdesc_return_lasx, _dl_tlsdesc_return_lsx. > Provide only one _dl_tlsdesc_dynamic implementation and check the > required save/restore of vector register based on hwcap value. > - Other details mentained by Adhemerval Zanella Netto, H.J. Lu and caiyinyu. > > Changes v1 -> v2: > - Fix vr24-vr31, xr24-xr31 typo. > - Save and restore max length float or vector registors in _dl_tlsdesc_dynamic. > - Save and restore fcsr0 in _dl_tlsdesc_dynamic. > > v3 link: https://sourceware.org/pipermail/libc-alpha/2024-March/155204.html > v2 link: https://sourceware.org/pipermail/libc-alpha/2024-February/155068.html > v1 link: https://sourceware.org/pipermail/libc-alpha/2023-December/153052.html > > elf/elf.h | 2 + > sysdeps/loongarch/Makefile | 6 + > sysdeps/loongarch/dl-machine.h | 52 ++- > sysdeps/loongarch/dl-tls.h | 9 +- > sysdeps/loongarch/dl-tlsdesc.S | 417 ++++++++++++++++++ > sysdeps/loongarch/dl-tlsdesc.h | 49 ++ > sysdeps/loongarch/linkmap.h | 3 +- > sysdeps/loongarch/preconfigure | 1 + > sysdeps/loongarch/sys/asm.h | 1 + > sysdeps/loongarch/sys/regdef.h | 1 + > sysdeps/loongarch/tlsdesc.c | 39 ++ > sysdeps/loongarch/tlsdesc.sym | 28 ++ > sysdeps/loongarch/tst-gnu2-tls2.h | 357 +++++++++++++++ > .../unix/sysv/linux/loongarch/localplt.data | 2 + > 14 files changed, 963 insertions(+), 4 deletions(-) > create mode 100644 sysdeps/loongarch/dl-tlsdesc.S > create mode 100644 sysdeps/loongarch/dl-tlsdesc.h > create mode 100644 sysdeps/loongarch/tlsdesc.c > create mode 100644 sysdeps/loongarch/tlsdesc.sym > create mode 100644 sysdeps/loongarch/tst-gnu2-tls2.h > > diff --git a/elf/elf.h 
b/elf/elf.h > index 55b2e87860..682bce5a94 100644 > --- a/elf/elf.h > +++ b/elf/elf.h > @@ -4241,6 +4241,8 @@ enum > #define R_LARCH_TLS_TPREL32 10 > #define R_LARCH_TLS_TPREL64 11 > #define R_LARCH_IRELATIVE 12 > +#define R_LARCH_TLS_DESC32 13 > +#define R_LARCH_TLS_DESC64 14 > > /* Reserved for future relocs that the dynamic linker must understand. */ > > diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile > index 43d2f583cd..181389e787 100644 > --- a/sysdeps/loongarch/Makefile > +++ b/sysdeps/loongarch/Makefile > @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h > endif > > ifeq ($(subdir),elf) > +sysdep-dl-routines += tlsdesc dl-tlsdesc One line per entry: sysdep-dl-routines += \ dl-tlsdesc \ tlsdesc \ # sysdep-dl-routines > gen-as-const-headers += dl-link.sym > endif > > +ifeq ($(subdir),csu) > +gen-as-const-headers += tlsdesc.sym Same as before: gen-as-const-headers += \ tlsdesc.sym \ # gen-as-const-headers > +endif > + > + > # LoongArch's assembler also needs to know about PIC as it changes the > # definition of some assembler macros. > ASFLAGS-.os += $(pic-ccflag) > diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h > index ab81b82d95..0e22337183 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -25,7 +25,7 @@ > #include <entry.h> > #include <elf/elf.h> > #include <sys/asm.h> > -#include <dl-tls.h> > +#include <dl-tlsdesc.h> > #include <dl-static-tls.h> > #include <dl-machine-rel.h> > > @@ -187,6 +187,36 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], > *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; > break; > > + case __WORDSIZE == 64 ? 
R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: > + { > + struct tlsdesc volatile *td = (struct tlsdesc volatile *)addr_field; > + if (sym == NULL) > + { > + td->arg = (void*)reloc->r_addend; > + td->entry = _dl_tlsdesc_undefweak; > + } > + else > + { > +# ifndef SHARED > + CHECK_STATIC_TLS (map, sym_map); > +# else > + if (!TRY_STATIC_TLS (map, sym_map)) > + { > + td->arg = _dl_make_tlsdesc_dynamic (sym_map, > + sym->st_value + reloc->r_addend); > + td->entry = _dl_tlsdesc_dynamic; > + } > + else > +# endif > + { > + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) > + + reloc->r_addend); > + td->entry = _dl_tlsdesc_return; > + } > + } > + break; > + } > + > case R_LARCH_COPY: > { > if (sym == NULL) > @@ -255,6 +285,26 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], > else > *reloc_addr = map->l_mach.plt; > } > + else if (__glibc_likely (r_type == R_LARCH_TLS_DESC64) > + || __glibc_likely (r_type == R_LARCH_TLS_DESC32)) > + { > + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); > + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); > + const ElfW (Sym) *sym = &symtab[symndx]; > + const struct r_found_version *version = NULL; > + > + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) > + { > + const ElfW (Half) *vernum = (const void *)D_PTR (map, > + l_info[VERSYMIDX (DT_VERSYM)]); > + version = &map->l_versions[vernum[symndx] & 0x7fff]; > + } > + > + /* Always initialize TLS descriptors completely, because lazy > + initialization requires synchronization at every TLS access. */ > + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, > + skip_ifunc); > + } > else > _dl_reloc_bad_type (map, r_type, 1); > } Ok. > diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h > index 29924b866d..de593c002d 100644 > --- a/sysdeps/loongarch/dl-tls.h > +++ b/sysdeps/loongarch/dl-tls.h > @@ -16,6 +16,9 @@ > License along with the GNU C Library. If not, see > <https://www.gnu.org/licenses/>. 
*/ > > +#ifndef _DL_TLS_H > +#define _DL_TLS_H > + > /* Type used for the representation of TLS information in the GOT. */ > typedef struct > { > @@ -23,6 +26,8 @@ typedef struct > unsigned long int ti_offset; > } tls_index; > > +extern void *__tls_get_addr (tls_index *ti); > + > /* The thread pointer points to the first static TLS block. */ > #define TLS_TP_OFFSET 0 > > @@ -37,10 +42,10 @@ typedef struct > /* Compute the value for a DTPREL reloc. */ > #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) > > -extern void *__tls_get_addr (tls_index *ti); > - > #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) > #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) > > /* Value used for dtv entries for which the allocation is delayed. */ > #define TLS_DTV_UNALLOCATED ((void *) -1l) > + > +#endif Ok. > diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S > new file mode 100644 > index 0000000000..34028e988b > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.S > @@ -0,0 +1,417 @@ > +/* Thread-local storage handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2024 Free Software Foundation, Inc. Only 2024. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. 
*/ > + > +#include <sysdep.h> > +#include <tls.h> > +#include "tlsdesc.h" > + > + .text > + > + /* Compute the thread pointer offset for symbols in the static > + TLS block. The offset is the same for all threads. > + Prototype: > + _dl_tlsdesc_return (tlsdesc *); */ > + .hidden _dl_tlsdesc_return > + .global _dl_tlsdesc_return > + .type _dl_tlsdesc_return,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_return: > + REG_L a0, a0, 8 > + RET > + cfi_endproc > + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return > + > + /* Handler for undefined weak TLS symbols. > + Prototype: > + _dl_tlsdesc_undefweak (tlsdesc *); > + > + The second word of the descriptor contains the addend. > + Return the addend minus the thread pointer. This ensures > + that when the caller adds on the thread pointer it gets back > + the addend. */ > + .hidden _dl_tlsdesc_undefweak > + .global _dl_tlsdesc_undefweak > + .type _dl_tlsdesc_undefweak,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_undefweak: > + REG_L a0, a0, 8 > + sub.d a0, a0, tp > + RET > + cfi_endproc > + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak > + > + > +#ifdef SHARED > + > +#define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) > +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) > +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) > +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) > + > + /* Handler for dynamic TLS symbols. > + Prototype: > + _dl_tlsdesc_dynamic (tlsdesc *) ; > + > + The second word of the descriptor points to a > + tlsdesc_dynamic_arg structure. > + > + Returns the offset between the thread pointer and the > + object referenced by the argument. 
> + > + ptrdiff_t > + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) > + { > + struct tlsdesc_dynamic_arg *td = tdp->arg; > + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); > + if (__glibc_likely (td->gen_count <= dtv[0].counter > + && (dtv[td->tlsinfo.ti_module].pointer.val > + != TLS_DTV_UNALLOCATED), > + 1)) > + return dtv[td->tlsinfo.ti_module].pointer.val > + + td->tlsinfo.ti_offset > + - __thread_pointer; > + > + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; > + } */ > + .hidden _dl_tlsdesc_dynamic > + .global _dl_tlsdesc_dynamic > + .type _dl_tlsdesc_dynamic,%function > + cfi_startproc > + .align 2 > +_dl_tlsdesc_dynamic: > + /* Save just enough registers to support fast path, if we fall > + into slow path we will save additional registers. */ > + ADDI sp, sp,-24 > + REG_S t0, sp, 0 > + REG_S t1, sp, 8 > + REG_S t2, sp, 16 > + > + REG_L t0, tp, -SIZE_OF_DTV /* dtv(t0) = tp + TCBHEAD_DTV dtv start */ > + REG_L a0, a0, TLSDESC_ARG /* td(a0) = tdp->arg */ > + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ > + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ > + bltu t2, t1, .Lslow > + > + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ > + slli.d t1, t1, 3 + 1 /* sizeof(dtv_t) == sizeof(void*) * 2 */ > + add.d t1, t1, t0 /* t1 = dtv + ti_module * sizeof(dtv_t) */ > + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ > + li.d t2, TLS_DTV_UNALLOCATED > + beq t1, t2, .Lslow > + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ > + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ > + add.d a0, t1, t2 > +.Lret: > + sub.d a0, a0, tp > + REG_L t0, sp, 0 > + REG_L t1, sp, 8 > + REG_L t2, sp, 16 > + ADDI sp, sp, 24 > + RET > + > +.Lslow: > + /* This is the slow path. We need to call __tls_get_addr() which > + means we need to save and restore all the register that the > + callee will trash. */ > + > + /* Save the remaining registers that we must treat as caller save. 
*/ > + ADDI sp, sp, -FRAME_SIZE > + REG_S ra, sp, 0 * SZREG > + REG_S a1, sp, 1 * SZREG > + REG_S a2, sp, 2 * SZREG > + REG_S a3, sp, 3 * SZREG > + REG_S a4, sp, 4 * SZREG > + REG_S a5, sp, 5 * SZREG > + REG_S a6, sp, 6 * SZREG > + REG_S a7, sp, 7 * SZREG > + REG_S t4, sp, 8 * SZREG > + REG_S t5, sp, 9 * SZREG > + REG_S t6, sp, 10 * SZREG > + REG_S t7, sp, 11 * SZREG > + REG_S t8, sp, 12 * SZREG > + > +#ifndef __loongarch_soft_float > + > + /* Save fcsr0 register. > + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases > + of some fields in fcsr0. */ > + ADDI sp, sp, -SZFCSREG > + movfcsr2gr t0, fcsr0 > + st.w t0, sp, 0 > + > + /* Whether support LASX. */ > + la.global t0, _rtld_global_ro > + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET > + andi t0, t0, HWCAP_LOONGARCH_LASX > + beqz t0, .Llsx > + > + /* Save 256-bit vector registers. > + FIXME: Without vector ABI, save all vector registers. */ > + ADDI sp, sp, -FRAME_SIZE_LASX > + xvst xr0, sp, 0*SZXREG > + xvst xr1, sp, 1*SZXREG > + xvst xr2, sp, 2*SZXREG > + xvst xr3, sp, 3*SZXREG > + xvst xr4, sp, 4*SZXREG > + xvst xr5, sp, 5*SZXREG > + xvst xr6, sp, 6*SZXREG > + xvst xr7, sp, 7*SZXREG > + xvst xr8, sp, 8*SZXREG > + xvst xr9, sp, 9*SZXREG > + xvst xr10, sp, 10*SZXREG > + xvst xr11, sp, 11*SZXREG > + xvst xr12, sp, 12*SZXREG > + xvst xr13, sp, 13*SZXREG > + xvst xr14, sp, 14*SZXREG > + xvst xr15, sp, 15*SZXREG > + xvst xr16, sp, 16*SZXREG > + xvst xr17, sp, 17*SZXREG > + xvst xr18, sp, 18*SZXREG > + xvst xr19, sp, 19*SZXREG > + xvst xr20, sp, 20*SZXREG > + xvst xr21, sp, 21*SZXREG > + xvst xr22, sp, 22*SZXREG > + xvst xr23, sp, 23*SZXREG > + xvst xr24, sp, 24*SZXREG > + xvst xr25, sp, 25*SZXREG > + xvst xr26, sp, 26*SZXREG > + xvst xr27, sp, 27*SZXREG > + xvst xr28, sp, 28*SZXREG > + xvst xr29, sp, 29*SZXREG > + xvst xr30, sp, 30*SZXREG > + xvst xr31, sp, 31*SZXREG > + b .Ltga > + > +.Llsx: > + /* Whether support LSX. 
*/ > + andi t0, t0, HWCAP_LOONGARCH_LSX > + beqz t0, .Lfloat > + > + /* Save 128-bit vector registers. */ > + ADDI sp, sp, -FRAME_SIZE_LSX > + vst vr0, sp, 0*SZVREG > + vst vr1, sp, 1*SZVREG > + vst vr2, sp, 2*SZVREG > + vst vr3, sp, 3*SZVREG > + vst vr4, sp, 4*SZVREG > + vst vr5, sp, 5*SZVREG > + vst vr6, sp, 6*SZVREG > + vst vr7, sp, 7*SZVREG > + vst vr8, sp, 8*SZVREG > + vst vr9, sp, 9*SZVREG > + vst vr10, sp, 10*SZVREG > + vst vr11, sp, 11*SZVREG > + vst vr12, sp, 12*SZVREG > + vst vr13, sp, 13*SZVREG > + vst vr14, sp, 14*SZVREG > + vst vr15, sp, 15*SZVREG > + vst vr16, sp, 16*SZVREG > + vst vr17, sp, 17*SZVREG > + vst vr18, sp, 18*SZVREG > + vst vr19, sp, 19*SZVREG > + vst vr20, sp, 20*SZVREG > + vst vr21, sp, 21*SZVREG > + vst vr22, sp, 22*SZVREG > + vst vr23, sp, 23*SZVREG > + vst vr24, sp, 24*SZVREG > + vst vr25, sp, 25*SZVREG > + vst vr26, sp, 26*SZVREG > + vst vr27, sp, 27*SZVREG > + vst vr28, sp, 28*SZVREG > + vst vr29, sp, 29*SZVREG > + vst vr30, sp, 30*SZVREG > + vst vr31, sp, 31*SZVREG > + b .Ltga > + > +.Lfloat: > + /* Save float registers. 
*/ > + ADDI sp, sp, -FRAME_SIZE_FLOAT > + FREG_S fa0, sp, 0*SZFREG > + FREG_S fa1, sp, 1*SZFREG > + FREG_S fa2, sp, 2*SZFREG > + FREG_S fa3, sp, 3*SZFREG > + FREG_S fa4, sp, 4*SZFREG > + FREG_S fa5, sp, 5*SZFREG > + FREG_S fa6, sp, 6*SZFREG > + FREG_S fa7, sp, 7*SZFREG > + FREG_S ft0, sp, 8*SZFREG > + FREG_S ft1, sp, 9*SZFREG > + FREG_S ft2, sp, 10*SZFREG > + FREG_S ft3, sp, 11*SZFREG > + FREG_S ft4, sp, 12*SZFREG > + FREG_S ft5, sp, 13*SZFREG > + FREG_S ft6, sp, 14*SZFREG > + FREG_S ft7, sp, 15*SZFREG > + FREG_S ft8, sp, 16*SZFREG > + FREG_S ft9, sp, 17*SZFREG > + FREG_S ft10, sp, 18*SZFREG > + FREG_S ft11, sp, 19*SZFREG > + FREG_S ft12, sp, 20*SZFREG > + FREG_S ft13, sp, 21*SZFREG > + FREG_S ft14, sp, 22*SZFREG > + FREG_S ft15, sp, 23*SZFREG > + > +#endif /* #ifndef __loongarch_soft_float */ > + > +.Ltga: > + bl __tls_get_addr > + ADDI a0, a0, -TLS_DTV_OFFSET > + > +#ifndef __loongarch_soft_float > + > + la.global t0, _rtld_global_ro > + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET > + andi t0, t0, HWCAP_LOONGARCH_LASX > + beqz t0, .Llsx1 > + > + /* Restore 256-bit vector registers. 
*/ > + xvld xr0, sp, 0*SZXREG > + xvld xr1, sp, 1*SZXREG > + xvld xr2, sp, 2*SZXREG > + xvld xr3, sp, 3*SZXREG > + xvld xr4, sp, 4*SZXREG > + xvld xr5, sp, 5*SZXREG > + xvld xr6, sp, 6*SZXREG > + xvld xr7, sp, 7*SZXREG > + xvld xr8, sp, 8*SZXREG > + xvld xr9, sp, 9*SZXREG > + xvld xr10, sp, 10*SZXREG > + xvld xr11, sp, 11*SZXREG > + xvld xr12, sp, 12*SZXREG > + xvld xr13, sp, 13*SZXREG > + xvld xr14, sp, 14*SZXREG > + xvld xr15, sp, 15*SZXREG > + xvld xr16, sp, 16*SZXREG > + xvld xr17, sp, 17*SZXREG > + xvld xr18, sp, 18*SZXREG > + xvld xr19, sp, 19*SZXREG > + xvld xr20, sp, 20*SZXREG > + xvld xr21, sp, 21*SZXREG > + xvld xr22, sp, 22*SZXREG > + xvld xr23, sp, 23*SZXREG > + xvld xr24, sp, 24*SZXREG > + xvld xr25, sp, 25*SZXREG > + xvld xr26, sp, 26*SZXREG > + xvld xr27, sp, 27*SZXREG > + xvld xr28, sp, 28*SZXREG > + xvld xr29, sp, 29*SZXREG > + xvld xr30, sp, 30*SZXREG > + xvld xr31, sp, 31*SZXREG > + ADDI sp, sp, FRAME_SIZE_LASX > + b .Lfcsr > + > +.Llsx1: > + andi t0, s0, HWCAP_LOONGARCH_LSX > + beqz t0, .Lfloat1 > + > + /* Restore 128-bit vector registers. 
*/ > + vld vr0, sp, 0*SZVREG > + vld vr1, sp, 1*SZVREG > + vld vr2, sp, 2*SZVREG > + vld vr3, sp, 3*SZVREG > + vld vr4, sp, 4*SZVREG > + vld vr5, sp, 5*SZVREG > + vld vr6, sp, 6*SZVREG > + vld vr7, sp, 7*SZVREG > + vld vr8, sp, 8*SZVREG > + vld vr9, sp, 9*SZVREG > + vld vr10, sp, 10*SZVREG > + vld vr11, sp, 11*SZVREG > + vld vr12, sp, 12*SZVREG > + vld vr13, sp, 13*SZVREG > + vld vr14, sp, 14*SZVREG > + vld vr15, sp, 15*SZVREG > + vld vr16, sp, 16*SZVREG > + vld vr17, sp, 17*SZVREG > + vld vr18, sp, 18*SZVREG > + vld vr19, sp, 19*SZVREG > + vld vr20, sp, 20*SZVREG > + vld vr21, sp, 21*SZVREG > + vld vr22, sp, 22*SZVREG > + vld vr23, sp, 23*SZVREG > + vld vr24, sp, 24*SZVREG > + vld vr25, sp, 25*SZVREG > + vld vr26, sp, 26*SZVREG > + vld vr27, sp, 27*SZVREG > + vld vr28, sp, 28*SZVREG > + vld vr29, sp, 29*SZVREG > + vld vr30, sp, 30*SZVREG > + vld vr31, sp, 31*SZVREG > + ADDI sp, sp, FRAME_SIZE_LSX > + b .Lfcsr > + > +.Lfloat1: > + /* Restore float registers. */ > + FREG_L fa0, sp, 0*SZFREG > + FREG_L fa1, sp, 1*SZFREG > + FREG_L fa2, sp, 2*SZFREG > + FREG_L fa3, sp, 3*SZFREG > + FREG_L fa4, sp, 4*SZFREG > + FREG_L fa5, sp, 5*SZFREG > + FREG_L fa6, sp, 6*SZFREG > + FREG_L fa7, sp, 7*SZFREG > + FREG_L ft0, sp, 8*SZFREG > + FREG_L ft1, sp, 9*SZFREG > + FREG_L ft2, sp, 10*SZFREG > + FREG_L ft3, sp, 11*SZFREG > + FREG_L ft4, sp, 12*SZFREG > + FREG_L ft5, sp, 13*SZFREG > + FREG_L ft6, sp, 14*SZFREG > + FREG_L ft7, sp, 15*SZFREG > + FREG_L ft8, sp, 16*SZFREG > + FREG_L ft9, sp, 17*SZFREG > + FREG_L ft10, sp, 18*SZFREG > + FREG_L ft11, sp, 19*SZFREG > + FREG_L ft12, sp, 20*SZFREG > + FREG_L ft13, sp, 21*SZFREG > + FREG_L ft14, sp, 22*SZFREG > + FREG_L ft15, sp, 23*SZFREG > + ADDI sp, sp, FRAME_SIZE_FLOAT > + > +.Lfcsr: > + /* Restore fcsr0 register. 
*/ > + ld.w t0, sp, 0 > + movgr2fcsr fcsr0, t0 > + ADDI sp, sp, SZFCSREG > + > +#endif /* #ifndef __loongarch_soft_float */ > + > + REG_L ra, sp, 0 > + REG_L a1, sp, 1 * 8 > + REG_L a2, sp, 2 * 8 > + REG_L a3, sp, 3 * 8 > + REG_L a4, sp, 4 * 8 > + REG_L a5, sp, 5 * 8 > + REG_L a6, sp, 6 * 8 > + REG_L a7, sp, 7 * 8 > + REG_L t4, sp, 8 * 8 > + REG_L t5, sp, 9 * 8 > + REG_L t6, sp, 10 * 8 > + REG_L t7, sp, 11 * 8 > + REG_L t8, sp, 12 * 8 > + ADDI sp, sp, FRAME_SIZE > + > + b .Lret > + cfi_endproc > + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic > + > +#endif /* #ifdef SHARED */ > diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h > new file mode 100644 > index 0000000000..7444dac520 > --- /dev/null > +++ b/sysdeps/loongarch/dl-tlsdesc.h > @@ -0,0 +1,49 @@ > +/* Thread-local storage descriptor handling in the ELF dynamic linker. > + LoongArch version. > + Copyright (C) 2011-2023 Free Software Foundation, Inc. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#ifndef _DL_TLSDESC_H > +#define _DL_TLSDESC_H > + > +#include <dl-tls.h> > + > +/* Type used to represent a TLS descriptor in the GOT. 
*/ > +struct tlsdesc > +{ > + ptrdiff_t (*entry) (struct tlsdesc *); > + void *arg; > +}; > + > +/* Type used as the argument in a TLS descriptor for a symbol that > + needs dynamic TLS offsets. */ > +struct tlsdesc_dynamic_arg > +{ > + tls_index tlsinfo; > + size_t gen_count; > +}; > + > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); > + > +#ifdef SHARED > +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); > +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); > +#endif > + > +#endif Ok. > diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h > index 4d8737ee7f..833dc9eb82 100644 > --- a/sysdeps/loongarch/linkmap.h > +++ b/sysdeps/loongarch/linkmap.h > @@ -18,5 +18,6 @@ > > struct link_map_machine > { > - ElfW (Addr) plt; /* Address of .plt. */ > + ElfW (Addr) plt; /* Address of .plt. */ > + void *tlsdesc_table; /* Address of TLS descriptor hash table. */ > }; > diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure > index dfc7ecfd9e..0d1e9ed8df 100644 > --- a/sysdeps/loongarch/preconfigure > +++ b/sysdeps/loongarch/preconfigure > @@ -43,6 +43,7 @@ loongarch*) > > > base_machine=loongarch > + mtls_descriptor=desc > ;; > esac > Ok. > diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h > index 51521a7eb4..23c1d12914 100644 > --- a/sysdeps/loongarch/sys/asm.h > +++ b/sysdeps/loongarch/sys/asm.h > @@ -25,6 +25,7 @@ > /* Macros to handle different pointer/register sizes for 32/64-bit code. 
*/ > #define SZREG 8 > #define SZFREG 8 > +#define SZFCSREG 4 > #define SZVREG 16 > #define SZXREG 32 > #define REG_L ld.d > diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h > index f61ee25b25..80ce3e9c00 100644 > --- a/sysdeps/loongarch/sys/regdef.h > +++ b/sysdeps/loongarch/sys/regdef.h > @@ -97,6 +97,7 @@ > #define fcc5 $fcc5 > #define fcc6 $fcc6 > #define fcc7 $fcc7 > +#define fcsr0 $fcsr0 > > #define vr0 $vr0 > #define vr1 $vr1 > diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c > new file mode 100644 > index 0000000000..4a3d5d22ef > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.c > @@ -0,0 +1,39 @@ > +/* Manage TLS descriptors. LoongArch64 version. > + > + Copyright (C) 2011-2024 Free Software Foundation, Inc. Only 2024. > + > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <ldsodefs.h> > +#include <tls.h> > +#include <dl-tlsdesc.h> > +#include <dl-unmap-segments.h> > +#include <tlsdeschtab.h> > + > +/* Unmap the dynamic object, but also release its TLS descriptor table > + if there is one. 
*/ > + > +void > +_dl_unmap (struct link_map *map) > +{ > + _dl_unmap_segments (map); > + > +#ifdef SHARED > + if (map->l_mach.tlsdesc_table) > + htab_delete (map->l_mach.tlsdesc_table); > +#endif > +} > diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym > new file mode 100644 > index 0000000000..a0b945e449 > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.sym > @@ -0,0 +1,28 @@ > +#include <stddef.h> > +#include <sysdep.h> > +#include <tls.h> > +#include <link.h> > +#include <dl-tlsdesc.h> > + > +#define SHARED 1 > + > +#include <ldsodefs.h> > + > +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) > + > +-- > + > +-- Abuse tls.h macros to derive offsets relative to the thread register. > + > +TLSDESC_ARG offsetof(struct tlsdesc, arg) > +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) > +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) > +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) > +TCBHEAD_DTV offsetof(tcbhead_t, dtv) > +DTV_COUNTER offsetof(dtv_t, counter) > +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED > +TLS_DTV_OFFSET TLS_DTV_OFFSET > +SIZE_OF_DTV sizeof(tcbhead_t) > +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) > +HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX > +HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX Ok, although I would expect you can include hwcap.h on assembly files. > diff --git a/sysdeps/loongarch/tst-gnu2-tls2.h b/sysdeps/loongarch/tst-gnu2-tls2.h > new file mode 100644 > index 0000000000..91b16c0f2e > --- /dev/null > +++ b/sysdeps/loongarch/tst-gnu2-tls2.h > @@ -0,0 +1,357 @@ > +/* Test TLSDESC relocation. LoongArch64 version. > + Copyright (C) 2024 Free Software Foundation, Inc. > + This file is part of the GNU C Library. 
> + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <string.h> > +#include <stdlib.h> > +#include <sys/auxv.h> > + > +/* The instruction between BEFORE_TLSDESC_CALL and _dl_tlsdesc_dynamic, > + and the instruction between _dl_tlsdesc_dynamic and AFTER_TLSDESC_CALL, > + may modified most of the general-purpose register. */ > +#define SAVE_REGISTER(src) \ > + asm volatile ("st.d $r3, %0" :"=m"(src) :); > + > +#ifdef __loongarch_soft_float > + > +#define BEFORE_TLSDESC_CALL() \ > + uint64_t src; \ > + SAVE_REGISTER (src); > + > +#define AFTER_TLSDESC_CALL() \ > + uint64_t restore; \ > + SAVE_REGISTER (restore); \ > + if (src != restore) \ > + abort (); > + > +#else /* hard float */ > + > +#define SAVE_REGISTER_FCC(src) \ > + asm volatile ("movcf2gr $t0, $fcc0" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(src[0]) :); \ > + asm volatile ("movcf2gr $t0, $fcc1" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(src[1]) :); \ > + asm volatile ("movcf2gr $t0, $fcc2" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(src[2]) :); \ > + asm volatile ("movcf2gr $t0, $fcc3" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(src[3]) :); \ > + asm volatile ("movcf2gr $t0, $fcc4" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(src[4]) :); \ > + asm volatile ("movcf2gr $t0, $fcc5" ::: "$t0"); \ > + asm 
volatile ("st.d $t0, %0" :"=m"(src[5]) :); \ > + asm volatile ("movcf2gr $t0, $fcc6" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(src[6]) :); \ > + asm volatile ("movcf2gr $t0, $fcc7" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(src[7]) :); > + > +#define LOAD_REGISTER_FCSR() \ > + asm volatile ("li.d $t0, 0x01010101" ::: "$t0"); \ > + asm volatile ("movgr2fcsr $fcsr0, $t0" ::: ); > + > +#define SAVE_REGISTER_FCSR() \ > + asm volatile ("movfcsr2gr $t0, $fcsr0" ::: "$t0"); \ > + asm volatile ("st.d $t0, %0" :"=m"(restore_fcsr) :); > + > +# define INIT_TLSDESC_CALL() \ > + unsigned long hwcap = getauxval (AT_HWCAP); > + > +#define LOAD_REGISTER_FLOAT() \ > + asm volatile ("fld.d $f0, %0" ::"m"(src_float[0]) :"$f0"); \ > + asm volatile ("fld.d $f1, %0" ::"m"(src_float[1]) :"$f1"); \ > + asm volatile ("fld.d $f2, %0" ::"m"(src_float[2]) :"$f2"); \ > + asm volatile ("fld.d $f3, %0" ::"m"(src_float[3]) :"$f3"); \ > + asm volatile ("fld.d $f4, %0" ::"m"(src_float[4]) :"$f4"); \ > + asm volatile ("fld.d $f5, %0" ::"m"(src_float[5]) :"$f5"); \ > + asm volatile ("fld.d $f6, %0" ::"m"(src_float[6]) :"$f6"); \ > + asm volatile ("fld.d $f7, %0" ::"m"(src_float[7]) :"$f7"); \ > + asm volatile ("fld.d $f8, %0" ::"m"(src_float[8]) :"$f8"); \ > + asm volatile ("fld.d $f9, %0" ::"m"(src_float[9]) :"$f9"); \ > + asm volatile ("fld.d $f10, %0" ::"m"(src_float[10]) :"$f10"); \ > + asm volatile ("fld.d $f11, %0" ::"m"(src_float[11]) :"$f11"); \ > + asm volatile ("fld.d $f12, %0" ::"m"(src_float[12]) :"$f12"); \ > + asm volatile ("fld.d $f13, %0" ::"m"(src_float[13]) :"$f13"); \ > + asm volatile ("fld.d $f14, %0" ::"m"(src_float[14]) :"$f14"); \ > + asm volatile ("fld.d $f15, %0" ::"m"(src_float[15]) :"$f15"); \ > + asm volatile ("fld.d $f16, %0" ::"m"(src_float[16]) :"$f16"); \ > + asm volatile ("fld.d $f17, %0" ::"m"(src_float[17]) :"$f17"); \ > + asm volatile ("fld.d $f18, %0" ::"m"(src_float[18]) :"$f18"); \ > + asm volatile ("fld.d $f19, %0" ::"m"(src_float[19]) 
:"$f19"); \ > + asm volatile ("fld.d $f20, %0" ::"m"(src_float[20]) :"$f20"); \ > + asm volatile ("fld.d $f21, %0" ::"m"(src_float[21]) :"$f21"); \ > + asm volatile ("fld.d $f22, %0" ::"m"(src_float[22]) :"$f22"); \ > + asm volatile ("fld.d $f23, %0" ::"m"(src_float[23]) :"$f23"); \ > + asm volatile ("fld.d $f24, %0" ::"m"(src_float[24]) :"$f24"); \ > + asm volatile ("fld.d $f25, %0" ::"m"(src_float[25]) :"$f25"); \ > + asm volatile ("fld.d $f26, %0" ::"m"(src_float[26]) :"$f26"); \ > + asm volatile ("fld.d $f27, %0" ::"m"(src_float[27]) :"$f27"); \ > + asm volatile ("fld.d $f28, %0" ::"m"(src_float[28]) :"$f28"); \ > + asm volatile ("fld.d $f29, %0" ::"m"(src_float[29]) :"$f29"); \ > + asm volatile ("fld.d $f30, %0" ::"m"(src_float[30]) :"$f30"); \ > + asm volatile ("fld.d $f31, %0" ::"m"(src_float[31]) :"$f31"); > + > +#define SAVE_REGISTER_FLOAT() \ > + asm volatile ("fst.d $f0, %0" :"=m"(restore_float[0]) :); \ > + asm volatile ("fst.d $f1, %0" :"=m"(restore_float[1]) :); \ > + asm volatile ("fst.d $f2, %0" :"=m"(restore_float[2]) :); \ > + asm volatile ("fst.d $f3, %0" :"=m"(restore_float[3]) :); \ > + asm volatile ("fst.d $f4, %0" :"=m"(restore_float[4]) :); \ > + asm volatile ("fst.d $f5, %0" :"=m"(restore_float[5]) :); \ > + asm volatile ("fst.d $f6, %0" :"=m"(restore_float[6]) :); \ > + asm volatile ("fst.d $f7, %0" :"=m"(restore_float[7]) :); \ > + asm volatile ("fst.d $f8, %0" :"=m"(restore_float[8]) :); \ > + asm volatile ("fst.d $f9, %0" :"=m"(restore_float[9]) :); \ > + asm volatile ("fst.d $f10, %0" :"=m"(restore_float[10]) :); \ > + asm volatile ("fst.d $f11, %0" :"=m"(restore_float[11]) :); \ > + asm volatile ("fst.d $f12, %0" :"=m"(restore_float[12]) :); \ > + asm volatile ("fst.d $f13, %0" :"=m"(restore_float[13]) :); \ > + asm volatile ("fst.d $f14, %0" :"=m"(restore_float[14]) :); \ > + asm volatile ("fst.d $f15, %0" :"=m"(restore_float[15]) :); \ > + asm volatile ("fst.d $f16, %0" :"=m"(restore_float[16]) :); \ > + asm volatile ("fst.d $f17, 
%0" :"=m"(restore_float[17]) :); \ > + asm volatile ("fst.d $f18, %0" :"=m"(restore_float[18]) :); \ > + asm volatile ("fst.d $f19, %0" :"=m"(restore_float[19]) :); \ > + asm volatile ("fst.d $f20, %0" :"=m"(restore_float[20]) :); \ > + asm volatile ("fst.d $f21, %0" :"=m"(restore_float[21]) :); \ > + asm volatile ("fst.d $f22, %0" :"=m"(restore_float[22]) :); \ > + asm volatile ("fst.d $f23, %0" :"=m"(restore_float[23]) :); \ > + asm volatile ("fst.d $f24, %0" :"=m"(restore_float[24]) :); \ > + asm volatile ("fst.d $f25, %0" :"=m"(restore_float[25]) :); \ > + asm volatile ("fst.d $f26, %0" :"=m"(restore_float[26]) :); \ > + asm volatile ("fst.d $f27, %0" :"=m"(restore_float[27]) :); \ > + asm volatile ("fst.d $f28, %0" :"=m"(restore_float[28]) :); \ > + asm volatile ("fst.d $f29, %0" :"=m"(restore_float[29]) :); \ > + asm volatile ("fst.d $f30, %0" :"=m"(restore_float[30]) :); \ > + asm volatile ("fst.d $f31, %0" :"=m"(restore_float[31]) :); > + > +#define LOAD_REGISTER_LSX() \ > + /* Every byte in $vr0 is 1. 
*/ \ > + asm volatile ("vldi $vr0, 1" ::: "$vr0"); \ > + asm volatile ("vldi $vr1, 2" ::: "$vr1"); \ > + asm volatile ("vldi $vr2, 3" ::: "$vr2"); \ > + asm volatile ("vldi $vr3, 4" ::: "$vr3"); \ > + asm volatile ("vldi $vr4, 5" ::: "$vr4"); \ > + asm volatile ("vldi $vr5, 6" ::: "$vr5"); \ > + asm volatile ("vldi $vr6, 7" ::: "$vr6"); \ > + asm volatile ("vldi $vr7, 8" ::: "$vr7"); \ > + asm volatile ("vldi $vr8, 9" ::: "$vr8"); \ > + asm volatile ("vldi $vr9, 10" ::: "$vr9"); \ > + asm volatile ("vldi $vr10, 11" ::: "$vr10"); \ > + asm volatile ("vldi $vr11, 12" ::: "$vr11"); \ > + asm volatile ("vldi $vr12, 13" ::: "$vr12"); \ > + asm volatile ("vldi $vr13, 14" ::: "$vr13"); \ > + asm volatile ("vldi $vr14, 15" ::: "$vr14"); \ > + asm volatile ("vldi $vr15, 16" ::: "$vr15"); \ > + asm volatile ("vldi $vr16, 17" ::: "$vr16"); \ > + asm volatile ("vldi $vr17, 18" ::: "$vr17"); \ > + asm volatile ("vldi $vr18, 19" ::: "$vr18"); \ > + asm volatile ("vldi $vr19, 20" ::: "$vr19"); \ > + asm volatile ("vldi $vr20, 21" ::: "$vr20"); \ > + asm volatile ("vldi $vr21, 22" ::: "$vr21"); \ > + asm volatile ("vldi $vr22, 23" ::: "$vr22"); \ > + asm volatile ("vldi $vr23, 24" ::: "$vr23"); \ > + asm volatile ("vldi $vr24, 25" ::: "$vr24"); \ > + asm volatile ("vldi $vr25, 26" ::: "$vr25"); \ > + asm volatile ("vldi $vr26, 27" ::: "$vr26"); \ > + asm volatile ("vldi $vr27, 28" ::: "$vr27"); \ > + asm volatile ("vldi $vr28, 29" ::: "$vr28"); \ > + asm volatile ("vldi $vr29, 30" ::: "$vr29"); \ > + asm volatile ("vldi $vr30, 31" ::: "$vr30"); \ > + asm volatile ("vldi $vr31, 32" ::: "$vr31"); > + > +#define SAVE_REGISTER_LSX() \ > + asm volatile ("vst $vr0, %0" :"=m"(restore_lsx[0]) :); \ > + asm volatile ("vst $vr1, %0" :"=m"(restore_lsx[1]) :); \ > + asm volatile ("vst $vr2, %0" :"=m"(restore_lsx[2]) :); \ > + asm volatile ("vst $vr3, %0" :"=m"(restore_lsx[3]) :); \ > + asm volatile ("vst $vr4, %0" :"=m"(restore_lsx[4]) :); \ > + asm volatile ("vst $vr5, %0" 
:"=m"(restore_lsx[5]) :); \ > + asm volatile ("vst $vr6, %0" :"=m"(restore_lsx[6]) :); \ > + asm volatile ("vst $vr7, %0" :"=m"(restore_lsx[7]) :); \ > + asm volatile ("vst $vr8, %0" :"=m"(restore_lsx[8]) :); \ > + asm volatile ("vst $vr9, %0" :"=m"(restore_lsx[9]) :); \ > + asm volatile ("vst $vr10, %0" :"=m"(restore_lsx[10]) :); \ > + asm volatile ("vst $vr11, %0" :"=m"(restore_lsx[11]) :); \ > + asm volatile ("vst $vr12, %0" :"=m"(restore_lsx[12]) :); \ > + asm volatile ("vst $vr13, %0" :"=m"(restore_lsx[13]) :); \ > + asm volatile ("vst $vr14, %0" :"=m"(restore_lsx[14]) :); \ > + asm volatile ("vst $vr15, %0" :"=m"(restore_lsx[15]) :); \ > + asm volatile ("vst $vr16, %0" :"=m"(restore_lsx[16]) :); \ > + asm volatile ("vst $vr17, %0" :"=m"(restore_lsx[17]) :); \ > + asm volatile ("vst $vr18, %0" :"=m"(restore_lsx[18]) :); \ > + asm volatile ("vst $vr19, %0" :"=m"(restore_lsx[19]) :); \ > + asm volatile ("vst $vr20, %0" :"=m"(restore_lsx[20]) :); \ > + asm volatile ("vst $vr21, %0" :"=m"(restore_lsx[21]) :); \ > + asm volatile ("vst $vr22, %0" :"=m"(restore_lsx[22]) :); \ > + asm volatile ("vst $vr23, %0" :"=m"(restore_lsx[23]) :); \ > + asm volatile ("vst $vr24, %0" :"=m"(restore_lsx[24]) :); \ > + asm volatile ("vst $vr25, %0" :"=m"(restore_lsx[25]) :); \ > + asm volatile ("vst $vr26, %0" :"=m"(restore_lsx[26]) :); \ > + asm volatile ("vst $vr27, %0" :"=m"(restore_lsx[27]) :); \ > + asm volatile ("vst $vr28, %0" :"=m"(restore_lsx[28]) :); \ > + asm volatile ("vst $vr29, %0" :"=m"(restore_lsx[29]) :); \ > + asm volatile ("vst $vr30, %0" :"=m"(restore_lsx[30]) :); \ > + asm volatile ("vst $vr31, %0" :"=m"(restore_lsx[31]) :); > + > +#define LOAD_REGISTER_LASX() \ > + /* Every byte in $xr0 is 1. */ \ This triggers: ../sysdeps/loongarch/tst-gnu2-tls2.h:211:3: error: unknown register name ‘$xr0’ in ‘asm’ 211 | asm volatile ("xvldi $xr0, 1" ::: "$xr0"); \ | ^~~ with gcc 13.2.1, which I take does not have support for -mlasx/-mlsx. 
So I think you will need a configure check to enable it. > + asm volatile ("xvldi $xr0, 1" ::: "$xr0"); \ > + asm volatile ("xvldi $xr1, 2" ::: "$xr1"); \ > + asm volatile ("xvldi $xr2, 3" ::: "$xr2"); \ > + asm volatile ("xvldi $xr3, 4" ::: "$xr3"); \ > + asm volatile ("xvldi $xr4, 5" ::: "$xr4"); \ > + asm volatile ("xvldi $xr5, 6" ::: "$xr5"); \ > + asm volatile ("xvldi $xr6, 7" ::: "$xr6"); \ > + asm volatile ("xvldi $xr7, 8" ::: "$xr7"); \ > + asm volatile ("xvldi $xr8, 9" ::: "$xr8"); \ > + asm volatile ("xvldi $xr9, 10" ::: "$xr9"); \ > + asm volatile ("xvldi $xr10, 11" ::: "$xr10"); \ > + asm volatile ("xvldi $xr11, 12" ::: "$xr11"); \ > + asm volatile ("xvldi $xr12, 13" ::: "$xr12"); \ > + asm volatile ("xvldi $xr13, 14" ::: "$xr13"); \ > + asm volatile ("xvldi $xr14, 15" ::: "$xr14"); \ > + asm volatile ("xvldi $xr15, 16" ::: "$xr15"); \ > + asm volatile ("xvldi $xr16, 17" ::: "$xr16"); \ > + asm volatile ("xvldi $xr17, 18" ::: "$xr17"); \ > + asm volatile ("xvldi $xr18, 19" ::: "$xr18"); \ > + asm volatile ("xvldi $xr19, 20" ::: "$xr19"); \ > + asm volatile ("xvldi $xr20, 21" ::: "$xr20"); \ > + asm volatile ("xvldi $xr21, 22" ::: "$xr21"); \ > + asm volatile ("xvldi $xr22, 23" ::: "$xr22"); \ > + asm volatile ("xvldi $xr23, 24" ::: "$xr23"); \ > + asm volatile ("xvldi $xr24, 25" ::: "$xr24"); \ > + asm volatile ("xvldi $xr25, 26" ::: "$xr25"); \ > + asm volatile ("xvldi $xr26, 27" ::: "$xr26"); \ > + asm volatile ("xvldi $xr27, 28" ::: "$xr27"); \ > + asm volatile ("xvldi $xr28, 29" ::: "$xr28"); \ > + asm volatile ("xvldi $xr29, 30" ::: "$xr29"); \ > + asm volatile ("xvldi $xr30, 31" ::: "$xr30"); \ > + asm volatile ("xvldi $xr31, 32" ::: "$xr31"); > + > +#define SAVE_REGISTER_LASX() \ > + asm volatile ("xvst $xr0, %0" :"=m"(restore_lasx[0]) :); \ > + asm volatile ("xvst $xr1, %0" :"=m"(restore_lasx[1]) :); \ > + asm volatile ("xvst $xr2, %0" :"=m"(restore_lasx[2]) :); \ > + asm volatile ("xvst $xr3, %0" :"=m"(restore_lasx[3]) :); \ > + asm volatile 
("xvst $xr4, %0" :"=m"(restore_lasx[4]) :); \ > + asm volatile ("xvst $xr5, %0" :"=m"(restore_lasx[5]) :); \ > + asm volatile ("xvst $xr6, %0" :"=m"(restore_lasx[6]) :); \ > + asm volatile ("xvst $xr7, %0" :"=m"(restore_lasx[7]) :); \ > + asm volatile ("xvst $xr8, %0" :"=m"(restore_lasx[8]) :); \ > + asm volatile ("xvst $xr9, %0" :"=m"(restore_lasx[9]) :); \ > + asm volatile ("xvst $xr10, %0" :"=m"(restore_lasx[10]) :); \ > + asm volatile ("xvst $xr11, %0" :"=m"(restore_lasx[11]) :); \ > + asm volatile ("xvst $xr12, %0" :"=m"(restore_lasx[12]) :); \ > + asm volatile ("xvst $xr13, %0" :"=m"(restore_lasx[13]) :); \ > + asm volatile ("xvst $xr14, %0" :"=m"(restore_lasx[14]) :); \ > + asm volatile ("xvst $xr15, %0" :"=m"(restore_lasx[15]) :); \ > + asm volatile ("xvst $xr16, %0" :"=m"(restore_lasx[16]) :); \ > + asm volatile ("xvst $xr17, %0" :"=m"(restore_lasx[17]) :); \ > + asm volatile ("xvst $xr18, %0" :"=m"(restore_lasx[18]) :); \ > + asm volatile ("xvst $xr19, %0" :"=m"(restore_lasx[19]) :); \ > + asm volatile ("xvst $xr20, %0" :"=m"(restore_lasx[20]) :); \ > + asm volatile ("xvst $xr21, %0" :"=m"(restore_lasx[21]) :); \ > + asm volatile ("xvst $xr22, %0" :"=m"(restore_lasx[22]) :); \ > + asm volatile ("xvst $xr23, %0" :"=m"(restore_lasx[23]) :); \ > + asm volatile ("xvst $xr24, %0" :"=m"(restore_lasx[24]) :); \ > + asm volatile ("xvst $xr25, %0" :"=m"(restore_lasx[25]) :); \ > + asm volatile ("xvst $xr26, %0" :"=m"(restore_lasx[26]) :); \ > + asm volatile ("xvst $xr27, %0" :"=m"(restore_lasx[27]) :); \ > + asm volatile ("xvst $xr28, %0" :"=m"(restore_lasx[28]) :); \ > + asm volatile ("xvst $xr29, %0" :"=m"(restore_lasx[29]) :); \ > + asm volatile ("xvst $xr30, %0" :"=m"(restore_lasx[30]) :); \ > + asm volatile ("xvst $xr31, %0" :"=m"(restore_lasx[31]) :); > + > +#define BEFORE_TLSDESC_CALL() \ > + uint64_t src; \ > + double src_float[32]; \ > + uint64_t src_fcc[8]; \ > + SAVE_REGISTER (src); \ > + LOAD_REGISTER_FCSR (); \ > + SAVE_REGISTER_FCC(src_fcc) \ > + \ > 
+ if (hwcap & HWCAP_LOONGARCH_LASX) \ > + { \ > + LOAD_REGISTER_LASX (); \ > + } \ > + else if (hwcap & HWCAP_LOONGARCH_LSX) \ > + { \ > + LOAD_REGISTER_LSX (); \ > + } \ > + else \ > + { \ > + for (int i = 0; i < 32; i++) \ > + src_float[i] = i + 1; \ > + LOAD_REGISTER_FLOAT (); \ > + } > + > +#define AFTER_TLSDESC_CALL() \ > + uint64_t restore; \ > + uint64_t src_fcsr = 0x01010101; \ > + uint64_t restore_fcsr; \ > + uint64_t restore_fcc[8]; \ > + SAVE_REGISTER (restore); \ > + SAVE_REGISTER_FCSR (); \ > + SAVE_REGISTER_FCC(restore_fcc) \ > + \ > + /* memcmp_lasx/strlen_lasx corrupts LSX/LASX registers, */ \ > + /* compare LSX/LASX registers first. */ \ > + if (hwcap & HWCAP_LOONGARCH_LASX) \ > + { \ > + int src_lasx[32][8]; \ > + int restore_lasx[32][8]; \ > + SAVE_REGISTER_LASX (); \ > + for (int i = 0; i < 32; i++) \ > + for (int j = 0; j < 8; j++) \ > + src_lasx[i][j] = 0x01010101 * (i + 1); \ > + \ > + if (memcmp (src_lasx, restore_lasx, sizeof (src_lasx)) != 0) \ > + abort (); \ > + } \ > + else if (hwcap & HWCAP_LOONGARCH_LSX) \ > + { \ > + int src_lsx[32][4]; \ > + int restore_lsx[32][4]; \ > + SAVE_REGISTER_LSX (); \ > + for (int i = 0; i < 32; i++) \ > + for (int j = 0; j < 4; j++) \ > + src_lsx[i][j] = 0x01010101 * (i + 1); \ > + \ > + if (memcmp (src_lsx, restore_lsx, sizeof (src_lsx)) != 0) \ > + abort (); \ > + } \ > + else \ > + { \ > + double restore_float[32]; \ > + SAVE_REGISTER_FLOAT (); \ > + \ > + if (memcmp (src_float, restore_float, sizeof (src_float)) != 0) \ > + abort (); \ > + } \ > + \ > + if (src_fcsr != restore_fcsr) \ > + abort (); \ > + \ > + if (memcmp (src_fcc, restore_fcc, sizeof (src_fcc)) != 0) \ > + abort (); \ > + \ > + if (src != restore) \ > + abort (); > + > +#endif /* #ifdef __loongarch_soft_float */ > + > +#include_next <tst-gnu2-tls2.h> > + > diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data > index 547b1c1b7f..ec32e6d13f 100644 > --- 
a/sysdeps/unix/sysv/linux/loongarch/localplt.data > +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data > @@ -5,3 +5,5 @@ libc.so: calloc > libc.so: free > libc.so: malloc > libc.so: realloc > +# The dynamic loader needs __tls_get_addr for TLS. > +ld.so: __tls_get_addr You can remove this PLT call by explicitly calling the hidden symbol in dl-tlsdesc.S: diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S index 34028e988b..65e1996bde 100644 --- a/sysdeps/loongarch/dl-tlsdesc.S +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -273,7 +273,7 @@ _dl_tlsdesc_dynamic: #endif /* #ifndef __loongarch_soft_float */ .Ltga: - bl __tls_get_addr + bl HIDDEN_JUMPTARGET(__tls_get_addr) ADDI a0, a0, -TLS_DTV_OFFSET #ifndef __loongarch_soft_float @@ -413,5 +413,6 @@ _dl_tlsdesc_dynamic: b .Lret cfi_endproc .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + .hidden HIDDEN_JUMPTARGET(__tls_get_addr) #endif /* #ifdef SHARED */
在 2024/4/5 上午2:46, Adhemerval Zanella Netto 写道: > diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym > new file mode 100644 > index 0000000000..a0b945e449 > --- /dev/null > +++ b/sysdeps/loongarch/tlsdesc.sym > @@ -0,0 +1,28 @@ > +#include <stddef.h> > +#include <sysdep.h> > +#include <tls.h> > +#include <link.h> > +#include <dl-tlsdesc.h> > + > +#define SHARED 1 > + > +#include <ldsodefs.h> > + > +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) > + > +-- > + > +-- Abuse tls.h macros to derive offsets relative to the thread register. > + > +TLSDESC_ARG offsetof(struct tlsdesc, arg) > +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) > +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) > +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) > +TCBHEAD_DTV offsetof(tcbhead_t, dtv) > +DTV_COUNTER offsetof(dtv_t, counter) > +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED > +TLS_DTV_OFFSET TLS_DTV_OFFSET > +SIZE_OF_DTV sizeof(tcbhead_t) > +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) > +HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX > +HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX > Ok, although I would expect you can include hwcap.h on assembly files. > Including hwcap.h gets an error: error: #error "Never include <bits/hwcap.h> directly; use <sys/auxv.h> instead. But auxv.h has C code.
diff --git a/elf/elf.h b/elf/elf.h index 55b2e87860..682bce5a94 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -4241,6 +4241,8 @@ enum #define R_LARCH_TLS_TPREL32 10 #define R_LARCH_TLS_TPREL64 11 #define R_LARCH_IRELATIVE 12 +#define R_LARCH_TLS_DESC32 13 +#define R_LARCH_TLS_DESC64 14 /* Reserved for future relocs that the dynamic linker must understand. */ diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile index 43d2f583cd..181389e787 100644 --- a/sysdeps/loongarch/Makefile +++ b/sysdeps/loongarch/Makefile @@ -3,9 +3,15 @@ sysdep_headers += sys/asm.h endif ifeq ($(subdir),elf) +sysdep-dl-routines += tlsdesc dl-tlsdesc gen-as-const-headers += dl-link.sym endif +ifeq ($(subdir),csu) +gen-as-const-headers += tlsdesc.sym +endif + + # LoongArch's assembler also needs to know about PIC as it changes the # definition of some assembler macros. ASFLAGS-.os += $(pic-ccflag) diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index ab81b82d95..0e22337183 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -25,7 +25,7 @@ #include <entry.h> #include <elf/elf.h> #include <sys/asm.h> -#include <dl-tls.h> +#include <dl-tlsdesc.h> #include <dl-static-tls.h> #include <dl-machine-rel.h> @@ -187,6 +187,36 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], *addr_field = TLS_TPREL_VALUE (sym_map, sym) + reloc->r_addend; break; + case __WORDSIZE == 64 ? 
R_LARCH_TLS_DESC64 : R_LARCH_TLS_DESC32: + { + struct tlsdesc volatile *td = (struct tlsdesc volatile *)addr_field; + if (sym == NULL) + { + td->arg = (void*)reloc->r_addend; + td->entry = _dl_tlsdesc_undefweak; + } + else + { +# ifndef SHARED + CHECK_STATIC_TLS (map, sym_map); +# else + if (!TRY_STATIC_TLS (map, sym_map)) + { + td->arg = _dl_make_tlsdesc_dynamic (sym_map, + sym->st_value + reloc->r_addend); + td->entry = _dl_tlsdesc_dynamic; + } + else +# endif + { + td->arg = (void *)(TLS_TPREL_VALUE (sym_map, sym) + + reloc->r_addend); + td->entry = _dl_tlsdesc_return; + } + } + break; + } + case R_LARCH_COPY: { if (sym == NULL) @@ -255,6 +285,26 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], else *reloc_addr = map->l_mach.plt; } + else if (__glibc_likely (r_type == R_LARCH_TLS_DESC64) + || __glibc_likely (r_type == R_LARCH_TLS_DESC32)) + { + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); + const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]); + const ElfW (Sym) *sym = &symtab[symndx]; + const struct r_found_version *version = NULL; + + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) + { + const ElfW (Half) *vernum = (const void *)D_PTR (map, + l_info[VERSYMIDX (DT_VERSYM)]); + version = &map->l_versions[vernum[symndx] & 0x7fff]; + } + + /* Always initialize TLS descriptors completely, because lazy + initialization requires synchronization at every TLS access. */ + elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, + skip_ifunc); + } else _dl_reloc_bad_type (map, r_type, 1); } diff --git a/sysdeps/loongarch/dl-tls.h b/sysdeps/loongarch/dl-tls.h index 29924b866d..de593c002d 100644 --- a/sysdeps/loongarch/dl-tls.h +++ b/sysdeps/loongarch/dl-tls.h @@ -16,6 +16,9 @@ License along with the GNU C Library. If not, see <https://www.gnu.org/licenses/>. */ +#ifndef _DL_TLS_H +#define _DL_TLS_H + /* Type used for the representation of TLS information in the GOT. 
*/ typedef struct { @@ -23,6 +26,8 @@ typedef struct unsigned long int ti_offset; } tls_index; +extern void *__tls_get_addr (tls_index *ti); + /* The thread pointer points to the first static TLS block. */ #define TLS_TP_OFFSET 0 @@ -37,10 +42,10 @@ typedef struct /* Compute the value for a DTPREL reloc. */ #define TLS_DTPREL_VALUE(sym) ((sym)->st_value - TLS_DTV_OFFSET) -extern void *__tls_get_addr (tls_index *ti); - #define GET_ADDR_OFFSET (ti->ti_offset + TLS_DTV_OFFSET) #define __TLS_GET_ADDR(__ti) (__tls_get_addr (__ti) - TLS_DTV_OFFSET) /* Value used for dtv entries for which the allocation is delayed. */ #define TLS_DTV_UNALLOCATED ((void *) -1l) + +#endif diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S new file mode 100644 index 0000000000..34028e988b --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc.S @@ -0,0 +1,417 @@ +/* Thread-local storage handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2011-2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <tls.h> +#include "tlsdesc.h" + + .text + + /* Compute the thread pointer offset for symbols in the static + TLS block. The offset is the same for all threads. 
+ Prototype: + _dl_tlsdesc_return (tlsdesc *); */ + .hidden _dl_tlsdesc_return + .global _dl_tlsdesc_return + .type _dl_tlsdesc_return,%function + cfi_startproc + .align 2 +_dl_tlsdesc_return: + REG_L a0, a0, 8 + RET + cfi_endproc + .size _dl_tlsdesc_return, .-_dl_tlsdesc_return + + /* Handler for undefined weak TLS symbols. + Prototype: + _dl_tlsdesc_undefweak (tlsdesc *); + + The second word of the descriptor contains the addend. + Return the addend minus the thread pointer. This ensures + that when the caller adds on the thread pointer it gets back + the addend. */ + .hidden _dl_tlsdesc_undefweak + .global _dl_tlsdesc_undefweak + .type _dl_tlsdesc_undefweak,%function + cfi_startproc + .align 2 +_dl_tlsdesc_undefweak: + REG_L a0, a0, 8 + sub.d a0, a0, tp + RET + cfi_endproc + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + + +#ifdef SHARED + +#define FRAME_SIZE (-((-13 * SZREG) & ALMASK)) +#define FRAME_SIZE_LSX (-((-32 * SZVREG) & ALMASK)) +#define FRAME_SIZE_LASX (-((-32 * SZXREG) & ALMASK)) +#define FRAME_SIZE_FLOAT (-((-24 * SZFREG) & ALMASK)) + + /* Handler for dynamic TLS symbols. + Prototype: + _dl_tlsdesc_dynamic (tlsdesc *) ; + + The second word of the descriptor points to a + tlsdesc_dynamic_arg structure. + + Returns the offset between the thread pointer and the + object referenced by the argument. 
+ + ptrdiff_t + _dl_tlsdesc_dynamic (struct tlsdesc *tdp) + { + struct tlsdesc_dynamic_arg *td = tdp->arg; + dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV); + if (__glibc_likely (td->gen_count <= dtv[0].counter + && (dtv[td->tlsinfo.ti_module].pointer.val + != TLS_DTV_UNALLOCATED), + 1)) + return dtv[td->tlsinfo.ti_module].pointer.val + + td->tlsinfo.ti_offset + - __thread_pointer; + + return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; + } */ + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,%function + cfi_startproc + .align 2 +_dl_tlsdesc_dynamic: + /* Save just enough registers to support fast path, if we fall + into slow path we will save additional registers. */ + ADDI sp, sp,-24 + REG_S t0, sp, 0 + REG_S t1, sp, 8 + REG_S t2, sp, 16 + + REG_L t0, tp, -SIZE_OF_DTV /* dtv(t0) = tp + TCBHEAD_DTV dtv start */ + REG_L a0, a0, TLSDESC_ARG /* td(a0) = tdp->arg */ + REG_L t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */ + REG_L t2, t0, DTV_COUNTER /* t2 = dtv[0].counter */ + bltu t2, t1, .Lslow + + REG_L t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */ + slli.d t1, t1, 3 + 1 /* sizeof(dtv_t) == sizeof(void*) * 2 */ + add.d t1, t1, t0 /* t1 = dtv + ti_module * sizeof(dtv_t) */ + REG_L t1, t1, 0 /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */ + li.d t2, TLS_DTV_UNALLOCATED + beq t1, t2, .Lslow + REG_L t2, a0, TLSDESC_MODOFF /* t2 = td->tlsinfo.ti_offset */ + /* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */ + add.d a0, t1, t2 +.Lret: + sub.d a0, a0, tp + REG_L t0, sp, 0 + REG_L t1, sp, 8 + REG_L t2, sp, 16 + ADDI sp, sp, 24 + RET + +.Lslow: + /* This is the slow path. We need to call __tls_get_addr() which + means we need to save and restore all the register that the + callee will trash. */ + + /* Save the remaining registers that we must treat as caller save. 
*/ + ADDI sp, sp, -FRAME_SIZE + REG_S ra, sp, 0 * SZREG + REG_S a1, sp, 1 * SZREG + REG_S a2, sp, 2 * SZREG + REG_S a3, sp, 3 * SZREG + REG_S a4, sp, 4 * SZREG + REG_S a5, sp, 5 * SZREG + REG_S a6, sp, 6 * SZREG + REG_S a7, sp, 7 * SZREG + REG_S t4, sp, 8 * SZREG + REG_S t5, sp, 9 * SZREG + REG_S t6, sp, 10 * SZREG + REG_S t7, sp, 11 * SZREG + REG_S t8, sp, 12 * SZREG + +#ifndef __loongarch_soft_float + + /* Save fcsr0 register. + Only one physical fcsr0 register, fcsr1-fcsr3 are aliases + of some fields in fcsr0. */ + ADDI sp, sp, -SZFCSREG + movfcsr2gr t0, fcsr0 + st.w t0, sp, 0 + + /* Whether support LASX. */ + la.global t0, _rtld_global_ro + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET + andi t0, t0, HWCAP_LOONGARCH_LASX + beqz t0, .Llsx + + /* Save 256-bit vector registers. + FIXME: Without vector ABI, save all vector registers. */ + ADDI sp, sp, -FRAME_SIZE_LASX + xvst xr0, sp, 0*SZXREG + xvst xr1, sp, 1*SZXREG + xvst xr2, sp, 2*SZXREG + xvst xr3, sp, 3*SZXREG + xvst xr4, sp, 4*SZXREG + xvst xr5, sp, 5*SZXREG + xvst xr6, sp, 6*SZXREG + xvst xr7, sp, 7*SZXREG + xvst xr8, sp, 8*SZXREG + xvst xr9, sp, 9*SZXREG + xvst xr10, sp, 10*SZXREG + xvst xr11, sp, 11*SZXREG + xvst xr12, sp, 12*SZXREG + xvst xr13, sp, 13*SZXREG + xvst xr14, sp, 14*SZXREG + xvst xr15, sp, 15*SZXREG + xvst xr16, sp, 16*SZXREG + xvst xr17, sp, 17*SZXREG + xvst xr18, sp, 18*SZXREG + xvst xr19, sp, 19*SZXREG + xvst xr20, sp, 20*SZXREG + xvst xr21, sp, 21*SZXREG + xvst xr22, sp, 22*SZXREG + xvst xr23, sp, 23*SZXREG + xvst xr24, sp, 24*SZXREG + xvst xr25, sp, 25*SZXREG + xvst xr26, sp, 26*SZXREG + xvst xr27, sp, 27*SZXREG + xvst xr28, sp, 28*SZXREG + xvst xr29, sp, 29*SZXREG + xvst xr30, sp, 30*SZXREG + xvst xr31, sp, 31*SZXREG + b .Ltga + +.Llsx: + /* Whether support LSX. */ + andi t0, t0, HWCAP_LOONGARCH_LSX + beqz t0, .Lfloat + + /* Save 128-bit vector registers. 
*/ + ADDI sp, sp, -FRAME_SIZE_LSX + vst vr0, sp, 0*SZVREG + vst vr1, sp, 1*SZVREG + vst vr2, sp, 2*SZVREG + vst vr3, sp, 3*SZVREG + vst vr4, sp, 4*SZVREG + vst vr5, sp, 5*SZVREG + vst vr6, sp, 6*SZVREG + vst vr7, sp, 7*SZVREG + vst vr8, sp, 8*SZVREG + vst vr9, sp, 9*SZVREG + vst vr10, sp, 10*SZVREG + vst vr11, sp, 11*SZVREG + vst vr12, sp, 12*SZVREG + vst vr13, sp, 13*SZVREG + vst vr14, sp, 14*SZVREG + vst vr15, sp, 15*SZVREG + vst vr16, sp, 16*SZVREG + vst vr17, sp, 17*SZVREG + vst vr18, sp, 18*SZVREG + vst vr19, sp, 19*SZVREG + vst vr20, sp, 20*SZVREG + vst vr21, sp, 21*SZVREG + vst vr22, sp, 22*SZVREG + vst vr23, sp, 23*SZVREG + vst vr24, sp, 24*SZVREG + vst vr25, sp, 25*SZVREG + vst vr26, sp, 26*SZVREG + vst vr27, sp, 27*SZVREG + vst vr28, sp, 28*SZVREG + vst vr29, sp, 29*SZVREG + vst vr30, sp, 30*SZVREG + vst vr31, sp, 31*SZVREG + b .Ltga + +.Lfloat: + /* Save float registers. */ + ADDI sp, sp, -FRAME_SIZE_FLOAT + FREG_S fa0, sp, 0*SZFREG + FREG_S fa1, sp, 1*SZFREG + FREG_S fa2, sp, 2*SZFREG + FREG_S fa3, sp, 3*SZFREG + FREG_S fa4, sp, 4*SZFREG + FREG_S fa5, sp, 5*SZFREG + FREG_S fa6, sp, 6*SZFREG + FREG_S fa7, sp, 7*SZFREG + FREG_S ft0, sp, 8*SZFREG + FREG_S ft1, sp, 9*SZFREG + FREG_S ft2, sp, 10*SZFREG + FREG_S ft3, sp, 11*SZFREG + FREG_S ft4, sp, 12*SZFREG + FREG_S ft5, sp, 13*SZFREG + FREG_S ft6, sp, 14*SZFREG + FREG_S ft7, sp, 15*SZFREG + FREG_S ft8, sp, 16*SZFREG + FREG_S ft9, sp, 17*SZFREG + FREG_S ft10, sp, 18*SZFREG + FREG_S ft11, sp, 19*SZFREG + FREG_S ft12, sp, 20*SZFREG + FREG_S ft13, sp, 21*SZFREG + FREG_S ft14, sp, 22*SZFREG + FREG_S ft15, sp, 23*SZFREG + +#endif /* #ifndef __loongarch_soft_float */ + +.Ltga: + bl __tls_get_addr + ADDI a0, a0, -TLS_DTV_OFFSET + +#ifndef __loongarch_soft_float + + la.global t0, _rtld_global_ro + REG_L t0, t0, GLRO_DL_HWCAP_OFFSET + andi t0, t0, HWCAP_LOONGARCH_LASX + beqz t0, .Llsx1 + + /* Restore 256-bit vector registers. 
*/ + xvld xr0, sp, 0*SZXREG + xvld xr1, sp, 1*SZXREG + xvld xr2, sp, 2*SZXREG + xvld xr3, sp, 3*SZXREG + xvld xr4, sp, 4*SZXREG + xvld xr5, sp, 5*SZXREG + xvld xr6, sp, 6*SZXREG + xvld xr7, sp, 7*SZXREG + xvld xr8, sp, 8*SZXREG + xvld xr9, sp, 9*SZXREG + xvld xr10, sp, 10*SZXREG + xvld xr11, sp, 11*SZXREG + xvld xr12, sp, 12*SZXREG + xvld xr13, sp, 13*SZXREG + xvld xr14, sp, 14*SZXREG + xvld xr15, sp, 15*SZXREG + xvld xr16, sp, 16*SZXREG + xvld xr17, sp, 17*SZXREG + xvld xr18, sp, 18*SZXREG + xvld xr19, sp, 19*SZXREG + xvld xr20, sp, 20*SZXREG + xvld xr21, sp, 21*SZXREG + xvld xr22, sp, 22*SZXREG + xvld xr23, sp, 23*SZXREG + xvld xr24, sp, 24*SZXREG + xvld xr25, sp, 25*SZXREG + xvld xr26, sp, 26*SZXREG + xvld xr27, sp, 27*SZXREG + xvld xr28, sp, 28*SZXREG + xvld xr29, sp, 29*SZXREG + xvld xr30, sp, 30*SZXREG + xvld xr31, sp, 31*SZXREG + ADDI sp, sp, FRAME_SIZE_LASX + b .Lfcsr + +.Llsx1: + andi t0, s0, HWCAP_LOONGARCH_LSX + beqz t0, .Lfloat1 + + /* Restore 128-bit vector registers. */ + vld vr0, sp, 0*SZVREG + vld vr1, sp, 1*SZVREG + vld vr2, sp, 2*SZVREG + vld vr3, sp, 3*SZVREG + vld vr4, sp, 4*SZVREG + vld vr5, sp, 5*SZVREG + vld vr6, sp, 6*SZVREG + vld vr7, sp, 7*SZVREG + vld vr8, sp, 8*SZVREG + vld vr9, sp, 9*SZVREG + vld vr10, sp, 10*SZVREG + vld vr11, sp, 11*SZVREG + vld vr12, sp, 12*SZVREG + vld vr13, sp, 13*SZVREG + vld vr14, sp, 14*SZVREG + vld vr15, sp, 15*SZVREG + vld vr16, sp, 16*SZVREG + vld vr17, sp, 17*SZVREG + vld vr18, sp, 18*SZVREG + vld vr19, sp, 19*SZVREG + vld vr20, sp, 20*SZVREG + vld vr21, sp, 21*SZVREG + vld vr22, sp, 22*SZVREG + vld vr23, sp, 23*SZVREG + vld vr24, sp, 24*SZVREG + vld vr25, sp, 25*SZVREG + vld vr26, sp, 26*SZVREG + vld vr27, sp, 27*SZVREG + vld vr28, sp, 28*SZVREG + vld vr29, sp, 29*SZVREG + vld vr30, sp, 30*SZVREG + vld vr31, sp, 31*SZVREG + ADDI sp, sp, FRAME_SIZE_LSX + b .Lfcsr + +.Lfloat1: + /* Restore float registers. 
*/ + FREG_L fa0, sp, 0*SZFREG + FREG_L fa1, sp, 1*SZFREG + FREG_L fa2, sp, 2*SZFREG + FREG_L fa3, sp, 3*SZFREG + FREG_L fa4, sp, 4*SZFREG + FREG_L fa5, sp, 5*SZFREG + FREG_L fa6, sp, 6*SZFREG + FREG_L fa7, sp, 7*SZFREG + FREG_L ft0, sp, 8*SZFREG + FREG_L ft1, sp, 9*SZFREG + FREG_L ft2, sp, 10*SZFREG + FREG_L ft3, sp, 11*SZFREG + FREG_L ft4, sp, 12*SZFREG + FREG_L ft5, sp, 13*SZFREG + FREG_L ft6, sp, 14*SZFREG + FREG_L ft7, sp, 15*SZFREG + FREG_L ft8, sp, 16*SZFREG + FREG_L ft9, sp, 17*SZFREG + FREG_L ft10, sp, 18*SZFREG + FREG_L ft11, sp, 19*SZFREG + FREG_L ft12, sp, 20*SZFREG + FREG_L ft13, sp, 21*SZFREG + FREG_L ft14, sp, 22*SZFREG + FREG_L ft15, sp, 23*SZFREG + ADDI sp, sp, FRAME_SIZE_FLOAT + +.Lfcsr: + /* Restore fcsr0 register. */ + ld.w t0, sp, 0 + movgr2fcsr fcsr0, t0 + ADDI sp, sp, SZFCSREG + +#endif /* #ifndef __loongarch_soft_float */ + + REG_L ra, sp, 0 + REG_L a1, sp, 1 * 8 + REG_L a2, sp, 2 * 8 + REG_L a3, sp, 3 * 8 + REG_L a4, sp, 4 * 8 + REG_L a5, sp, 5 * 8 + REG_L a6, sp, 6 * 8 + REG_L a7, sp, 7 * 8 + REG_L t4, sp, 8 * 8 + REG_L t5, sp, 9 * 8 + REG_L t6, sp, 10 * 8 + REG_L t7, sp, 11 * 8 + REG_L t8, sp, 12 * 8 + ADDI sp, sp, FRAME_SIZE + + b .Lret + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic + +#endif /* #ifdef SHARED */ diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h new file mode 100644 index 0000000000..7444dac520 --- /dev/null +++ b/sysdeps/loongarch/dl-tlsdesc.h @@ -0,0 +1,49 @@ +/* Thread-local storage descriptor handling in the ELF dynamic linker. + LoongArch version. + Copyright (C) 2011-2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _DL_TLSDESC_H +#define _DL_TLSDESC_H + +#include <dl-tls.h> + +/* Type used to represent a TLS descriptor in the GOT. */ +struct tlsdesc +{ + ptrdiff_t (*entry) (struct tlsdesc *); + void *arg; +}; + +/* Type used as the argument in a TLS descriptor for a symbol that + needs dynamic TLS offsets. */ +struct tlsdesc_dynamic_arg +{ + tls_index tlsinfo; + size_t gen_count; +}; + +extern ptrdiff_t attribute_hidden _dl_tlsdesc_return (struct tlsdesc *); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *); + +#ifdef SHARED +extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t); +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *); +#endif + +#endif diff --git a/sysdeps/loongarch/linkmap.h b/sysdeps/loongarch/linkmap.h index 4d8737ee7f..833dc9eb82 100644 --- a/sysdeps/loongarch/linkmap.h +++ b/sysdeps/loongarch/linkmap.h @@ -18,5 +18,6 @@ struct link_map_machine { - ElfW (Addr) plt; /* Address of .plt. */ + ElfW (Addr) plt; /* Address of .plt. */ + void *tlsdesc_table; /* Address of TLS descriptor hash table. 
*/ }; diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure index dfc7ecfd9e..0d1e9ed8df 100644 --- a/sysdeps/loongarch/preconfigure +++ b/sysdeps/loongarch/preconfigure @@ -43,6 +43,7 @@ loongarch*) base_machine=loongarch + mtls_descriptor=desc ;; esac diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h index 51521a7eb4..23c1d12914 100644 --- a/sysdeps/loongarch/sys/asm.h +++ b/sysdeps/loongarch/sys/asm.h @@ -25,6 +25,7 @@ /* Macros to handle different pointer/register sizes for 32/64-bit code. */ #define SZREG 8 #define SZFREG 8 +#define SZFCSREG 4 #define SZVREG 16 #define SZXREG 32 #define REG_L ld.d diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h index f61ee25b25..80ce3e9c00 100644 --- a/sysdeps/loongarch/sys/regdef.h +++ b/sysdeps/loongarch/sys/regdef.h @@ -97,6 +97,7 @@ #define fcc5 $fcc5 #define fcc6 $fcc6 #define fcc7 $fcc7 +#define fcsr0 $fcsr0 #define vr0 $vr0 #define vr1 $vr1 diff --git a/sysdeps/loongarch/tlsdesc.c b/sysdeps/loongarch/tlsdesc.c new file mode 100644 index 0000000000..4a3d5d22ef --- /dev/null +++ b/sysdeps/loongarch/tlsdesc.c @@ -0,0 +1,39 @@ +/* Manage TLS descriptors. LoongArch64 version. + + Copyright (C) 2011-2024 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <ldsodefs.h> +#include <tls.h> +#include <dl-tlsdesc.h> +#include <dl-unmap-segments.h> +#include <tlsdeschtab.h> + +/* Unmap the dynamic object, but also release its TLS descriptor table + if there is one. */ + +void +_dl_unmap (struct link_map *map) +{ + _dl_unmap_segments (map); + +#ifdef SHARED + if (map->l_mach.tlsdesc_table) + htab_delete (map->l_mach.tlsdesc_table); +#endif +} diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym new file mode 100644 index 0000000000..a0b945e449 --- /dev/null +++ b/sysdeps/loongarch/tlsdesc.sym @@ -0,0 +1,28 @@ +#include <stddef.h> +#include <sysdep.h> +#include <tls.h> +#include <link.h> +#include <dl-tlsdesc.h> + +#define SHARED 1 + +#include <ldsodefs.h> + +#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name) + +-- + +-- Abuse tls.h macros to derive offsets relative to the thread register. + +TLSDESC_ARG offsetof(struct tlsdesc, arg) +TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count) +TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module) +TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset) +TCBHEAD_DTV offsetof(tcbhead_t, dtv) +DTV_COUNTER offsetof(dtv_t, counter) +TLS_DTV_UNALLOCATED TLS_DTV_UNALLOCATED +TLS_DTV_OFFSET TLS_DTV_OFFSET +SIZE_OF_DTV sizeof(tcbhead_t) +GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap) +HWCAP_LOONGARCH_LSX HWCAP_LOONGARCH_LSX +HWCAP_LOONGARCH_LASX HWCAP_LOONGARCH_LASX diff --git a/sysdeps/loongarch/tst-gnu2-tls2.h b/sysdeps/loongarch/tst-gnu2-tls2.h new file mode 100644 index 0000000000..91b16c0f2e --- /dev/null +++ b/sysdeps/loongarch/tst-gnu2-tls2.h @@ -0,0 +1,357 @@ +/* Test TLSDESC relocation. LoongArch64 version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <string.h> +#include <stdlib.h> +#include <sys/auxv.h> + +/* The instructions between BEFORE_TLSDESC_CALL and _dl_tlsdesc_dynamic, + and the instructions between _dl_tlsdesc_dynamic and AFTER_TLSDESC_CALL, + may modify most of the general-purpose registers. */ +#define SAVE_REGISTER(src) \ + asm volatile ("st.d $r3, %0" :"=m"(src) :); + +#ifdef __loongarch_soft_float + +#define BEFORE_TLSDESC_CALL() \ + uint64_t src; \ + SAVE_REGISTER (src); + +#define AFTER_TLSDESC_CALL() \ + uint64_t restore; \ + SAVE_REGISTER (restore); \ + if (src != restore) \ + abort (); + +#else /* hard float */ + +#define SAVE_REGISTER_FCC(src) \ + asm volatile ("movcf2gr $t0, $fcc0" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[0]) :); \ + asm volatile ("movcf2gr $t0, $fcc1" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[1]) :); \ + asm volatile ("movcf2gr $t0, $fcc2" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[2]) :); \ + asm volatile ("movcf2gr $t0, $fcc3" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[3]) :); \ + asm volatile ("movcf2gr $t0, $fcc4" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[4]) :); \ + asm volatile ("movcf2gr $t0, $fcc5" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[5]) :); \ + asm volatile ("movcf2gr $t0, $fcc6" ::: "$t0"); \ + asm 
volatile ("st.d $t0, %0" :"=m"(src[6]) :); \ + asm volatile ("movcf2gr $t0, $fcc7" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(src[7]) :); + +#define LOAD_REGISTER_FCSR() \ + asm volatile ("li.d $t0, 0x01010101" ::: "$t0"); \ + asm volatile ("movgr2fcsr $fcsr0, $t0" ::: ); + +#define SAVE_REGISTER_FCSR() \ + asm volatile ("movfcsr2gr $t0, $fcsr0" ::: "$t0"); \ + asm volatile ("st.d $t0, %0" :"=m"(restore_fcsr) :); + +# define INIT_TLSDESC_CALL() \ + unsigned long hwcap = getauxval (AT_HWCAP); + +#define LOAD_REGISTER_FLOAT() \ + asm volatile ("fld.d $f0, %0" ::"m"(src_float[0]) :"$f0"); \ + asm volatile ("fld.d $f1, %0" ::"m"(src_float[1]) :"$f1"); \ + asm volatile ("fld.d $f2, %0" ::"m"(src_float[2]) :"$f2"); \ + asm volatile ("fld.d $f3, %0" ::"m"(src_float[3]) :"$f3"); \ + asm volatile ("fld.d $f4, %0" ::"m"(src_float[4]) :"$f4"); \ + asm volatile ("fld.d $f5, %0" ::"m"(src_float[5]) :"$f5"); \ + asm volatile ("fld.d $f6, %0" ::"m"(src_float[6]) :"$f6"); \ + asm volatile ("fld.d $f7, %0" ::"m"(src_float[7]) :"$f7"); \ + asm volatile ("fld.d $f8, %0" ::"m"(src_float[8]) :"$f8"); \ + asm volatile ("fld.d $f9, %0" ::"m"(src_float[9]) :"$f9"); \ + asm volatile ("fld.d $f10, %0" ::"m"(src_float[10]) :"$f10"); \ + asm volatile ("fld.d $f11, %0" ::"m"(src_float[11]) :"$f11"); \ + asm volatile ("fld.d $f12, %0" ::"m"(src_float[12]) :"$f12"); \ + asm volatile ("fld.d $f13, %0" ::"m"(src_float[13]) :"$f13"); \ + asm volatile ("fld.d $f14, %0" ::"m"(src_float[14]) :"$f14"); \ + asm volatile ("fld.d $f15, %0" ::"m"(src_float[15]) :"$f15"); \ + asm volatile ("fld.d $f16, %0" ::"m"(src_float[16]) :"$f16"); \ + asm volatile ("fld.d $f17, %0" ::"m"(src_float[17]) :"$f17"); \ + asm volatile ("fld.d $f18, %0" ::"m"(src_float[18]) :"$f18"); \ + asm volatile ("fld.d $f19, %0" ::"m"(src_float[19]) :"$f19"); \ + asm volatile ("fld.d $f20, %0" ::"m"(src_float[20]) :"$f20"); \ + asm volatile ("fld.d $f21, %0" ::"m"(src_float[21]) :"$f21"); \ + asm volatile ("fld.d $f22, %0" 
::"m"(src_float[22]) :"$f22"); \ + asm volatile ("fld.d $f23, %0" ::"m"(src_float[23]) :"$f23"); \ + asm volatile ("fld.d $f24, %0" ::"m"(src_float[24]) :"$f24"); \ + asm volatile ("fld.d $f25, %0" ::"m"(src_float[25]) :"$f25"); \ + asm volatile ("fld.d $f26, %0" ::"m"(src_float[26]) :"$f26"); \ + asm volatile ("fld.d $f27, %0" ::"m"(src_float[27]) :"$f27"); \ + asm volatile ("fld.d $f28, %0" ::"m"(src_float[28]) :"$f28"); \ + asm volatile ("fld.d $f29, %0" ::"m"(src_float[29]) :"$f29"); \ + asm volatile ("fld.d $f30, %0" ::"m"(src_float[30]) :"$f30"); \ + asm volatile ("fld.d $f31, %0" ::"m"(src_float[31]) :"$f31"); + +#define SAVE_REGISTER_FLOAT() \ + asm volatile ("fst.d $f0, %0" :"=m"(restore_float[0]) :); \ + asm volatile ("fst.d $f1, %0" :"=m"(restore_float[1]) :); \ + asm volatile ("fst.d $f2, %0" :"=m"(restore_float[2]) :); \ + asm volatile ("fst.d $f3, %0" :"=m"(restore_float[3]) :); \ + asm volatile ("fst.d $f4, %0" :"=m"(restore_float[4]) :); \ + asm volatile ("fst.d $f5, %0" :"=m"(restore_float[5]) :); \ + asm volatile ("fst.d $f6, %0" :"=m"(restore_float[6]) :); \ + asm volatile ("fst.d $f7, %0" :"=m"(restore_float[7]) :); \ + asm volatile ("fst.d $f8, %0" :"=m"(restore_float[8]) :); \ + asm volatile ("fst.d $f9, %0" :"=m"(restore_float[9]) :); \ + asm volatile ("fst.d $f10, %0" :"=m"(restore_float[10]) :); \ + asm volatile ("fst.d $f11, %0" :"=m"(restore_float[11]) :); \ + asm volatile ("fst.d $f12, %0" :"=m"(restore_float[12]) :); \ + asm volatile ("fst.d $f13, %0" :"=m"(restore_float[13]) :); \ + asm volatile ("fst.d $f14, %0" :"=m"(restore_float[14]) :); \ + asm volatile ("fst.d $f15, %0" :"=m"(restore_float[15]) :); \ + asm volatile ("fst.d $f16, %0" :"=m"(restore_float[16]) :); \ + asm volatile ("fst.d $f17, %0" :"=m"(restore_float[17]) :); \ + asm volatile ("fst.d $f18, %0" :"=m"(restore_float[18]) :); \ + asm volatile ("fst.d $f19, %0" :"=m"(restore_float[19]) :); \ + asm volatile ("fst.d $f20, %0" :"=m"(restore_float[20]) :); \ + asm volatile 
("fst.d $f21, %0" :"=m"(restore_float[21]) :); \ + asm volatile ("fst.d $f22, %0" :"=m"(restore_float[22]) :); \ + asm volatile ("fst.d $f23, %0" :"=m"(restore_float[23]) :); \ + asm volatile ("fst.d $f24, %0" :"=m"(restore_float[24]) :); \ + asm volatile ("fst.d $f25, %0" :"=m"(restore_float[25]) :); \ + asm volatile ("fst.d $f26, %0" :"=m"(restore_float[26]) :); \ + asm volatile ("fst.d $f27, %0" :"=m"(restore_float[27]) :); \ + asm volatile ("fst.d $f28, %0" :"=m"(restore_float[28]) :); \ + asm volatile ("fst.d $f29, %0" :"=m"(restore_float[29]) :); \ + asm volatile ("fst.d $f30, %0" :"=m"(restore_float[30]) :); \ + asm volatile ("fst.d $f31, %0" :"=m"(restore_float[31]) :); + +#define LOAD_REGISTER_LSX() \ + /* Every byte in $vr0 is 1. */ \ + asm volatile ("vldi $vr0, 1" ::: "$vr0"); \ + asm volatile ("vldi $vr1, 2" ::: "$vr1"); \ + asm volatile ("vldi $vr2, 3" ::: "$vr2"); \ + asm volatile ("vldi $vr3, 4" ::: "$vr3"); \ + asm volatile ("vldi $vr4, 5" ::: "$vr4"); \ + asm volatile ("vldi $vr5, 6" ::: "$vr5"); \ + asm volatile ("vldi $vr6, 7" ::: "$vr6"); \ + asm volatile ("vldi $vr7, 8" ::: "$vr7"); \ + asm volatile ("vldi $vr8, 9" ::: "$vr8"); \ + asm volatile ("vldi $vr9, 10" ::: "$vr9"); \ + asm volatile ("vldi $vr10, 11" ::: "$vr10"); \ + asm volatile ("vldi $vr11, 12" ::: "$vr11"); \ + asm volatile ("vldi $vr12, 13" ::: "$vr12"); \ + asm volatile ("vldi $vr13, 14" ::: "$vr13"); \ + asm volatile ("vldi $vr14, 15" ::: "$vr14"); \ + asm volatile ("vldi $vr15, 16" ::: "$vr15"); \ + asm volatile ("vldi $vr16, 17" ::: "$vr16"); \ + asm volatile ("vldi $vr17, 18" ::: "$vr17"); \ + asm volatile ("vldi $vr18, 19" ::: "$vr18"); \ + asm volatile ("vldi $vr19, 20" ::: "$vr19"); \ + asm volatile ("vldi $vr20, 21" ::: "$vr20"); \ + asm volatile ("vldi $vr21, 22" ::: "$vr21"); \ + asm volatile ("vldi $vr22, 23" ::: "$vr22"); \ + asm volatile ("vldi $vr23, 24" ::: "$vr23"); \ + asm volatile ("vldi $vr24, 25" ::: "$vr24"); \ + asm volatile ("vldi $vr25, 26" ::: "$vr25"); \ 
+ asm volatile ("vldi $vr26, 27" ::: "$vr26"); \ + asm volatile ("vldi $vr27, 28" ::: "$vr27"); \ + asm volatile ("vldi $vr28, 29" ::: "$vr28"); \ + asm volatile ("vldi $vr29, 30" ::: "$vr29"); \ + asm volatile ("vldi $vr30, 31" ::: "$vr30"); \ + asm volatile ("vldi $vr31, 32" ::: "$vr31"); + +#define SAVE_REGISTER_LSX() \ + asm volatile ("vst $vr0, %0" :"=m"(restore_lsx[0]) :); \ + asm volatile ("vst $vr1, %0" :"=m"(restore_lsx[1]) :); \ + asm volatile ("vst $vr2, %0" :"=m"(restore_lsx[2]) :); \ + asm volatile ("vst $vr3, %0" :"=m"(restore_lsx[3]) :); \ + asm volatile ("vst $vr4, %0" :"=m"(restore_lsx[4]) :); \ + asm volatile ("vst $vr5, %0" :"=m"(restore_lsx[5]) :); \ + asm volatile ("vst $vr6, %0" :"=m"(restore_lsx[6]) :); \ + asm volatile ("vst $vr7, %0" :"=m"(restore_lsx[7]) :); \ + asm volatile ("vst $vr8, %0" :"=m"(restore_lsx[8]) :); \ + asm volatile ("vst $vr9, %0" :"=m"(restore_lsx[9]) :); \ + asm volatile ("vst $vr10, %0" :"=m"(restore_lsx[10]) :); \ + asm volatile ("vst $vr11, %0" :"=m"(restore_lsx[11]) :); \ + asm volatile ("vst $vr12, %0" :"=m"(restore_lsx[12]) :); \ + asm volatile ("vst $vr13, %0" :"=m"(restore_lsx[13]) :); \ + asm volatile ("vst $vr14, %0" :"=m"(restore_lsx[14]) :); \ + asm volatile ("vst $vr15, %0" :"=m"(restore_lsx[15]) :); \ + asm volatile ("vst $vr16, %0" :"=m"(restore_lsx[16]) :); \ + asm volatile ("vst $vr17, %0" :"=m"(restore_lsx[17]) :); \ + asm volatile ("vst $vr18, %0" :"=m"(restore_lsx[18]) :); \ + asm volatile ("vst $vr19, %0" :"=m"(restore_lsx[19]) :); \ + asm volatile ("vst $vr20, %0" :"=m"(restore_lsx[20]) :); \ + asm volatile ("vst $vr21, %0" :"=m"(restore_lsx[21]) :); \ + asm volatile ("vst $vr22, %0" :"=m"(restore_lsx[22]) :); \ + asm volatile ("vst $vr23, %0" :"=m"(restore_lsx[23]) :); \ + asm volatile ("vst $vr24, %0" :"=m"(restore_lsx[24]) :); \ + asm volatile ("vst $vr25, %0" :"=m"(restore_lsx[25]) :); \ + asm volatile ("vst $vr26, %0" :"=m"(restore_lsx[26]) :); \ + asm volatile ("vst $vr27, %0" 
:"=m"(restore_lsx[27]) :); \ + asm volatile ("vst $vr28, %0" :"=m"(restore_lsx[28]) :); \ + asm volatile ("vst $vr29, %0" :"=m"(restore_lsx[29]) :); \ + asm volatile ("vst $vr30, %0" :"=m"(restore_lsx[30]) :); \ + asm volatile ("vst $vr31, %0" :"=m"(restore_lsx[31]) :); + +#define LOAD_REGISTER_LASX() \ + /* Every byte in $xr0 is 1. */ \ + asm volatile ("xvldi $xr0, 1" ::: "$xr0"); \ + asm volatile ("xvldi $xr1, 2" ::: "$xr1"); \ + asm volatile ("xvldi $xr2, 3" ::: "$xr2"); \ + asm volatile ("xvldi $xr3, 4" ::: "$xr3"); \ + asm volatile ("xvldi $xr4, 5" ::: "$xr4"); \ + asm volatile ("xvldi $xr5, 6" ::: "$xr5"); \ + asm volatile ("xvldi $xr6, 7" ::: "$xr6"); \ + asm volatile ("xvldi $xr7, 8" ::: "$xr7"); \ + asm volatile ("xvldi $xr8, 9" ::: "$xr8"); \ + asm volatile ("xvldi $xr9, 10" ::: "$xr9"); \ + asm volatile ("xvldi $xr10, 11" ::: "$xr10"); \ + asm volatile ("xvldi $xr11, 12" ::: "$xr11"); \ + asm volatile ("xvldi $xr12, 13" ::: "$xr12"); \ + asm volatile ("xvldi $xr13, 14" ::: "$xr13"); \ + asm volatile ("xvldi $xr14, 15" ::: "$xr14"); \ + asm volatile ("xvldi $xr15, 16" ::: "$xr15"); \ + asm volatile ("xvldi $xr16, 17" ::: "$xr16"); \ + asm volatile ("xvldi $xr17, 18" ::: "$xr17"); \ + asm volatile ("xvldi $xr18, 19" ::: "$xr18"); \ + asm volatile ("xvldi $xr19, 20" ::: "$xr19"); \ + asm volatile ("xvldi $xr20, 21" ::: "$xr20"); \ + asm volatile ("xvldi $xr21, 22" ::: "$xr21"); \ + asm volatile ("xvldi $xr22, 23" ::: "$xr22"); \ + asm volatile ("xvldi $xr23, 24" ::: "$xr23"); \ + asm volatile ("xvldi $xr24, 25" ::: "$xr24"); \ + asm volatile ("xvldi $xr25, 26" ::: "$xr25"); \ + asm volatile ("xvldi $xr26, 27" ::: "$xr26"); \ + asm volatile ("xvldi $xr27, 28" ::: "$xr27"); \ + asm volatile ("xvldi $xr28, 29" ::: "$xr28"); \ + asm volatile ("xvldi $xr29, 30" ::: "$xr29"); \ + asm volatile ("xvldi $xr30, 31" ::: "$xr30"); \ + asm volatile ("xvldi $xr31, 32" ::: "$xr31"); + +#define SAVE_REGISTER_LASX() \ + asm volatile ("xvst $xr0, %0" :"=m"(restore_lasx[0]) 
:); \ + asm volatile ("xvst $xr1, %0" :"=m"(restore_lasx[1]) :); \ + asm volatile ("xvst $xr2, %0" :"=m"(restore_lasx[2]) :); \ + asm volatile ("xvst $xr3, %0" :"=m"(restore_lasx[3]) :); \ + asm volatile ("xvst $xr4, %0" :"=m"(restore_lasx[4]) :); \ + asm volatile ("xvst $xr5, %0" :"=m"(restore_lasx[5]) :); \ + asm volatile ("xvst $xr6, %0" :"=m"(restore_lasx[6]) :); \ + asm volatile ("xvst $xr7, %0" :"=m"(restore_lasx[7]) :); \ + asm volatile ("xvst $xr8, %0" :"=m"(restore_lasx[8]) :); \ + asm volatile ("xvst $xr9, %0" :"=m"(restore_lasx[9]) :); \ + asm volatile ("xvst $xr10, %0" :"=m"(restore_lasx[10]) :); \ + asm volatile ("xvst $xr11, %0" :"=m"(restore_lasx[11]) :); \ + asm volatile ("xvst $xr12, %0" :"=m"(restore_lasx[12]) :); \ + asm volatile ("xvst $xr13, %0" :"=m"(restore_lasx[13]) :); \ + asm volatile ("xvst $xr14, %0" :"=m"(restore_lasx[14]) :); \ + asm volatile ("xvst $xr15, %0" :"=m"(restore_lasx[15]) :); \ + asm volatile ("xvst $xr16, %0" :"=m"(restore_lasx[16]) :); \ + asm volatile ("xvst $xr17, %0" :"=m"(restore_lasx[17]) :); \ + asm volatile ("xvst $xr18, %0" :"=m"(restore_lasx[18]) :); \ + asm volatile ("xvst $xr19, %0" :"=m"(restore_lasx[19]) :); \ + asm volatile ("xvst $xr20, %0" :"=m"(restore_lasx[20]) :); \ + asm volatile ("xvst $xr21, %0" :"=m"(restore_lasx[21]) :); \ + asm volatile ("xvst $xr22, %0" :"=m"(restore_lasx[22]) :); \ + asm volatile ("xvst $xr23, %0" :"=m"(restore_lasx[23]) :); \ + asm volatile ("xvst $xr24, %0" :"=m"(restore_lasx[24]) :); \ + asm volatile ("xvst $xr25, %0" :"=m"(restore_lasx[25]) :); \ + asm volatile ("xvst $xr26, %0" :"=m"(restore_lasx[26]) :); \ + asm volatile ("xvst $xr27, %0" :"=m"(restore_lasx[27]) :); \ + asm volatile ("xvst $xr28, %0" :"=m"(restore_lasx[28]) :); \ + asm volatile ("xvst $xr29, %0" :"=m"(restore_lasx[29]) :); \ + asm volatile ("xvst $xr30, %0" :"=m"(restore_lasx[30]) :); \ + asm volatile ("xvst $xr31, %0" :"=m"(restore_lasx[31]) :); + +#define BEFORE_TLSDESC_CALL() \ + uint64_t src; \ + 
double src_float[32]; \ + uint64_t src_fcc[8]; \ + SAVE_REGISTER (src); \ + LOAD_REGISTER_FCSR (); \ + SAVE_REGISTER_FCC(src_fcc) \ + \ + if (hwcap & HWCAP_LOONGARCH_LASX) \ + { \ + LOAD_REGISTER_LASX (); \ + } \ + else if (hwcap & HWCAP_LOONGARCH_LSX) \ + { \ + LOAD_REGISTER_LSX (); \ + } \ + else \ + { \ + for (int i = 0; i < 32; i++) \ + src_float[i] = i + 1; \ + LOAD_REGISTER_FLOAT (); \ + } + +#define AFTER_TLSDESC_CALL() \ + uint64_t restore; \ + uint64_t src_fcsr = 0x01010101; \ + uint64_t restore_fcsr; \ + uint64_t restore_fcc[8]; \ + SAVE_REGISTER (restore); \ + SAVE_REGISTER_FCSR (); \ + SAVE_REGISTER_FCC(restore_fcc) \ + \ + /* memcmp_lasx/strlen_lasx corrupts LSX/LASX registers, */ \ + /* compare LSX/LASX registers first. */ \ + if (hwcap & HWCAP_LOONGARCH_LASX) \ + { \ + int src_lasx[32][8]; \ + int restore_lasx[32][8]; \ + SAVE_REGISTER_LASX (); \ + for (int i = 0; i < 32; i++) \ + for (int j = 0; j < 8; j++) \ + src_lasx[i][j] = 0x01010101 * (i + 1); \ + \ + if (memcmp (src_lasx, restore_lasx, sizeof (src_lasx)) != 0) \ + abort (); \ + } \ + else if (hwcap & HWCAP_LOONGARCH_LSX) \ + { \ + int src_lsx[32][4]; \ + int restore_lsx[32][4]; \ + SAVE_REGISTER_LSX (); \ + for (int i = 0; i < 32; i++) \ + for (int j = 0; j < 4; j++) \ + src_lsx[i][j] = 0x01010101 * (i + 1); \ + \ + if (memcmp (src_lsx, restore_lsx, sizeof (src_lsx)) != 0) \ + abort (); \ + } \ + else \ + { \ + double restore_float[32]; \ + SAVE_REGISTER_FLOAT (); \ + \ + if (memcmp (src_float, restore_float, sizeof (src_float)) != 0) \ + abort (); \ + } \ + \ + if (src_fcsr != restore_fcsr) \ + abort (); \ + \ + if (memcmp (src_fcc, restore_fcc, sizeof (src_fcc)) != 0) \ + abort (); \ + \ + if (src != restore) \ + abort (); + +#endif /* #ifdef __loongarch_soft_float */ + +#include_next <tst-gnu2-tls2.h> + diff --git a/sysdeps/unix/sysv/linux/loongarch/localplt.data b/sysdeps/unix/sysv/linux/loongarch/localplt.data index 547b1c1b7f..ec32e6d13f 100644 --- 
a/sysdeps/unix/sysv/linux/loongarch/localplt.data +++ b/sysdeps/unix/sysv/linux/loongarch/localplt.data @@ -5,3 +5,5 @@ libc.so: calloc libc.so: free libc.so: malloc libc.so: realloc +# The dynamic loader needs __tls_get_addr for TLS. +ld.so: __tls_get_addr