Message ID | 20230911103401.2485168-1-caiyinyu@loongson.cn |
---|---|
State | New |
Headers | show |
Series | LoongArch: Add lasx/lsx support for _dl_runtime_profile. | expand |
I think this characterize as a ABI break, so it should follow what aarch64 did when we fixed BZ#26643 (ce9a68c57c260c8417afc93972849ac9ad243ec4) and bump LAV_CURRENT (it was not done on this specific commit, but rather on 32612615c58b394c3eb09f020f31310797ad3854 to fix BZ #23734). So add a loongarch link_lavcurrent.h with value of 3 for !__loongarch_soft_float. On 11/09/23 07:34, caiyinyu wrote: > --- > sysdeps/loongarch/bits/link.h | 24 ++- > sysdeps/loongarch/dl-link.sym | 8 +- > sysdeps/loongarch/dl-machine.h | 11 +- > sysdeps/loongarch/dl-trampoline.S | 175 +-------------------- > sysdeps/loongarch/dl-trampoline.h | 242 ++++++++++++++++++++++++++++++ > 5 files changed, 283 insertions(+), 177 deletions(-) > > diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h > index 7fa6131280..00f6f25f2d 100644 > --- a/sysdeps/loongarch/bits/link.h > +++ b/sysdeps/loongarch/bits/link.h > @@ -20,10 +20,26 @@ > #error "Never include <bits/link.h> directly; use <link.h> instead." > #endif > > +#ifndef __loongarch_soft_float > +typedef float La_loongarch_vr > + __attribute__ ((__vector_size__ (16), __aligned__ (16))); > +typedef float La_loongarch_xr > + __attribute__ ((__vector_size__ (32), __aligned__ (16))); > + > +typedef union > +{ > + double fpreg[4]; > + La_loongarch_vr vr[2]; > + La_loongarch_xr xr[1]; > +} La_loongarch_vector __attribute__ ((__aligned__ (16))); > +#endif > + > typedef struct La_loongarch_regs > { > unsigned long int lr_reg[8]; /* a0 - a7 */ > - double lr_fpreg[8]; /* fa0 - fa7 */ > +#ifndef __loongarch_soft_float > + La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ > +#endif > unsigned long int lr_ra; > unsigned long int lr_sp; > } La_loongarch_regs; > @@ -33,8 +49,10 @@ typedef struct La_loongarch_retval > { > unsigned long int lrv_a0; > unsigned long int lrv_a1; > - double lrv_fa0; > - double lrv_fa1; > +#ifndef __loongarch_soft_float > + La_loongarch_vector lrv_vec0; > + La_loongarch_vector lrv_vec1; > +#endif > } La_loongarch_retval; > > __BEGIN_DECLS > diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym > index 868ab7c6eb..b534968e30 100644 > --- a/sysdeps/loongarch/dl-link.sym > +++ b/sysdeps/loongarch/dl-link.sym > @@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) > DL_SIZEOF_RV sizeof(struct La_loongarch_retval) > > DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) > -DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) > +#ifndef __loongarch_soft_float > +DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) > +#endif > DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) > DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) > > DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) > -DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) > +#ifndef __loongarch_soft_float > +DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) > +#endif > diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h > index 066bb233ac..8a2db9de3c 100644 > --- a/sysdeps/loongarch/dl-machine.h > +++ b/sysdeps/loongarch/dl-machine.h > @@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], > #if !defined __loongarch_soft_float > extern void _dl_runtime_resolve_lasx (void) attribute_hidden; > extern void _dl_runtime_resolve_lsx (void) attribute_hidden; > + extern void _dl_runtime_profile_lasx (void) attribute_hidden; > + extern void _dl_runtime_profile_lsx (void) attribute_hidden; > #endif > extern void _dl_runtime_resolve (void) attribute_hidden; > extern void _dl_runtime_profile (void) attribute_hidden; > @@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], > end in this function. */ > if (profile != 0) > { > - gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; > +#if !defined __loongarch_soft_float > + if (SUPPORT_LASX) > + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; > + else if (SUPPORT_LSX) > + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; > + else > +#endif > + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; > > if (GLRO(dl_profile) != NULL > && _dl_name_match_p (GLRO(dl_profile), l)) > diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S > index 8fd9146978..dce1c2f122 100644 > --- a/sysdeps/loongarch/dl-trampoline.S > +++ b/sysdeps/loongarch/dl-trampoline.S > @@ -22,190 +22,23 @@ > #if !defined __loongarch_soft_float > #define USE_LASX > #define _dl_runtime_resolve _dl_runtime_resolve_lasx > +#define _dl_runtime_profile _dl_runtime_profile_lasx > #include "dl-trampoline.h" > #undef FRAME_SIZE > #undef USE_LASX > #undef _dl_runtime_resolve > +#undef _dl_runtime_profile > > #define USE_LSX > #define _dl_runtime_resolve _dl_runtime_resolve_lsx > +#define _dl_runtime_profile _dl_runtime_profile_lsx > #include "dl-trampoline.h" > #undef FRAME_SIZE > #undef USE_LSX > #undef _dl_runtime_resolve > +#undef _dl_runtime_profile > #endif > > #include "dl-trampoline.h" > > -#include "dl-link.h" > > -ENTRY (_dl_runtime_profile) > - /* LoongArch we get called with: > - t0 linkr_map pointer > - t1 the scaled offset stored in t0, which can be used > - to calculate the offset of the current symbol in .rela.plt > - t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function > - t3 dl resolver entry point, no use in this function > - > - Stack frame layout: > - [sp, #96] La_loongarch_regs > - [sp, #48] La_loongarch_retval > - [sp, #40] frame size return from pltenter > - [sp, #32] dl_profile_call saved a1 > - [sp, #24] dl_profile_call saved a0 > - [sp, #16] T1 > - [sp, #0] ra, fp <- fp > - */ > - > -# define OFFSET_T1 16 > -# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 > -# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 > -# define OFFSET_RV OFFSET_FS + 8 > -# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV > - > -# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) > - > - /* Save arguments to stack. */ > - ADDI sp, sp, -SF_SIZE > - REG_S ra, sp, 0 > - REG_S fp, sp, 8 > - > - or fp, sp, zero > - > - REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > - REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > - REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > - REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > - REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > - REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > - REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > - REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > - > -#ifndef __loongarch_soft_float > - FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG > - FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG > - FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG > - FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG > - FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG > - FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG > - FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG > - FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG > -#endif > - > - /* Update .got.plt and obtain runtime address of callee. */ > - SLLI a1, t1, 1 > - or a0, t0, zero > - ADD a1, a1, t1 > - or a2, ra, zero /* return addr */ > - ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ > - ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ > - > - REG_S a0, fp, OFFSET_SAVED_CALL_A0 > - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > - > - la t2, _dl_profile_fixup > - jirl ra, t2, 0 > - > - REG_L t3, fp, OFFSET_FS > - bge t3, zero, 1f > - > - /* Save the return. */ > - or t4, v0, zero > - > - /* Restore arguments from stack. */ > - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > - > -#ifndef __loongarch_soft_float > - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG > - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG > - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG > - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG > - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG > - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG > - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG > - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG > -#endif > - > - REG_L ra, fp, 0 > - REG_L fp, fp, SZREG > - > - ADDI sp, sp, SF_SIZE > - jirl zero, t4, 0 > - > -1: > - /* The new frame size is in t3. */ > - SUB sp, fp, t3 > - BSTRINS sp, zero, 3, 0 > - > - REG_S a0, fp, OFFSET_T1 > - > - or a0, sp, zero > - ADDI a1, fp, SF_SIZE > - or a2, t3, zero > - la t5, memcpy > - jirl ra, t5, 0 > - > - REG_L t6, fp, OFFSET_T1 > - > - /* Call the function. */ > - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > - > -#ifndef __loongarch_soft_float > - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG > - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG > - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG > - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG > - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG > - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG > - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG > - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG > -#endif > - jirl ra, t6, 0 > - > - REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 > - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG > - > -#ifndef __loongarch_soft_float > - FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 > - FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG > -#endif > - > - /* Setup call to pltexit. */ > - REG_L a0, fp, OFFSET_SAVED_CALL_A0 > - REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > - ADDI a2, fp, OFFSET_RG > - ADDI a3, fp, OFFSET_RV > - la t7, _dl_audit_pltexit > - jirl ra, t7, 0 > - > - REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 > - REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG > - > -#ifndef __loongarch_soft_float > - FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 > - FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG > -#endif > - > - /* RA from within La_loongarch_reg. */ > - REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA > - or sp, fp, zero > - ADDI sp, sp, SF_SIZE > - REG_S fp, fp, SZREG > - > - jirl zero, ra, 0 > - > -END (_dl_runtime_profile) > diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h > index 02375286f8..cb4a287c65 100644 > --- a/sysdeps/loongarch/dl-trampoline.h > +++ b/sysdeps/loongarch/dl-trampoline.h > @@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) > /* Invoke the callee. */ > jirl zero, t1, 0 > END (_dl_runtime_resolve) > + > +#include "dl-link.h" > + > +ENTRY (_dl_runtime_profile) > + /* LoongArch we get called with: > + t0 linkr_map pointer > + t1 the scaled offset stored in t0, which can be used > + to calculate the offset of the current symbol in .rela.plt > + t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function > + t3 dl resolver entry point, no use in this function > + > + Stack frame layout: > + [sp, #208] La_loongarch_regs > + [sp, #128] La_loongarch_retval // align: 16 > + [sp, #112] frame size return from pltenter > + [sp, #80 ] dl_profile_call saved vec1 > + [sp, #48 ] dl_profile_call saved vec0 // align: 16 > + [sp, #32 ] dl_profile_call saved a1 > + [sp, #24 ] dl_profile_call saved a0 > + [sp, #16 ] T1 > + [sp, #0 ] ra, fp <- fp > + */ > + > +# define OFFSET_T1 16 > +# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 > +# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 > +# define OFFSET_RV OFFSET_FS + 8 + 8 > +# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV > + > +# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) > + > + /* Save arguments to stack. */ > + ADDI sp, sp, -SF_SIZE > + REG_S ra, sp, 0 > + REG_S fp, sp, 8 > + > + or fp, sp, zero > + > + REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > + REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > + REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > + REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > + REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > + REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > + REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > + REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > + > +#ifdef USE_LASX > + xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG > + xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG > + xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG > + xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG > + xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG > + xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG > + xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG > + xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG > +#elif defined USE_LSX > + vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG > + vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG > + vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG > + vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG > + vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG > + vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG > + vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG > + vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG > + FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG > + FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG > + FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG > + FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG > + FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG > + FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG > + FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG > +#endif > + > + /* Update .got.plt and obtain runtime address of callee. */ > + SLLI a1, t1, 1 > + or a0, t0, zero > + ADD a1, a1, t1 > + or a2, ra, zero /* return addr */ > + ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ > + ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ > + > + REG_S a0, fp, OFFSET_SAVED_CALL_A0 > + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > + > + la t2, _dl_profile_fixup > + jirl ra, t2, 0 > + > + REG_L t3, fp, OFFSET_FS > + bge t3, zero, 1f > + > + /* Save the return. */ > + or t4, v0, zero > + > + /* Restore arguments from stack. */ > + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > + > +#ifdef USE_LASX > + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG > + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG > + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG > + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG > + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG > + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG > + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG > + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG > +#elif defined USE_LSX > + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG > + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG > + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG > + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG > + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG > + vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG > + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG > + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG > + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG > + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG > + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG > + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG > + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG > + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG > + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG > +#endif > + > + REG_L ra, fp, 0 > + REG_L fp, fp, SZREG > + > + ADDI sp, sp, SF_SIZE > + jirl zero, t4, 0 > + > +1: > + /* The new frame size is in t3. */ > + SUB sp, fp, t3 > + BSTRINS sp, zero, 3, 0 > + > + REG_S a0, fp, OFFSET_T1 > + > + or a0, sp, zero > + ADDI a1, fp, SF_SIZE > + or a2, t3, zero > + la t5, memcpy > + jirl ra, t5, 0 > + > + REG_L t6, fp, OFFSET_T1 > + > + /* Call the function. */ > + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG > + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG > + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG > + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG > + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG > + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG > + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG > + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG > + > +#ifdef USE_LASX > + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG > + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG > + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG > + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG > + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG > + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG > + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG > + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG > +#elif defined USE_LSX > + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG > + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG > + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG > + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG > + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG > + vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG > + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG > + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG > +#elif !defined __loongarch_soft_float > + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG > + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG > + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG > + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG > + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG > + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG > + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG > + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG > +#endif > + > + jirl ra, t6, 0 > + > + REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 > + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG > + > +#ifdef USE_LASX > + xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG > +#elif defined USE_LSX > + vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG > +#elif !defined __loongarch_soft_float > + FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG > +#endif > + > + /* Setup call to pltexit. */ > + REG_L a0, fp, OFFSET_SAVED_CALL_A0 > + REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG > + ADDI a2, fp, OFFSET_RG > + ADDI a3, fp, OFFSET_RV > + la t7, _dl_audit_pltexit > + jirl ra, t7, 0 > + > + REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 > + REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG > + > +#ifdef USE_LASX > + xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG > +#elif defined USE_LSX > + vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG > +#elif !defined __loongarch_soft_float > + FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 > + FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG > +#endif > + > + /* RA from within La_loongarch_reg. */ > + REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA > + or sp, fp, zero > + ADDI sp, sp, SF_SIZE > + REG_S fp, fp, SZREG > + > + jirl zero, ra, 0 > + > +END (_dl_runtime_profile)
diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h index 7fa6131280..00f6f25f2d 100644 --- a/sysdeps/loongarch/bits/link.h +++ b/sysdeps/loongarch/bits/link.h @@ -20,10 +20,26 @@ #error "Never include <bits/link.h> directly; use <link.h> instead." #endif +#ifndef __loongarch_soft_float +typedef float La_loongarch_vr + __attribute__ ((__vector_size__ (16), __aligned__ (16))); +typedef float La_loongarch_xr + __attribute__ ((__vector_size__ (32), __aligned__ (16))); + +typedef union +{ + double fpreg[4]; + La_loongarch_vr vr[2]; + La_loongarch_xr xr[1]; +} La_loongarch_vector __attribute__ ((__aligned__ (16))); +#endif + typedef struct La_loongarch_regs { unsigned long int lr_reg[8]; /* a0 - a7 */ - double lr_fpreg[8]; /* fa0 - fa7 */ +#ifndef __loongarch_soft_float + La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ +#endif unsigned long int lr_ra; unsigned long int lr_sp; } La_loongarch_regs; @@ -33,8 +49,10 @@ typedef struct La_loongarch_retval { unsigned long int lrv_a0; unsigned long int lrv_a1; - double lrv_fa0; - double lrv_fa1; +#ifndef __loongarch_soft_float + La_loongarch_vector lrv_vec0; + La_loongarch_vector lrv_vec1; +#endif } La_loongarch_retval; __BEGIN_DECLS diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym index 868ab7c6eb..b534968e30 100644 --- a/sysdeps/loongarch/dl-link.sym +++ b/sysdeps/loongarch/dl-link.sym @@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) DL_SIZEOF_RV sizeof(struct La_loongarch_retval) DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) -DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) +#ifndef __loongarch_soft_float +DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) +#endif DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) -DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) +#ifndef __loongarch_soft_float +DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) +#endif diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h index 066bb233ac..8a2db9de3c 100644 --- a/sysdeps/loongarch/dl-machine.h +++ b/sysdeps/loongarch/dl-machine.h @@ -273,6 +273,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], #if !defined __loongarch_soft_float extern void _dl_runtime_resolve_lasx (void) attribute_hidden; extern void _dl_runtime_resolve_lsx (void) attribute_hidden; + extern void _dl_runtime_profile_lasx (void) attribute_hidden; + extern void _dl_runtime_profile_lsx (void) attribute_hidden; #endif extern void _dl_runtime_resolve (void) attribute_hidden; extern void _dl_runtime_profile (void) attribute_hidden; @@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], end in this function. */ if (profile != 0) { - gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; +#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; + else if (SUPPORT_LSX) + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; + else +#endif + gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; if (GLRO(dl_profile) != NULL && _dl_name_match_p (GLRO(dl_profile), l)) diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S index 8fd9146978..dce1c2f122 100644 --- a/sysdeps/loongarch/dl-trampoline.S +++ b/sysdeps/loongarch/dl-trampoline.S @@ -22,190 +22,23 @@ #if !defined __loongarch_soft_float #define USE_LASX #define _dl_runtime_resolve _dl_runtime_resolve_lasx +#define _dl_runtime_profile _dl_runtime_profile_lasx #include "dl-trampoline.h" #undef FRAME_SIZE #undef USE_LASX #undef _dl_runtime_resolve +#undef _dl_runtime_profile #define USE_LSX #define _dl_runtime_resolve _dl_runtime_resolve_lsx +#define _dl_runtime_profile _dl_runtime_profile_lsx #include "dl-trampoline.h" #undef FRAME_SIZE #undef USE_LSX #undef _dl_runtime_resolve +#undef _dl_runtime_profile #endif #include "dl-trampoline.h" -#include "dl-link.h" -ENTRY (_dl_runtime_profile) - /* LoongArch we get called with: - t0 linkr_map pointer - t1 the scaled offset stored in t0, which can be used - to calculate the offset of the current symbol in .rela.plt - t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function - t3 dl resolver entry point, no use in this function - - Stack frame layout: - [sp, #96] La_loongarch_regs - [sp, #48] La_loongarch_retval - [sp, #40] frame size return from pltenter - [sp, #32] dl_profile_call saved a1 - [sp, #24] dl_profile_call saved a0 - [sp, #16] T1 - [sp, #0] ra, fp <- fp - */ - -# define OFFSET_T1 16 -# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 -# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 -# define OFFSET_RV OFFSET_FS + 8 -# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV - -# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) - - /* Save arguments to stack. */ - ADDI sp, sp, -SF_SIZE - REG_S ra, sp, 0 - REG_S fp, sp, 8 - - or fp, sp, zero - - REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG - REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG - REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG - REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG - REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG - REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG - REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG - REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG - -#ifndef __loongarch_soft_float - FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG - FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG - FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG - FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG - FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG - FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG - FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG - FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG -#endif - - /* Update .got.plt and obtain runtime address of callee. */ - SLLI a1, t1, 1 - or a0, t0, zero - ADD a1, a1, t1 - or a2, ra, zero /* return addr */ - ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ - ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ - - REG_S a0, fp, OFFSET_SAVED_CALL_A0 - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG - - la t2, _dl_profile_fixup - jirl ra, t2, 0 - - REG_L t3, fp, OFFSET_FS - bge t3, zero, 1f - - /* Save the return. */ - or t4, v0, zero - - /* Restore arguments from stack. */ - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG - -#ifndef __loongarch_soft_float - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG -#endif - - REG_L ra, fp, 0 - REG_L fp, fp, SZREG - - ADDI sp, sp, SF_SIZE - jirl zero, t4, 0 - -1: - /* The new frame size is in t3. */ - SUB sp, fp, t3 - BSTRINS sp, zero, 3, 0 - - REG_S a0, fp, OFFSET_T1 - - or a0, sp, zero - ADDI a1, fp, SF_SIZE - or a2, t3, zero - la t5, memcpy - jirl ra, t5, 0 - - REG_L t6, fp, OFFSET_T1 - - /* Call the function. */ - REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG - REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG - REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG - REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG - REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG - REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG - REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG - REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG - -#ifndef __loongarch_soft_float - FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG - FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG - FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG - FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG - FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG - FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG - FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG - FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG -#endif - jirl ra, t6, 0 - - REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 - REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG - -#ifndef __loongarch_soft_float - FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 - FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG -#endif - - /* Setup call to pltexit. */ - REG_L a0, fp, OFFSET_SAVED_CALL_A0 - REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG - ADDI a2, fp, OFFSET_RG - ADDI a3, fp, OFFSET_RV - la t7, _dl_audit_pltexit - jirl ra, t7, 0 - - REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 - REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG - -#ifndef __loongarch_soft_float - FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 - FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG -#endif - - /* RA from within La_loongarch_reg. */ - REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA - or sp, fp, zero - ADDI sp, sp, SF_SIZE - REG_S fp, fp, SZREG - - jirl zero, ra, 0 - -END (_dl_runtime_profile) diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h index 02375286f8..cb4a287c65 100644 --- a/sysdeps/loongarch/dl-trampoline.h +++ b/sysdeps/loongarch/dl-trampoline.h @@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) /* Invoke the callee. */ jirl zero, t1, 0 END (_dl_runtime_resolve) + +#include "dl-link.h" + +ENTRY (_dl_runtime_profile) + /* LoongArch we get called with: + t0 linkr_map pointer + t1 the scaled offset stored in t0, which can be used + to calculate the offset of the current symbol in .rela.plt + t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function + t3 dl resolver entry point, no use in this function + + Stack frame layout: + [sp, #208] La_loongarch_regs + [sp, #128] La_loongarch_retval // align: 16 + [sp, #112] frame size return from pltenter + [sp, #80 ] dl_profile_call saved vec1 + [sp, #48 ] dl_profile_call saved vec0 // align: 16 + [sp, #32 ] dl_profile_call saved a1 + [sp, #24 ] dl_profile_call saved a0 + [sp, #16 ] T1 + [sp, #0 ] ra, fp <- fp + */ + +# define OFFSET_T1 16 +# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 +# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 +# define OFFSET_RV OFFSET_FS + 8 + 8 +# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV + +# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) + + /* Save arguments to stack. */ + ADDI sp, sp, -SF_SIZE + REG_S ra, sp, 0 + REG_S fp, sp, 8 + + or fp, sp, zero + + REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG + REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG + REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG + REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG + REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG + REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG + REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG + REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG + +#ifdef USE_LASX + xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG + xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG + xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG + xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG + xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG + xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG + xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG + xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG +#elif defined USE_LSX + vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG + vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG + vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG + vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG + vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG + vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG + vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG + vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG +#elif !defined __loongarch_soft_float + FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG + FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG + FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG + FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG + FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG + FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG + FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG + FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG +#endif + + /* Update .got.plt and obtain runtime address of callee. */ + SLLI a1, t1, 1 + or a0, t0, zero + ADD a1, a1, t1 + or a2, ra, zero /* return addr */ + ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ + ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ + + REG_S a0, fp, OFFSET_SAVED_CALL_A0 + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG + + la t2, _dl_profile_fixup + jirl ra, t2, 0 + + REG_L t3, fp, OFFSET_FS + bge t3, zero, 1f + + /* Save the return. */ + or t4, v0, zero + + /* Restore arguments from stack. */ + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG + +#ifdef USE_LASX + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG +#elif defined USE_LSX + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG + vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG +#elif !defined __loongarch_soft_float + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG +#endif + + REG_L ra, fp, 0 + REG_L fp, fp, SZREG + + ADDI sp, sp, SF_SIZE + jirl zero, t4, 0 + +1: + /* The new frame size is in t3. */ + SUB sp, fp, t3 + BSTRINS sp, zero, 3, 0 + + REG_S a0, fp, OFFSET_T1 + + or a0, sp, zero + ADDI a1, fp, SF_SIZE + or a2, t3, zero + la t5, memcpy + jirl ra, t5, 0 + + REG_L t6, fp, OFFSET_T1 + + /* Call the function. */ + REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG + REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG + REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG + REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG + REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG + REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG + REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG + REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG + +#ifdef USE_LASX + xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG + xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG + xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG + xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG + xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG + xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG + xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG + xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG +#elif defined USE_LSX + vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG + vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG + vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG + vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG + vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG + vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG + vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG + vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG +#elif !defined __loongarch_soft_float + FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG + FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG + FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG + FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG + FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG + FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG + FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG + FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG +#endif + + jirl ra, t6, 0 + + REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG + +#ifdef USE_LASX + xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG +#elif defined USE_LSX + vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG +#elif !defined __loongarch_soft_float + FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG +#endif + + /* Setup call to pltexit. */ + REG_L a0, fp, OFFSET_SAVED_CALL_A0 + REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG + ADDI a2, fp, OFFSET_RG + ADDI a3, fp, OFFSET_RV + la t7, _dl_audit_pltexit + jirl ra, t7, 0 + + REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 + REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG + +#ifdef USE_LASX + xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG +#elif defined USE_LSX + vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG +#elif !defined __loongarch_soft_float + FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG +#endif + + /* RA from within La_loongarch_reg. */ + REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA + or sp, fp, zero + ADDI sp, sp, SF_SIZE + REG_S fp, fp, SZREG + + jirl zero, ra, 0 + +END (_dl_runtime_profile)