diff mbox series

[v1] LoongArch: Add CFI pseudo-instructions to _dl_tlsdesc_dynamic

Message ID 20240607081444.4054046-1-mengqinggang@loongson.cn
State New
Headers show
Series [v1] LoongArch: Add CFI pseudo-instructions to _dl_tlsdesc_dynamic | expand

Commit Message

mengqinggang June 7, 2024, 8:14 a.m. UTC
Add CFI pseudo-instructions to _dl_tlsdesc_dynamic. So far this only works
for the LASX case: I don't know how to write the CFI pseudo-instructions for
the LSX and float parts. The problem is that the unwinder always applies
every CFI directive in address order, regardless of which save path was
actually taken at run time, which results in an incorrect CFA address.

Also save FCSR at an 8-byte-aligned address.
---
 sysdeps/loongarch/dl-tlsdesc.S | 46 ++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 5 deletions(-)

Comments

Jinyang He June 12, 2024, 10:02 a.m. UTC | #1
On 2024-06-07 16:14, mengqinggang wrote:

> Add CFI pseudo-instructions to _dl_tlsdesc_dynamic. So far this only works
> for the LASX case: I don't know how to write the CFI pseudo-instructions for
> the LSX and float parts. The problem is that the unwinder always applies
> every CFI directive in address order, regardless of which save path was
> actually taken at run time, which results in an incorrect CFA address.
>
> Also save FCSR at an 8-byte-aligned address.
> ---
>   sysdeps/loongarch/dl-tlsdesc.S | 46 ++++++++++++++++++++++++++++++----
>   1 file changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
> index 15d5fa1c42..35f028c53e 100644
> --- a/sysdeps/loongarch/dl-tlsdesc.S
> +++ b/sysdeps/loongarch/dl-tlsdesc.S
> @@ -81,7 +81,7 @@ _dl_tlsdesc_undefweak:
>   	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
>   	   {
>   	     struct tlsdesc_dynamic_arg *td = tdp->arg;
> -	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - TCBHEAD_DTV);
> +	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
>   	     if (__glibc_likely (td->gen_count <= dtv[0].counter
>   		&& (dtv[td->tlsinfo.ti_module].pointer.val
>   		    != TLS_DTV_UNALLOCATED),
> @@ -101,9 +101,13 @@ _dl_tlsdesc_dynamic:
>   	/* Save just enough registers to support fast path, if we fall
>   	   into slow path we will save additional registers.  */
>   	ADDI	sp, sp, -24
> +	cfi_adjust_cfa_offset (24)
As [1] points out, the stack must be 16-byte aligned, so I think this
24-byte adjustment may need to be fixed.
[1] 
https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#the-stack

Thanks,
Jinyang
>   	REG_S	t0, sp, 0
>   	REG_S	t1, sp, 8
>   	REG_S	t2, sp, 16
> +	cfi_rel_offset (12, 0)
> +	cfi_rel_offset (13, 8)
> +	cfi_rel_offset (14, 16)
>   
>   /* Runtime Storage Layout of Thread-Local Storage
>      TP point to the start of TLS block.
> @@ -133,6 +137,7 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	   goto slow path.  */
>   	beq	t1, t2, .Lslow
>   
> +	cfi_remember_state
>   	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
>   	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
>   	add.d	a0, t1, t2
> @@ -142,6 +147,7 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	REG_L	t1, sp, 8
>   	REG_L	t2, sp, 16
>   	ADDI	sp, sp, 24
> +	cfi_adjust_cfa_offset (-24)
>   	RET
>   
>   .Lslow:
> @@ -150,7 +156,9 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	   callee will trash.  */
>   
>   	/* Save the remaining registers that we must treat as caller save.  */
> +	cfi_restore_state
>   	ADDI	sp, sp, -FRAME_SIZE
> +	cfi_adjust_cfa_offset (FRAME_SIZE)
>   	REG_S	ra, sp, 0 * SZREG
>   	REG_S	a1, sp, 1 * SZREG
>   	REG_S	a2, sp, 2 * SZREG
> @@ -165,15 +173,30 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	REG_S	t6, sp, 11 * SZREG
>   	REG_S	t7, sp, 12 * SZREG
>   	REG_S	t8, sp, 13 * SZREG
> +	cfi_rel_offset (1, 0 * SZREG)
> +	cfi_rel_offset (5, 1 * SZREG)
> +	cfi_rel_offset (6, 2 * SZREG)
> +	cfi_rel_offset (7, 3 * SZREG)
> +	cfi_rel_offset (8, 4 * SZREG)
> +	cfi_rel_offset (9, 5 * SZREG)
> +	cfi_rel_offset (10, 6 * SZREG)
> +	cfi_rel_offset (11, 7 * SZREG)
> +	cfi_rel_offset (15, 8 * SZREG)
> +	cfi_rel_offset (16, 9 * SZREG)
> +	cfi_rel_offset (17, 10 * SZREG)
> +	cfi_rel_offset (18, 11 * SZREG)
> +	cfi_rel_offset (19, 12 * SZREG)
> +	cfi_rel_offset (20, 13 * SZREG)
>   
>   #ifndef __loongarch_soft_float
>   
>   	/* Save fcsr0 register.
>   	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
>   	   of some fields in fcsr0.  */
> -	ADDI	sp, sp, -SZFCSREG
> +	ADDI	sp, sp, -SZREG
> +	cfi_adjust_cfa_offset (SZREG)
>   	movfcsr2gr  t0, fcsr0
> -	st.w	t0, sp, 0
> +	st.d	t0, sp, 0
>   
>   	/* Whether support LASX.  */
>   	la.global   t0, _rtld_global_ro
> @@ -184,6 +207,7 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	/* Save 256-bit vector registers.
>   	   FIXME: Without vector ABI, save all vector registers.  */
>   	ADDI	sp, sp, -FRAME_SIZE_LASX
> +	cfi_adjust_cfa_offset (FRAME_SIZE_LASX)
>   	xvst	xr0, sp, 0*SZXREG
>   	xvst	xr1, sp, 1*SZXREG
>   	xvst	xr2, sp, 2*SZXREG
> @@ -225,6 +249,8 @@ Hign address	dynamic_block1 <----- dtv5  */
>   
>   	/* Save 128-bit vector registers.  */
>   	ADDI	sp, sp, -FRAME_SIZE_LSX
> +	# FIXME: cfi
> +	# cfi_adjust_cfa_offset (FRAME_SIZE_LSX)
>   	vst	vr0, sp, 0*SZVREG
>   	vst	vr1, sp, 1*SZVREG
>   	vst	vr2, sp, 2*SZVREG
> @@ -262,6 +288,8 @@ Hign address	dynamic_block1 <----- dtv5  */
>   .Lfloat:
>   	/* Save float registers.  */
>   	ADDI	sp, sp, -FRAME_SIZE_FLOAT
> +	# FIXME: cfi
> +	# cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT)
>   	FREG_S	fa0, sp, 0*SZFREG
>   	FREG_S	fa1, sp, 1*SZFREG
>   	FREG_S	fa2, sp, 2*SZFREG
> @@ -334,6 +362,7 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	xvld	xr30, sp, 30*SZXREG
>   	xvld	xr31, sp, 31*SZXREG
>   	ADDI	sp, sp, FRAME_SIZE_LASX
> +	cfi_adjust_cfa_offset (-FRAME_SIZE_LASX)
>   	b .Lfcsr
>   
>   .Llsx1:
> @@ -374,6 +403,8 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	vld	vr30, sp, 30*SZVREG
>   	vld	vr31, sp, 31*SZVREG
>   	ADDI	sp, sp, FRAME_SIZE_LSX
> +	# FIXME: cfi
> +	# cfi_adjust_cfa_offset (-FRAME_SIZE_LSX)
>   	b	    .Lfcsr
>   
>   .Lfloat1:
> @@ -403,12 +434,15 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	FREG_L	ft14, sp, 22*SZFREG
>   	FREG_L	ft15, sp, 23*SZFREG
>   	ADDI	sp, sp, FRAME_SIZE_FLOAT
> +	# FIXME: cfi
> +	# cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT)
>   
>   .Lfcsr:
>   	/* Restore fcsr0 register.  */
> -	ld.w	t0, sp, 0
> +	ld.d	t0, sp, 0
>   	movgr2fcsr  fcsr0, t0
> -	ADDI	sp, sp, SZFCSREG
> +	ADDI	sp, sp, SZREG
> +	cfi_adjust_cfa_offset (-SZREG)
>   
>   #endif /* #ifndef __loongarch_soft_float */
>   
> @@ -427,6 +461,8 @@ Hign address	dynamic_block1 <----- dtv5  */
>   	REG_L	t7, sp, 12 * SZREG
>   	REG_L	t8, sp, 13 * SZREG
>   	ADDI	sp, sp, FRAME_SIZE
> +	cfi_adjust_cfa_offset (-FRAME_SIZE)
> +	cfi_restore (1)
>   
>   	b	.Lret
>   	cfi_endproc
Xi Ruoyao June 13, 2024, 1:30 p.m. UTC | #2
On Wed, 2024-06-12 at 18:02 +0800, Jinyang He wrote:
> On 2024-06-07 16:14, mengqinggang wrote:

> >   	ADDI	sp, sp, -24
> > +	cfi_adjust_cfa_offset (24)
> As [1] points out, the stack must be 16-byte aligned, so I think this
> 24-byte adjustment may need to be fixed.
> [1] 
> https://github.com/loongson/la-abi-specs/blob/release/lapcs.adoc#the-stack

https://sourceware.org/pipermail/libc-alpha/2024-June/157444.html
diff mbox series

Patch

diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
index 15d5fa1c42..35f028c53e 100644
--- a/sysdeps/loongarch/dl-tlsdesc.S
+++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -81,7 +81,7 @@  _dl_tlsdesc_undefweak:
 	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
 	   {
 	     struct tlsdesc_dynamic_arg *td = tdp->arg;
-	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - TCBHEAD_DTV);
+	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
 	     if (__glibc_likely (td->gen_count <= dtv[0].counter
 		&& (dtv[td->tlsinfo.ti_module].pointer.val
 		    != TLS_DTV_UNALLOCATED),
@@ -101,9 +101,13 @@  _dl_tlsdesc_dynamic:
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
 	ADDI	sp, sp, -24
+	cfi_adjust_cfa_offset (24)
 	REG_S	t0, sp, 0
 	REG_S	t1, sp, 8
 	REG_S	t2, sp, 16
+	cfi_rel_offset (12, 0)
+	cfi_rel_offset (13, 8)
+	cfi_rel_offset (14, 16)
 
 /* Runtime Storage Layout of Thread-Local Storage
    TP point to the start of TLS block.
@@ -133,6 +137,7 @@  Hign address	dynamic_block1 <----- dtv5  */
 	   goto slow path.  */
 	beq	t1, t2, .Lslow
 
+	cfi_remember_state
 	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
 	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
 	add.d	a0, t1, t2
@@ -142,6 +147,7 @@  Hign address	dynamic_block1 <----- dtv5  */
 	REG_L	t1, sp, 8
 	REG_L	t2, sp, 16
 	ADDI	sp, sp, 24
+	cfi_adjust_cfa_offset (-24)
 	RET
 
 .Lslow:
@@ -150,7 +156,9 @@  Hign address	dynamic_block1 <----- dtv5  */
 	   callee will trash.  */
 
 	/* Save the remaining registers that we must treat as caller save.  */
+	cfi_restore_state
 	ADDI	sp, sp, -FRAME_SIZE
+	cfi_adjust_cfa_offset (FRAME_SIZE)
 	REG_S	ra, sp, 0 * SZREG
 	REG_S	a1, sp, 1 * SZREG
 	REG_S	a2, sp, 2 * SZREG
@@ -165,15 +173,30 @@  Hign address	dynamic_block1 <----- dtv5  */
 	REG_S	t6, sp, 11 * SZREG
 	REG_S	t7, sp, 12 * SZREG
 	REG_S	t8, sp, 13 * SZREG
+	cfi_rel_offset (1, 0 * SZREG)
+	cfi_rel_offset (5, 1 * SZREG)
+	cfi_rel_offset (6, 2 * SZREG)
+	cfi_rel_offset (7, 3 * SZREG)
+	cfi_rel_offset (8, 4 * SZREG)
+	cfi_rel_offset (9, 5 * SZREG)
+	cfi_rel_offset (10, 6 * SZREG)
+	cfi_rel_offset (11, 7 * SZREG)
+	cfi_rel_offset (15, 8 * SZREG)
+	cfi_rel_offset (16, 9 * SZREG)
+	cfi_rel_offset (17, 10 * SZREG)
+	cfi_rel_offset (18, 11 * SZREG)
+	cfi_rel_offset (19, 12 * SZREG)
+	cfi_rel_offset (20, 13 * SZREG)
 
 #ifndef __loongarch_soft_float
 
 	/* Save fcsr0 register.
 	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
 	   of some fields in fcsr0.  */
-	ADDI	sp, sp, -SZFCSREG
+	ADDI	sp, sp, -SZREG
+	cfi_adjust_cfa_offset (SZREG)
 	movfcsr2gr  t0, fcsr0
-	st.w	t0, sp, 0
+	st.d	t0, sp, 0
 
 	/* Whether support LASX.  */
 	la.global   t0, _rtld_global_ro
@@ -184,6 +207,7 @@  Hign address	dynamic_block1 <----- dtv5  */
 	/* Save 256-bit vector registers.
 	   FIXME: Without vector ABI, save all vector registers.  */
 	ADDI	sp, sp, -FRAME_SIZE_LASX
+	cfi_adjust_cfa_offset (FRAME_SIZE_LASX)
 	xvst	xr0, sp, 0*SZXREG
 	xvst	xr1, sp, 1*SZXREG
 	xvst	xr2, sp, 2*SZXREG
@@ -225,6 +249,8 @@  Hign address	dynamic_block1 <----- dtv5  */
 
 	/* Save 128-bit vector registers.  */
 	ADDI	sp, sp, -FRAME_SIZE_LSX
+	# FIXME: cfi
+	# cfi_adjust_cfa_offset (FRAME_SIZE_LSX)
 	vst	vr0, sp, 0*SZVREG
 	vst	vr1, sp, 1*SZVREG
 	vst	vr2, sp, 2*SZVREG
@@ -262,6 +288,8 @@  Hign address	dynamic_block1 <----- dtv5  */
 .Lfloat:
 	/* Save float registers.  */
 	ADDI	sp, sp, -FRAME_SIZE_FLOAT
+	# FIXME: cfi
+	# cfi_adjust_cfa_offset (FRAME_SIZE_FLOAT)
 	FREG_S	fa0, sp, 0*SZFREG
 	FREG_S	fa1, sp, 1*SZFREG
 	FREG_S	fa2, sp, 2*SZFREG
@@ -334,6 +362,7 @@  Hign address	dynamic_block1 <----- dtv5  */
 	xvld	xr30, sp, 30*SZXREG
 	xvld	xr31, sp, 31*SZXREG
 	ADDI	sp, sp, FRAME_SIZE_LASX
+	cfi_adjust_cfa_offset (-FRAME_SIZE_LASX)
 	b .Lfcsr
 
 .Llsx1:
@@ -374,6 +403,8 @@  Hign address	dynamic_block1 <----- dtv5  */
 	vld	vr30, sp, 30*SZVREG
 	vld	vr31, sp, 31*SZVREG
 	ADDI	sp, sp, FRAME_SIZE_LSX
+	# FIXME: cfi
+	# cfi_adjust_cfa_offset (-FRAME_SIZE_LSX)
 	b	    .Lfcsr
 
 .Lfloat1:
@@ -403,12 +434,15 @@  Hign address	dynamic_block1 <----- dtv5  */
 	FREG_L	ft14, sp, 22*SZFREG
 	FREG_L	ft15, sp, 23*SZFREG
 	ADDI	sp, sp, FRAME_SIZE_FLOAT
+	# FIXME: cfi
+	# cfi_adjust_cfa_offset (-FRAME_SIZE_FLOAT)
 
 .Lfcsr:
 	/* Restore fcsr0 register.  */
-	ld.w	t0, sp, 0
+	ld.d	t0, sp, 0
 	movgr2fcsr  fcsr0, t0
-	ADDI	sp, sp, SZFCSREG
+	ADDI	sp, sp, SZREG
+	cfi_adjust_cfa_offset (-SZREG)
 
 #endif /* #ifndef __loongarch_soft_float */
 
@@ -427,6 +461,8 @@  Hign address	dynamic_block1 <----- dtv5  */
 	REG_L	t7, sp, 12 * SZREG
 	REG_L	t8, sp, 13 * SZREG
 	ADDI	sp, sp, FRAME_SIZE
+	cfi_adjust_cfa_offset (-FRAME_SIZE)
+	cfi_restore (1)
 
 	b	.Lret
 	cfi_endproc