testsuite: arm: Relax register selection [PR116623]

Message ID	20241019170351.1887608-1-torbjorn.svensson@foss.st.com
State	New
Headers	show Return-Path: <gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 3E1663858406 From: =?utf-8?q?Torbj=C3=B6rn_SVENSSON?= <torbjorn.svensson@foss.st.com> To: <gcc-patches@gcc.gnu.org> CC: <richard.earnshaw@arm.com>, <christophe.lyon@linaro.org>, <thiago.bauermann@linaro.org>, <jskumari@linux.ibm.com>, =?utf-8?q?Torbj?= =?utf-8?q?=C3=B6rn_SVENSSON?= <torbjorn.svensson@foss.st.com> Subject: [PATCH] testsuite: arm: Relax register selection [PR116623] Date: Sat, 19 Oct 2024 19:03:52 +0200 Message-ID: <20241019170351.1887608-1-torbjorn.svensson@foss.st.com> MIME-Version: 1.0 Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: 8bit Precedence: list Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org
Series	testsuite: arm: Relax register selection [PR116623] \| expand testsuite: arm: Relax register selection [PR116623]

Message ID

20241019170351.1887608-1-torbjorn.svensson@foss.st.com

State

New

Headers

DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 3E1663858406
From: =?utf-8?q?Torbj=C3=B6rn_SVENSSON?= <torbjorn.svensson@foss.st.com>
To: <gcc-patches@gcc.gnu.org>
CC: <richard.earnshaw@arm.com>, <christophe.lyon@linaro.org>,
 <thiago.bauermann@linaro.org>, <jskumari@linux.ibm.com>, =?utf-8?q?Torbj?=
	=?utf-8?q?=C3=B6rn_SVENSSON?= <torbjorn.svensson@foss.st.com>
Subject: [PATCH] testsuite: arm: Relax register selection [PR116623]
Date: Sat, 19 Oct 2024 19:03:52 +0200
Message-ID: <20241019170351.1887608-1-torbjorn.svensson@foss.st.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: 8bit
Precedence: list
Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org

Series

testsuite: arm: Relax register selection [PR116623] | expand

Commit Message

Torbjorn SVENSSON Oct. 19, 2024, 5:03 p.m. UTC

With r15-1618-g9f168b412f4, I get the following asm generated for the test case:

        .align  1
        .align  2
        .global test5
        .syntax unified
        .thumb
        .thumb_func
        .type   test5, %function
test5:
        @ args = 4, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, r5, r6, lr}
        ldr     r4, [sp, #16]
        cmp     r4, #0
        ble     .L37
        sub     ip, r4, #16
        adds    r6, r2, r4
        adds    r5, r1, r4
        add     r0, r0, r4
        dlstp.8 lr, r4
.L39:
        subs    r2, r5, r4
        subs    r1, r0, r4
        vldrb.8 q3, [r1]
        vldrb.8 q2, [r2]
        subs    r2, r6, r4
        mov     r4, ip
        sub     ip, ip, #16
        vadd.i8 q3, q3, q2
        vstrb.8 q3, [r2]
        vstrb.8 q3, [r3]
        letp    lr, .L39
.L37:
        pop     {r4, r5, r6, pc}
        .size   test5, .-test5

...

        .align  1
        .align  2
        .global test8
        .syntax unified
        .thumb
        .thumb_func
        .type   test8, %function
test8:
        @ args = 4, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, lr}
        ldr     r4, [sp, #8]
        cmp     r3, #0
        ble     .L59
        dlstp.32        lr, r3
.L61:
        vldrw.32        q3, [r0], #16
        vctp.32 r4
        vpst
        vldrwt.32       q2, [r1], #16
        adds    r4, r4, #1
        vadd.i32        q3, q3, q2
        vstrw.32        q3, [r2], #16
        letp    lr, .L61
.L59:
        pop     {r4, pc}
        .size   test8, .-test8




With r15-1619-g3b9b8d6cfdf, I instead get:

        .align  1
        .align  2
        .global test5
        .syntax unified
        .thumb
        .thumb_func
        .type   test5, %function
test5:
        @ args = 4, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, r5, r6, lr}
        ldr     ip, [sp, #16]
        cmp     ip, #0
        ble     .L37
        mov     r6, r3
        sub     r3, ip, #16
        add     r5, r2, ip
        add     r4, r1, ip
        add     r0, r0, ip
        dlstp.8 lr, ip
.L39:
        sub     r2, r4, ip
        sub     r1, r0, ip
        vldrb.8 q3, [r1]
        vldrb.8 q2, [r2]
        sub     r2, r5, ip
        mov     ip, r3
        subs    r3, r3, #16
        vadd.i8 q3, q3, q2
        vstrb.8 q3, [r2]
        vstrb.8 q3, [r6]
        letp    lr, .L39
.L37:
        pop     {r4, r5, r6, pc}
        .size   test5, .-test5

...

        .align  1
        .align  2
        .global test8
        .syntax unified
        .thumb
        .thumb_func
        .type   test8, %function
test8:
        @ args = 4, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {lr}
        ldr     ip, [sp, #4]
        cmp     r3, #0
        ble     .L59
        dlstp.32        lr, r3
.L61:
        vldrw.32        q3, [r0], #16
        vctp.32 ip
        vpst
        vldrwt.32       q2, [r1], #16
        add     ip, ip, #1
        vadd.i32        q3, q3, q2
        vstrw.32        q3, [r2], #16
        letp    lr, .L61
.L59:
        ldr     pc, [sp], #4
        .size   test8, .-test8



As can be seen, with r15-1619-g3b9b8d6cfdf the compiler now uses ip in ways
that it did not before. I think this part is fine.
It also, for some reason, decides to move r3 into r6 in test5 and then uses
r6 later for the second vstrb.8. While I suppose it does work, it costs one
extra mov, so the generated code is slightly bigger.

With the patch below, I no longer see any failures reported for arm-none-eabi.

Even with the slight size increase for test5, is it ok for trunk?

--

Since r15-1619-g3b9b8d6cfdf, test5 and test8 fail because "ip" might
be used and r3 might be moved to another register for later
dereference.

gcc/testsuite/ChangeLog:

	PR testsuite/116623
	* gcc.target/arm/mve/dlstp-compile-asm-2.c: Align test5 and
	test8 with changes in r15-1619-g3b9b8d6cfdf.

Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
---
 gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c
index 84f4a2fc4f9..c62f592a60d 100644
--- a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c
+++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c
@@ -147,15 +147,17 @@  void test5 (uint8_t *a, uint8_t *b, uint8_t *c,  uint8_t *d, int n)
 /*
 ** test5:
 **...
-**	dlstp.8	lr, r[0-9]+
+**	(?:mov	(r[0-9]+), r3)?
+**...
+**	dlstp.8	lr, (?:r[0-9]+|ip)
 **...
 **	vldrb.8	q[0-9]+, \[r1\]
 **	vldrb.8	q[0-9]+, \[r2\]
 **...
 **	vadd.i8	(q[0-9]+), q[0-9]+, q[0-9]+
 **...
-**	vstrb.8	\1, \[r2\]
-**	vstrb.8	\1, \[r3\]
+**	vstrb.8	\2, \[r2\]
+**	vstrb.8	\2, \[(r3|\1)\]
 **	letp	lr, .*
 **...
 */
@@ -247,7 +249,7 @@  void test8 (int32_t *a, int32_t *b, int32_t *c, int n, int g)
 **...
 **	dlstp.32	lr, r3
 **	vldrw.32	q[0-9]+, \[r0\], #16
-**	vctp.32	r4
+**	vctp.32	(?:r4|ip)
 **	vpst
 **	vldrwt.32	q[0-9]+, \[r1\], #16
 **...

testsuite: arm: Relax register selection [PR116623]

Commit Message

Patch