diff mbox series

testsuite: arm: Check that a far jump is used in thumb1-far-jump-2.c

Message ID 20241108181753.2662923-1-torbjorn.svensson@foss.st.com
State New
Headers show
Series testsuite: arm: Check that a far jump is used in thumb1-far-jump-2.c | expand

Commit Message

Torbjorn SVENSSON Nov. 8, 2024, 6:17 p.m. UTC
Ok for trunk?

--

With the changes in r15-1579-g792f97b44ff, the code used as "padding" in
the test case is optimized way. Prevent this optimization by forcing a
read of the volatile memory.
Also, validate that there is a far jump in the generated assembler.

Without this patch, the generated assembler is reduced to:
f3:
        cmp     r0, #0
        beq     .L1
        ldr     r4, .L6
.L1:
        bx      lr
.L7:
        .align  2
.L6:
        .word   g_0_1

With the patch, the generated assembler is:
f3:
        push    {lr}
        cmp     r0, #0
        bne     .LCB7
        bl      .L1     @far jump
.LCB7:
        ldr     r3, .L6
        ldr     r3, [r3]
...
        ldr     r3, .L9+976
        ldr     r4, [r3]
        b       .L10
.L11:
        .align  2
.L9:
        .word   g_0_3_7_5
...
        .word   g_0_1
.L10:
.L1:
        pop     {pc}

gcc/testsuite/ChangeLog:

	* gcc.target/arm/thumb1-far-jump-2.c: Force a read of volatile
	memory in macro to avoid optimization.

Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
---
 gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Comments

Christophe Lyon Nov. 8, 2024, 6:29 p.m. UTC | #1
On Fri, 8 Nov 2024 at 19:20, Torbjörn SVENSSON
<torbjorn.svensson@foss.st.com> wrote:
>
> Ok for trunk?
>
> --
>
> With the changes in r15-1579-g792f97b44ff, the code used as "padding" in
> the test case is optimized way. Prevent this optimization by forcing a
> read of the volatile memory.
> Also, validate that there is a far jump in the generated assembler.
>
> Without this patch, the generated assembler is reduced to:
> f3:
>         cmp     r0, #0
>         beq     .L1
>         ldr     r4, .L6
> .L1:
>         bx      lr
> .L7:
>         .align  2
> .L6:
>         .word   g_0_1
>
> With the patch, the generated assembler is:
> f3:
>         push    {lr}
>         cmp     r0, #0
>         bne     .LCB7
>         bl      .L1     @far jump
> .LCB7:
>         ldr     r3, .L6
>         ldr     r3, [r3]
> ...
>         ldr     r3, .L9+976
>         ldr     r4, [r3]
>         b       .L10
> .L11:
>         .align  2
> .L9:
>         .word   g_0_3_7_5
> ...
>         .word   g_0_1
> .L10:
> .L1:
>         pop     {pc}
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/arm/thumb1-far-jump-2.c: Force a read of volatile
>         memory in macro to avoid optimization.
>
> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
> ---
>  gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
> index 78fcafaaf7d..1cf7a0a86e8 100644
> --- a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
> +++ b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
> @@ -10,7 +10,7 @@ void f3(int i)
>  {
>  #define GO(n) \
>    extern volatile int g_##n; \
> -  r4=(int)&g_##n;
> +  r4=(int)g_##n;
>

It really seems to me that this was a typo in the original submission:
this volatile was probably here to prevent optimization and make sure
we have a large function. But taking the address of a volatile
variable does not have the intended effect, you need to actually
access the variable.

LGTM, but wait for Richard's approval.

Thanks,

Christophe

>  #define GO8(n) \
>    GO(n##_0) \
> @@ -54,4 +54,5 @@ void f3(int i)
>    }
>  }
>
> -/* { dg-final { scan-assembler "push.*lr" } } */
> +/* { dg-final { scan-assembler "\tpush.*lr" } } */
> +/* { dg-final { scan-assembler "\tbl\t\\.L\[0-9\]+\t@far jump" } } */
> --
> 2.25.1
>
Richard Earnshaw (lists) Nov. 20, 2024, 4:56 p.m. UTC | #2
On 08/11/2024 18:29, Christophe Lyon wrote:
> On Fri, 8 Nov 2024 at 19:20, Torbjörn SVENSSON
> <torbjorn.svensson@foss.st.com> wrote:
>>
>> Ok for trunk?
>>
>> --
>>
>> With the changes in r15-1579-g792f97b44ff, the code used as "padding" in
>> the test case is optimized way. Prevent this optimization by forcing a
>> read of the volatile memory.
>> Also, validate that there is a far jump in the generated assembler.
>>
>> Without this patch, the generated assembler is reduced to:
>> f3:
>>         cmp     r0, #0
>>         beq     .L1
>>         ldr     r4, .L6
>> .L1:
>>         bx      lr
>> .L7:
>>         .align  2
>> .L6:
>>         .word   g_0_1
>>
>> With the patch, the generated assembler is:
>> f3:
>>         push    {lr}
>>         cmp     r0, #0
>>         bne     .LCB7
>>         bl      .L1     @far jump
>> .LCB7:
>>         ldr     r3, .L6
>>         ldr     r3, [r3]
>> ...
>>         ldr     r3, .L9+976
>>         ldr     r4, [r3]
>>         b       .L10
>> .L11:
>>         .align  2
>> .L9:
>>         .word   g_0_3_7_5
>> ...
>>         .word   g_0_1
>> .L10:
>> .L1:
>>         pop     {pc}
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.target/arm/thumb1-far-jump-2.c: Force a read of volatile
>>         memory in macro to avoid optimization.
>>
>> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
>> ---
>>  gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
>> index 78fcafaaf7d..1cf7a0a86e8 100644
>> --- a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
>> +++ b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
>> @@ -10,7 +10,7 @@ void f3(int i)
>>  {
>>  #define GO(n) \
>>    extern volatile int g_##n; \
>> -  r4=(int)&g_##n;
>> +  r4=(int)g_##n;
>>
> 
> It really seems to me that this was a typo in the original submission:
> this volatile was probably here to prevent optimization and make sure
> we have a large function. But taking the address of a volatile
> variable does not have the intended effect, you need to actually
> access the variable.
> 
> LGTM, but wait for Richard's approval.

I'm not sure it's really enough with this test to just fiddle it a bit until it 'passes': it's supposed to be testing some real or heuristic limit in the compiler.

But the way this test is written, and the comment in the code itself makes it quite hard to divine now exactly what was intended.  I can think of three possibilities:

1) The size of the function is just large enough to force a push of LR (by the conservative heuristic).
2) The distance of the conditional branch is just large enough that we don't need a BL instruction to implement the branch sequence
3) The distance of the conditional branch is just large enough that we do need a BL instruction to implement the branch sequence.

Obviously, only the last case is where we must push LR in the prologue to avoid wrong code.  I'm not entirely sure what Joey had in mind when he wrote the original test.

The heuristic is based on the size of the function, but that makes any test that pushes the first two extremes quite fragile as it depends on the number of additional instructions we insert, and the precise way we count the size of each instruction.

But there's an additional problem with the way the test is written: the use of many different volatile variables means we end up with a large number of constant pool entries that get dumped into minipools within the code itself, including some within the loop body.

I'm inclined to rewrite this test entirely, perhaps using:

volatile int r4;

#define GO() \
  r4 = 1;

as the basis for the repeated operation.  If we then write:

void f3(int i)
{
  GO ();
  if (i)
    GO498();
}

we end up with a code sequence that looks something like:
        push    {lr}
        ldr     r3, .L6
        movs    r2, #1
        str     r2, [r3]
        cmp     r0, #0
        bne     .LCB15
        b       .L1   @long jump  // Or BL if the range is too far for B.
.LCB15:
        str     r2, [r3]
        str     r2, [r3]
        str     r2, [r3]
	...
        str     r2, [r3]
.L1:
        pop     {pc}

And we have very tight control over the size of the loop, and reasonably tight control over the size of the function as a whole: each instruction in the body of the loop will account for exactly 2 bytes.

My inclination is to pick option 3 for this test, and to enforce that by scanning for a BL in the output in addition to the push lr.  The key point about correct code is that if we need a BL then we also need to push LR.

R.
> 
> Thanks,
> 
> Christophe
> 
>>  #define GO8(n) \
>>    GO(n##_0) \
>> @@ -54,4 +54,5 @@ void f3(int i)
>>    }
>>  }
>>
>> -/* { dg-final { scan-assembler "push.*lr" } } */
>> +/* { dg-final { scan-assembler "\tpush.*lr" } } */
>> +/* { dg-final { scan-assembler "\tbl\t\\.L\[0-9\]+\t@far jump" } } */
>> --
>> 2.25.1
>>
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
index 78fcafaaf7d..1cf7a0a86e8 100644
--- a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
+++ b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
@@ -10,7 +10,7 @@  void f3(int i)
 {
 #define GO(n) \
   extern volatile int g_##n; \
-  r4=(int)&g_##n;
+  r4=(int)g_##n;
 
 #define GO8(n) \
   GO(n##_0) \
@@ -54,4 +54,5 @@  void f3(int i)
   }
 }
 
-/* { dg-final { scan-assembler "push.*lr" } } */
+/* { dg-final { scan-assembler "\tpush.*lr" } } */
+/* { dg-final { scan-assembler "\tbl\t\\.L\[0-9\]+\t@far jump" } } */