Message ID | 20210413132945.rhhjcj6fcaeqxe5g@arm.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Avoid duplicating bti j insns for jump tables [PR99988] | expand |
Looks good in general, but like you say, it's GCC 12 material. Alex Coplan <alex.coplan@arm.com> writes: > diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c > index 936649769c7..943fa3c1097 100644 > --- a/gcc/config/aarch64/aarch64-bti-insert.c > +++ b/gcc/config/aarch64/aarch64-bti-insert.c > @@ -120,6 +120,13 @@ aarch64_pac_insn_p (rtx x) > return false; > } > > +static bool > +aarch64_bti_j_insn_p (rtx_insn *insn) > +{ > + rtx pat = PATTERN (insn); > + return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J; > +} > + Nit, but even a simple function like this should have a comment. :-) > /* Insert the BTI instruction. */ > /* This is implemented as a late RTL pass that runs before branch > shortening and does the following. */ > @@ -165,6 +172,9 @@ rest_of_insert_bti (void) > for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j) > { > label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0)); > + if (aarch64_bti_j_insn_p (next_nonnote_insn (label))) > + continue; > + This should be next_nonnote_nondebug_insn (quite the mouthful), otherwise debug instructions could affect the choice. The thing returned by next_nonnote_nondebug_insn isn't in general guaranteed to be an insn (unlike next_real_nondebug_insn). It might also be null in very odd cases. I think we should therefore check for null and INSN_P before checking PATTERN. Thanks, Richard > bti_insn = gen_bti_j (); > emit_insn_after (bti_insn, label); > } > diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c > new file mode 100644 > index 00000000000..2d87f41a717 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c > @@ -0,0 +1,66 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mbranch-protection=standard" } */ > +/* { dg-final { scan-assembler-times {bti j} 13 } } */ > +int a; > +int c(); > +int d(); > +int e(); > +int f(); > +int g(); > +void h() { > + switch (a) { > + case 0: > + case 56: > + case 57: > + break; > + case 58: > + case 59: > + case 61: > + case 62: > + c(); > + case 64: > + case 63: > + d(); > + case 66: > + case 65: > + d(); > + case 68: > + case 67: > + d(); > + case 69: > + case 70: > + d(); > + case 71: > + case 72: > + case 88: > + case 87: > + d(); > + case 90: > + case 89: > + d(); > + case 92: > + case 1: > + d(); > + case 93: > + case 73: > + case 4: > + e(); > + case 76: > + case 5: > + f(); > + case 7: > + case 8: > + case 84: > + case 85: > + break; > + case 6: > + case 299: > + case 9: > + case 80: > + case 2: > + case 3: > + e(); > + default: > + g(); > + } > +}
Hi Richard, On 15/04/2021 18:45, Richard Sandiford wrote: > Looks good in general, but like you say, it's GCC 12 material. Thanks for the review. The attached patch addresses these comments and bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk? Thanks, Alex > > Alex Coplan <alex.coplan@arm.com> writes: > > diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c > > index 936649769c7..943fa3c1097 100644 > > --- a/gcc/config/aarch64/aarch64-bti-insert.c > > +++ b/gcc/config/aarch64/aarch64-bti-insert.c > > @@ -120,6 +120,13 @@ aarch64_pac_insn_p (rtx x) > > return false; > > } > > > > +static bool > > +aarch64_bti_j_insn_p (rtx_insn *insn) > > +{ > > + rtx pat = PATTERN (insn); > > + return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J; > > +} > > + > > Nit, but even a simple function like this should have a comment. :-) > > > /* Insert the BTI instruction. */ > > /* This is implemented as a late RTL pass that runs before branch > > shortening and does the following. */ > > @@ -165,6 +172,9 @@ rest_of_insert_bti (void) > > for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j) > > { > > label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0)); > > + if (aarch64_bti_j_insn_p (next_nonnote_insn (label))) > > + continue; > > + > > This should be next_nonnote_nondebug_insn (quite the mouthful), > otherwise debug instructions could affect the choice. > > The thing returned by next_nonnote_nondebug_insn isn't in general > guaranteed to be an insn (unlike next_real_nondebug_insn). It might > also be null in very odd cases. I think we should therefore check > for null and INSN_P before checking PATTERN. > > Thanks, > Richard > > > bti_insn = gen_bti_j (); > > emit_insn_after (bti_insn, label); > > } diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c index 936649769c7..5d6bc169d6b 100644 --- a/gcc/config/aarch64/aarch64-bti-insert.c +++ b/gcc/config/aarch64/aarch64-bti-insert.c @@ -120,6 +120,17 @@ aarch64_pac_insn_p (rtx x) return false; } +/* Check if INSN is a BTI J insn. */ +static bool +aarch64_bti_j_insn_p (rtx_insn *insn) +{ + if (!insn || !INSN_P (insn)) + return false; + + rtx pat = PATTERN (insn); + return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J; +} + /* Insert the BTI instruction. */ /* This is implemented as a late RTL pass that runs before branch shortening and does the following. */ @@ -165,6 +176,10 @@ rest_of_insert_bti (void) for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j) { label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0)); + rtx_insn *next = next_nonnote_nondebug_insn (label); + if (aarch64_bti_j_insn_p (next)) + continue; + bti_insn = gen_bti_j (); emit_insn_after (bti_insn, label); } diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c new file mode 100644 index 00000000000..2d87f41a717 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbranch-protection=standard" } */ +/* { dg-final { scan-assembler-times {bti j} 13 } } */ +int a; +int c(); +int d(); +int e(); +int f(); +int g(); +void h() { + switch (a) { + case 0: + case 56: + case 57: + break; + case 58: + case 59: + case 61: + case 62: + c(); + case 64: + case 63: + d(); + case 66: + case 65: + d(); + case 68: + case 67: + d(); + case 69: + case 70: + d(); + case 71: + case 72: + case 88: + case 87: + d(); + case 90: + case 89: + d(); + case 92: + case 1: + d(); + case 93: + case 73: + case 4: + e(); + case 76: + case 5: + f(); + case 7: + case 8: + case 84: + case 85: + break; + case 6: + case 299: + case 9: + case 80: + case 2: + case 3: + e(); + default: + g(); + } +}
Alex Coplan <alex.coplan@arm.com> writes: > Hi Richard, > > On 15/04/2021 18:45, Richard Sandiford wrote: >> Looks good in general, but like you say, it's GCC 12 material. > > Thanks for the review. The attached patch addresses these comments and > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk? OK, thanks. Richard
On Wed, 21 Apr 2021 at 14:05, Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Alex Coplan <alex.coplan@arm.com> writes: > > Hi Richard, > > > > On 15/04/2021 18:45, Richard Sandiford wrote: > >> Looks good in general, but like you say, it's GCC 12 material. > > > > Thanks for the review. The attached patch addresses these comments and > > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk? > > OK, thanks. > The new test fails with -mabi=ilp32: sorry, unimplemented: return address signing is only supported for '-mabi=lp64' Is that OK: diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c index 2d87f41..7cca496 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr99988.c +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target lp64 } } */ /* { dg-options "-O2 -mbranch-protection=standard" } */ /* { dg-final { scan-assembler-times {bti j} 13 } } */ int a; Thanks, Christophe
Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > On Wed, 21 Apr 2021 at 14:05, Richard Sandiford via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: >> >> Alex Coplan <alex.coplan@arm.com> writes: >> > Hi Richard, >> > >> > On 15/04/2021 18:45, Richard Sandiford wrote: >> >> Looks good in general, but like you say, it's GCC 12 material. >> > >> > Thanks for the review. The attached patch addresses these comments and >> > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk? >> >> OK, thanks. >> > > The new test fails with -mabi=ilp32: > sorry, unimplemented: return address signing is only supported for '-mabi=lp64' > > Is that OK: > diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c > b/gcc/testsuite/gcc.target/aarch64/pr99988.c > index 2d87f41..7cca496 100644 > --- a/gcc/testsuite/gcc.target/aarch64/pr99988.c > +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c > @@ -1,4 +1,4 @@ > -/* { dg-do compile } */ > +/* { dg-do compile { target lp64 } } */ > /* { dg-options "-O2 -mbranch-protection=standard" } */ > /* { dg-final { scan-assembler-times {bti j} 13 } } */ > int a; OK, thanks. Richard
Hi Richard, On 21/04/2021 13:05, Richard Sandiford wrote: > Alex Coplan <alex.coplan@arm.com> writes: > > Hi Richard, > > > > On 15/04/2021 18:45, Richard Sandiford wrote: > >> Looks good in general, but like you say, it's GCC 12 material. > > > > Thanks for the review. The attached patch addresses these comments and > > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk? > > OK, thanks. The patch applies cleanly and bootstraps/regtests OK on the GCC 11 branch. OK to backport there and to 10 and 9 if the same is true for those branches? Thanks, Alex
Alex Coplan <alex.coplan@arm.com> writes: > Hi Richard, > > On 21/04/2021 13:05, Richard Sandiford wrote: >> Alex Coplan <alex.coplan@arm.com> writes: >> > Hi Richard, >> > >> > On 15/04/2021 18:45, Richard Sandiford wrote: >> >> Looks good in general, but like you say, it's GCC 12 material. >> > >> > Thanks for the review. The attached patch addresses these comments and >> > bootstraps/regtests OK on aarch64-linux-gnu. OK for trunk? >> >> OK, thanks. > > The patch applies cleanly and bootstraps/regtests OK on the GCC 11 > branch. OK to backport there and to 10 and 9 if the same is true for > those branches? To summarise what we discussed off-list: this initially looked like it was “just” a new DCE optimisation, which is why it seemed like GCC 12 material. However, as Alex pointed out, the unpatched BTI support can generate code whose size is quadratic in the number of switch cases, which is a serious regression whichever way you cut it. So this is OK for backports, and would have been OK during GCC 11 stage 4 too. Thanks, Richard
diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c index 936649769c7..943fa3c1097 100644 --- a/gcc/config/aarch64/aarch64-bti-insert.c +++ b/gcc/config/aarch64/aarch64-bti-insert.c @@ -120,6 +120,13 @@ aarch64_pac_insn_p (rtx x) return false; } +static bool +aarch64_bti_j_insn_p (rtx_insn *insn) +{ + rtx pat = PATTERN (insn); + return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J; +} + /* Insert the BTI instruction. */ /* This is implemented as a late RTL pass that runs before branch shortening and does the following. */ @@ -165,6 +172,9 @@ rest_of_insert_bti (void) for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j) { label = as_a <rtx_insn *> (XEXP (RTVEC_ELT (vec, j), 0)); + if (aarch64_bti_j_insn_p (next_nonnote_insn (label))) + continue; + bti_insn = gen_bti_j (); emit_insn_after (bti_insn, label); } diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c new file mode 100644 index 00000000000..2d87f41a717 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbranch-protection=standard" } */ +/* { dg-final { scan-assembler-times {bti j} 13 } } */ +int a; +int c(); +int d(); +int e(); +int f(); +int g(); +void h() { + switch (a) { + case 0: + case 56: + case 57: + break; + case 58: + case 59: + case 61: + case 62: + c(); + case 64: + case 63: + d(); + case 66: + case 65: + d(); + case 68: + case 67: + d(); + case 69: + case 70: + d(); + case 71: + case 72: + case 88: + case 87: + d(); + case 90: + case 89: + d(); + case 92: + case 1: + d(); + case 93: + case 73: + case 4: + e(); + case 76: + case 5: + f(); + case 7: + case 8: + case 84: + case 85: + break; + case 6: + case 299: + case 9: + case 80: + case 2: + case 3: + e(); + default: + g(); + } +}