diff mbox series

[APX,PPX] Avoid generating unmatched pushp/popp in pro/epilogue

Message ID 20240702032336.2766166-1-hongyu.wang@intel.com
State New
Headers show
Series [APX,PPX] Avoid generating unmatched pushp/popp in pro/epilogue | expand

Commit Message

Hongyu Wang July 2, 2024, 3:23 a.m. UTC
Hi,

According to APX spec, the pushp/popp pairs should be matched,
otherwise the PPX hint cannot take effect and cause performance loss.

In the ix86_expand_epilogue, there are several optimizations that may
cause the epilogue using mov to restore the regs. Check if PPX applied
and prevent usage of mov/leave in the epilogue.

Bootstrapped/regtested on x86_64-pc-linux-gnu.

Ok for trunk?

gcc/ChangeLog:

	* config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
	flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
	(ix86_emit_save_regs): Emit ppx is available only when
	TARGET_APX_PPX && !crtl->calls_eh_return.
	(ix86_expand_epilogue): Don't restore reg using mov when
	apx_ppx_used flag is true.
	* config/i386/i386.h (struct machine_frame_state):
	Add apx_ppx_used flag.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-ppx-2.c: New test.
	* gcc.target/i386/apx-ppx-3.c: Likewise.
---
 gcc/config/i386/i386.cc                   | 13 +++++++++----
 gcc/config/i386/i386.h                    |  4 ++++
 gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
 4 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c

Comments

Richard Biener July 2, 2024, 8:17 a.m. UTC | #1
On Tue, Jul 2, 2024 at 5:24 AM Hongyu Wang <hongyu.wang@intel.com> wrote:
>
> Hi,
>
> According to APX spec, the pushp/popp pairs should be matched,
> otherwise the PPX hint cannot take effect and cause performance loss.
>
> In the ix86_expand_epilogue, there are several optimizations that may
> cause the epilogue using mov to restore the regs. Check if PPX applied
> and prevent usage of mov/leave in the epilogue.

Isn't it true in general that mismatched push/pop confuses a CPUs stack
engine?  Does the distance of the push/pop make any difference here
or in general?

> Bootstrapped/regtested on x86_64-pc-linux-gnu.
>
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
>         flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
>         (ix86_emit_save_regs): Emit ppx is available only when
>         TARGET_APX_PPX && !crtl->calls_eh_return.
>         (ix86_expand_epilogue): Don't restore reg using mov when
>         apx_ppx_used flag is true.
>         * config/i386/i386.h (struct machine_frame_state):
>         Add apx_ppx_used flag.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ppx-2.c: New test.
>         * gcc.target/i386/apx-ppx-3.c: Likewise.
> ---
>  gcc/config/i386/i386.cc                   | 13 +++++++++----
>  gcc/config/i386/i386.h                    |  4 ++++
>  gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
>  4 files changed, 34 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index bd7411190af..99def8d4a77 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
>  {
>    int regno;
>    rtx_insn *insn;
> +  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
>
>    if (!TARGET_APX_PUSH2POP2
>        || !ix86_can_use_push2pop2 ()
> @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
>         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
>           {
>             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> -                                       TARGET_APX_PPX));
> +                                       use_ppx));
>             RTX_FRAME_RELATED_P (insn) = 1;
>           }
>      }
> @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
>                                                               regno_list[0]),
>                                                  gen_rtx_REG (word_mode,
>                                                               regno_list[1]),
> -                                                TARGET_APX_PPX));
> +                                                use_ppx));
>                     RTX_FRAME_RELATED_P (insn) = 1;
>                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
>
> @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
>             else
>               {
>                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> -                                           TARGET_APX_PPX));
> +                                           use_ppx));
>                 RTX_FRAME_RELATED_P (insn) = 1;
>                 aligned = true;
>               }
> @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
>         {
>           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
>                                                    regno_list[0]),
> -                                     TARGET_APX_PPX));
> +                                     use_ppx));
>           RTX_FRAME_RELATED_P (insn) = 1;
>         }
>      }
> @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
>        if (!frame.save_regs_using_mov)
>         {
>           ix86_emit_save_regs ();
> +         m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
>           int_registers_saved = true;
>           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
>         }
> @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
>    /* SEH requires the use of pops to identify the epilogue.  */
>    else if (TARGET_SEH)
>      restore_regs_via_mov = false;
> +  /* If we already save reg with pushp, don't use move at epilogue.  */
> +  else if (m->fs.apx_ppx_used)
> +    restore_regs_via_mov = false;
>    /* If we're only restoring one register and sp cannot be used then
>       using a move instruction to restore the register since it's
>       less work than reloading sp and popping the register.  */
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 147b12cd014..0c5292e1d64 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
>       The flags realigned and sp_realigned are mutually exclusive.  */
>    BOOL_BITFIELD sp_realigned : 1;
>
> +  /* When APX_PPX used in prologue, force epilogue to emit
> +  popp instead of move and leave.  */
> +  BOOL_BITFIELD apx_ppx_used : 1;
> +
>    /* If sp_realigned is set, this is the last valid offset from the CFA
>       that can be used for access with the frame pointer.  */
>    HOST_WIDE_INT sp_realigned_fp_last;
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> new file mode 100644
> index 00000000000..42a95340b55
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> +
> +/* { dg-final { scan-assembler "pushp" } } */
> +/* { dg-final { scan-assembler "popp" } } */
> +/* { dg-final { scan-assembler-not "leave" } } */
> +
> +extern int bar (int a);
> +extern int *q;
> +
> +void foo (int *a)
> +{
> +  q[2] = bar (q[1]);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> new file mode 100644
> index 00000000000..76931fbe294
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mapx-features=ppx" } */
> +
> +/* { dg-final { scan-assembler-not "pushp" } } */
> +/* { dg-final { scan-assembler-not "popp" } } */
> +
> +#include "eh_return-2.c"
> --
> 2.31.1
>
Hongyu Wang July 2, 2024, 8:38 a.m. UTC | #2
> Isn't it true in general that mismatched push/pop confuses a CPUs stack
> engine?  Does the distance of the push/pop make any difference here
> or in general?

We are not sure about the impact for distance, but the PPX hint was
just a performance hint. As mentioned in the apx spec, the mismatched
pushp/popp pair does confused the fast-forwarding logic and turns off
the PPX optimization. We just need to make sure every pushp for a
certain reg has corresponding popp for that reg.


Richard Biener <richard.guenther@gmail.com> 于2024年7月2日周二 16:18写道:

>
> On Tue, Jul 2, 2024 at 5:24 AM Hongyu Wang <hongyu.wang@intel.com> wrote:
> >
> > Hi,
> >
> > According to APX spec, the pushp/popp pairs should be matched,
> > otherwise the PPX hint cannot take effect and cause performance loss.
> >
> > In the ix86_expand_epilogue, there are several optimizations that may
> > cause the epilogue using mov to restore the regs. Check if PPX applied
> > and prevent usage of mov/leave in the epilogue.
>
> Isn't it true in general that mismatched push/pop confuses a CPUs stack
> engine?  Does the distance of the push/pop make any difference here
> or in general?
>
> > Bootstrapped/regtested on x86_64-pc-linux-gnu.
> >
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >         * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
> >         flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
> >         (ix86_emit_save_regs): Emit ppx is available only when
> >         TARGET_APX_PPX && !crtl->calls_eh_return.
> >         (ix86_expand_epilogue): Don't restore reg using mov when
> >         apx_ppx_used flag is true.
> >         * config/i386/i386.h (struct machine_frame_state):
> >         Add apx_ppx_used flag.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/apx-ppx-2.c: New test.
> >         * gcc.target/i386/apx-ppx-3.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc                   | 13 +++++++++----
> >  gcc/config/i386/i386.h                    |  4 ++++
> >  gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
> >  gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
> >  4 files changed, 34 insertions(+), 4 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index bd7411190af..99def8d4a77 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
> >  {
> >    int regno;
> >    rtx_insn *insn;
> > +  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
> >
> >    if (!TARGET_APX_PUSH2POP2
> >        || !ix86_can_use_push2pop2 ()
> > @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
> >         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
> >           {
> >             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > -                                       TARGET_APX_PPX));
> > +                                       use_ppx));
> >             RTX_FRAME_RELATED_P (insn) = 1;
> >           }
> >      }
> > @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
> >                                                               regno_list[0]),
> >                                                  gen_rtx_REG (word_mode,
> >                                                               regno_list[1]),
> > -                                                TARGET_APX_PPX));
> > +                                                use_ppx));
> >                     RTX_FRAME_RELATED_P (insn) = 1;
> >                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
> >
> > @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
> >             else
> >               {
> >                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > -                                           TARGET_APX_PPX));
> > +                                           use_ppx));
> >                 RTX_FRAME_RELATED_P (insn) = 1;
> >                 aligned = true;
> >               }
> > @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
> >         {
> >           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
> >                                                    regno_list[0]),
> > -                                     TARGET_APX_PPX));
> > +                                     use_ppx));
> >           RTX_FRAME_RELATED_P (insn) = 1;
> >         }
> >      }
> > @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
> >        if (!frame.save_regs_using_mov)
> >         {
> >           ix86_emit_save_regs ();
> > +         m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
> >           int_registers_saved = true;
> >           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
> >         }
> > @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
> >    /* SEH requires the use of pops to identify the epilogue.  */
> >    else if (TARGET_SEH)
> >      restore_regs_via_mov = false;
> > +  /* If we already save reg with pushp, don't use move at epilogue.  */
> > +  else if (m->fs.apx_ppx_used)
> > +    restore_regs_via_mov = false;
> >    /* If we're only restoring one register and sp cannot be used then
> >       using a move instruction to restore the register since it's
> >       less work than reloading sp and popping the register.  */
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 147b12cd014..0c5292e1d64 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
> >       The flags realigned and sp_realigned are mutually exclusive.  */
> >    BOOL_BITFIELD sp_realigned : 1;
> >
> > +  /* When APX_PPX used in prologue, force epilogue to emit
> > +  popp instead of move and leave.  */
> > +  BOOL_BITFIELD apx_ppx_used : 1;
> > +
> >    /* If sp_realigned is set, this is the last valid offset from the CFA
> >       that can be used for access with the frame pointer.  */
> >    HOST_WIDE_INT sp_realigned_fp_last;
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > new file mode 100644
> > index 00000000000..42a95340b55
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> > +
> > +/* { dg-final { scan-assembler "pushp" } } */
> > +/* { dg-final { scan-assembler "popp" } } */
> > +/* { dg-final { scan-assembler-not "leave" } } */
> > +
> > +extern int bar (int a);
> > +extern int *q;
> > +
> > +void foo (int *a)
> > +{
> > +  q[2] = bar (q[1]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > new file mode 100644
> > index 00000000000..76931fbe294
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > @@ -0,0 +1,7 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mapx-features=ppx" } */
> > +
> > +/* { dg-final { scan-assembler-not "pushp" } } */
> > +/* { dg-final { scan-assembler-not "popp" } } */
> > +
> > +#include "eh_return-2.c"
> > --
> > 2.31.1
> >
Hongtao Liu July 4, 2024, 3 a.m. UTC | #3
On Tue, Jul 2, 2024 at 11:24 AM Hongyu Wang <hongyu.wang@intel.com> wrote:
>
> Hi,
>
> According to APX spec, the pushp/popp pairs should be matched,
> otherwise the PPX hint cannot take effect and cause performance loss.
>
> In the ix86_expand_epilogue, there are several optimizations that may
> cause the epilogue using mov to restore the regs. Check if PPX applied
> and prevent usage of mov/leave in the epilogue.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu.
>
> Ok for trunk?
Ok.
>
> gcc/ChangeLog:
>
>         * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
>         flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
>         (ix86_emit_save_regs): Emit ppx is available only when
>         TARGET_APX_PPX && !crtl->calls_eh_return.
>         (ix86_expand_epilogue): Don't restore reg using mov when
>         apx_ppx_used flag is true.
>         * config/i386/i386.h (struct machine_frame_state):
>         Add apx_ppx_used flag.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ppx-2.c: New test.
>         * gcc.target/i386/apx-ppx-3.c: Likewise.
> ---
>  gcc/config/i386/i386.cc                   | 13 +++++++++----
>  gcc/config/i386/i386.h                    |  4 ++++
>  gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
>  gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
>  4 files changed, 34 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index bd7411190af..99def8d4a77 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
>  {
>    int regno;
>    rtx_insn *insn;
> +  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
>
>    if (!TARGET_APX_PUSH2POP2
>        || !ix86_can_use_push2pop2 ()
> @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
>         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
>           {
>             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> -                                       TARGET_APX_PPX));
> +                                       use_ppx));
>             RTX_FRAME_RELATED_P (insn) = 1;
>           }
>      }
> @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
>                                                               regno_list[0]),
>                                                  gen_rtx_REG (word_mode,
>                                                               regno_list[1]),
> -                                                TARGET_APX_PPX));
> +                                                use_ppx));
>                     RTX_FRAME_RELATED_P (insn) = 1;
>                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
>
> @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
>             else
>               {
>                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> -                                           TARGET_APX_PPX));
> +                                           use_ppx));
>                 RTX_FRAME_RELATED_P (insn) = 1;
>                 aligned = true;
>               }
> @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
>         {
>           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
>                                                    regno_list[0]),
> -                                     TARGET_APX_PPX));
> +                                     use_ppx));
>           RTX_FRAME_RELATED_P (insn) = 1;
>         }
>      }
> @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
>        if (!frame.save_regs_using_mov)
>         {
>           ix86_emit_save_regs ();
> +         m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
>           int_registers_saved = true;
>           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
>         }
> @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
>    /* SEH requires the use of pops to identify the epilogue.  */
>    else if (TARGET_SEH)
>      restore_regs_via_mov = false;
> +  /* If we already save reg with pushp, don't use move at epilogue.  */
> +  else if (m->fs.apx_ppx_used)
> +    restore_regs_via_mov = false;
>    /* If we're only restoring one register and sp cannot be used then
>       using a move instruction to restore the register since it's
>       less work than reloading sp and popping the register.  */
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 147b12cd014..0c5292e1d64 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
>       The flags realigned and sp_realigned are mutually exclusive.  */
>    BOOL_BITFIELD sp_realigned : 1;
>
> +  /* When APX_PPX used in prologue, force epilogue to emit
> +  popp instead of move and leave.  */
> +  BOOL_BITFIELD apx_ppx_used : 1;
> +
>    /* If sp_realigned is set, this is the last valid offset from the CFA
>       that can be used for access with the frame pointer.  */
>    HOST_WIDE_INT sp_realigned_fp_last;
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> new file mode 100644
> index 00000000000..42a95340b55
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> +
> +/* { dg-final { scan-assembler "pushp" } } */
> +/* { dg-final { scan-assembler "popp" } } */
> +/* { dg-final { scan-assembler-not "leave" } } */
> +
> +extern int bar (int a);
> +extern int *q;
> +
> +void foo (int *a)
> +{
> +  q[2] = bar (q[1]);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> new file mode 100644
> index 00000000000..76931fbe294
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mapx-features=ppx" } */
> +
> +/* { dg-final { scan-assembler-not "pushp" } } */
> +/* { dg-final { scan-assembler-not "popp" } } */
> +
> +#include "eh_return-2.c"
> --
> 2.31.1
>
Hongtao Liu Oct. 31, 2024, 1:05 a.m. UTC | #4
On Thu, Jul 4, 2024 at 11:00 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Jul 2, 2024 at 11:24 AM Hongyu Wang <hongyu.wang@intel.com> wrote:
> >
> > Hi,
> >
> > According to APX spec, the pushp/popp pairs should be matched,
> > otherwise the PPX hint cannot take effect and cause performance loss.
> >
> > In the ix86_expand_epilogue, there are several optimizations that may
> > cause the epilogue using mov to restore the regs. Check if PPX applied
> > and prevent usage of mov/leave in the epilogue.
> >
> > Bootstrapped/regtested on x86_64-pc-linux-gnu.
> >
> > Ok for trunk?
> Ok.
Please backport the fix to GCC14 branch.
> >
> > gcc/ChangeLog:
> >
> >         * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
> >         flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
> >         (ix86_emit_save_regs): Emit ppx is available only when
> >         TARGET_APX_PPX && !crtl->calls_eh_return.
> >         (ix86_expand_epilogue): Don't restore reg using mov when
> >         apx_ppx_used flag is true.
> >         * config/i386/i386.h (struct machine_frame_state):
> >         Add apx_ppx_used flag.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/apx-ppx-2.c: New test.
> >         * gcc.target/i386/apx-ppx-3.c: Likewise.
> > ---
> >  gcc/config/i386/i386.cc                   | 13 +++++++++----
> >  gcc/config/i386/i386.h                    |  4 ++++
> >  gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
> >  gcc/testsuite/gcc.target/i386/apx-ppx-3.c |  7 +++++++
> >  4 files changed, 34 insertions(+), 4 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index bd7411190af..99def8d4a77 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
> >  {
> >    int regno;
> >    rtx_insn *insn;
> > +  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
> >
> >    if (!TARGET_APX_PUSH2POP2
> >        || !ix86_can_use_push2pop2 ()
> > @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
> >         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
> >           {
> >             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > -                                       TARGET_APX_PPX));
> > +                                       use_ppx));
> >             RTX_FRAME_RELATED_P (insn) = 1;
> >           }
> >      }
> > @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
> >                                                               regno_list[0]),
> >                                                  gen_rtx_REG (word_mode,
> >                                                               regno_list[1]),
> > -                                                TARGET_APX_PPX));
> > +                                                use_ppx));
> >                     RTX_FRAME_RELATED_P (insn) = 1;
> >                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
> >
> > @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
> >             else
> >               {
> >                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > -                                           TARGET_APX_PPX));
> > +                                           use_ppx));
> >                 RTX_FRAME_RELATED_P (insn) = 1;
> >                 aligned = true;
> >               }
> > @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
> >         {
> >           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
> >                                                    regno_list[0]),
> > -                                     TARGET_APX_PPX));
> > +                                     use_ppx));
> >           RTX_FRAME_RELATED_P (insn) = 1;
> >         }
> >      }
> > @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
> >        if (!frame.save_regs_using_mov)
> >         {
> >           ix86_emit_save_regs ();
> > +         m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
> >           int_registers_saved = true;
> >           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
> >         }
> > @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
> >    /* SEH requires the use of pops to identify the epilogue.  */
> >    else if (TARGET_SEH)
> >      restore_regs_via_mov = false;
> > +  /* If we already save reg with pushp, don't use move at epilogue.  */
> > +  else if (m->fs.apx_ppx_used)
> > +    restore_regs_via_mov = false;
> >    /* If we're only restoring one register and sp cannot be used then
> >       using a move instruction to restore the register since it's
> >       less work than reloading sp and popping the register.  */
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 147b12cd014..0c5292e1d64 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
> >       The flags realigned and sp_realigned are mutually exclusive.  */
> >    BOOL_BITFIELD sp_realigned : 1;
> >
> > +  /* When APX_PPX used in prologue, force epilogue to emit
> > +  popp instead of move and leave.  */
> > +  BOOL_BITFIELD apx_ppx_used : 1;
> > +
> >    /* If sp_realigned is set, this is the last valid offset from the CFA
> >       that can be used for access with the frame pointer.  */
> >    HOST_WIDE_INT sp_realigned_fp_last;
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > new file mode 100644
> > index 00000000000..42a95340b55
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> > +
> > +/* { dg-final { scan-assembler "pushp" } } */
> > +/* { dg-final { scan-assembler "popp" } } */
> > +/* { dg-final { scan-assembler-not "leave" } } */
> > +
> > +extern int bar (int a);
> > +extern int *q;
> > +
> > +void foo (int *a)
> > +{
> > +  q[2] = bar (q[1]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > new file mode 100644
> > index 00000000000..76931fbe294
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > @@ -0,0 +1,7 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mapx-features=ppx" } */
> > +
> > +/* { dg-final { scan-assembler-not "pushp" } } */
> > +/* { dg-final { scan-assembler-not "popp" } } */
> > +
> > +#include "eh_return-2.c"
> > --
> > 2.31.1
> >
>
>
> --
> BR,
> Hongtao
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index bd7411190af..99def8d4a77 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7429,6 +7429,7 @@  ix86_emit_save_regs (void)
 {
   int regno;
   rtx_insn *insn;
+  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
 
   if (!TARGET_APX_PUSH2POP2
       || !ix86_can_use_push2pop2 ()
@@ -7438,7 +7439,7 @@  ix86_emit_save_regs (void)
 	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
 	  {
 	    insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-					TARGET_APX_PPX));
+					use_ppx));
 	    RTX_FRAME_RELATED_P (insn) = 1;
 	  }
     }
@@ -7469,7 +7470,7 @@  ix86_emit_save_regs (void)
 							      regno_list[0]),
 						 gen_rtx_REG (word_mode,
 							      regno_list[1]),
-						 TARGET_APX_PPX));
+						 use_ppx));
 		    RTX_FRAME_RELATED_P (insn) = 1;
 		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
 
@@ -7502,7 +7503,7 @@  ix86_emit_save_regs (void)
 	    else
 	      {
 		insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
-					    TARGET_APX_PPX));
+					    use_ppx));
 		RTX_FRAME_RELATED_P (insn) = 1;
 		aligned = true;
 	      }
@@ -7511,7 +7512,7 @@  ix86_emit_save_regs (void)
 	{
 	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
 						   regno_list[0]),
-				      TARGET_APX_PPX));
+				      use_ppx));
 	  RTX_FRAME_RELATED_P (insn) = 1;
 	}
     }
@@ -8985,6 +8986,7 @@  ix86_expand_prologue (void)
       if (!frame.save_regs_using_mov)
 	{
 	  ix86_emit_save_regs ();
+	  m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
 	  int_registers_saved = true;
 	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
 	}
@@ -9870,6 +9872,9 @@  ix86_expand_epilogue (int style)
   /* SEH requires the use of pops to identify the epilogue.  */
   else if (TARGET_SEH)
     restore_regs_via_mov = false;
+  /* If we already save reg with pushp, don't use move at epilogue.  */
+  else if (m->fs.apx_ppx_used)
+    restore_regs_via_mov = false;
   /* If we're only restoring one register and sp cannot be used then
      using a move instruction to restore the register since it's
      less work than reloading sp and popping the register.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 147b12cd014..0c5292e1d64 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2693,6 +2693,10 @@  struct GTY(()) machine_frame_state
      The flags realigned and sp_realigned are mutually exclusive.  */
   BOOL_BITFIELD sp_realigned : 1;
 
+  /* When APX_PPX used in prologue, force epilogue to emit
+  popp instead of move and leave.  */
+  BOOL_BITFIELD apx_ppx_used : 1;
+
   /* If sp_realigned is set, this is the last valid offset from the CFA
      that can be used for access with the frame pointer.  */
   HOST_WIDE_INT sp_realigned_fp_last;
diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
new file mode 100644
index 00000000000..42a95340b55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
+
+/* { dg-final { scan-assembler "pushp" } } */
+/* { dg-final { scan-assembler "popp" } } */
+/* { dg-final { scan-assembler-not "leave" } } */
+
+extern int bar (int a);
+extern int *q;
+
+void foo (int *a)
+{
+  q[2] = bar (q[1]);
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
new file mode 100644
index 00000000000..76931fbe294
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
@@ -0,0 +1,7 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mapx-features=ppx" } */
+
+/* { dg-final { scan-assembler-not "pushp" } } */
+/* { dg-final { scan-assembler-not "popp" } } */
+
+#include "eh_return-2.c"