Message ID | 20231117072548.3543202-1-hongyu.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | [APX,PPX] Support Intel APX PPX | expand |
On Fri, Nov 17, 2023 at 3:26 PM Hongyu Wang <hongyu.wang@intel.com> wrote: > > Intel APX PPX feature has been released in [1]. > > PPX stands for Push-Pop Acceleration. PUSH/PUSH2 and its corresponding POP > can be marked with a 1-bit hint to indicate that the POP reads the > value written by the PUSH from the stack. The processor tracks these marked > instructions internally and fast-forwards register data between > matching PUSH and POP instructions, without going through memory or > through the training loop of the Fast Store Forwarding Predictor (FSFP). > This feature can also be adopted to PUSH2/POP2. > > For GCC, we emit explicit suffix 'p' (paired) to indicate the push/pop > pair are marked with PPX hint. To separate form original push/pop, we > use UNSPEC to restrict the PPX related patterns. So for pushp/popp, the > cfi is manually adjusted for the UNSPEC PPX insns. > > In the first implementation we only emit them under prologue/epilogue > when saving/restoring callee-saved registers to make sure push/pop are > paired. So an extra flag was added to check if PPX insns can be emitted > for those register save/restore interfaces. > > The PPX hint is purely a performance hint. If the 'p' suffix is not > emitted for paired push/pop, the PPX optimization will be disabled, > while program sematic will not be affected at all. > > Bootstrapped/regtest on x86-64-pc-linux-gnu{-m32,}. > > Ok for master? > > [1].https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.ht > > gcc/ChangeLog: > > * config/i386/i386-opts.h (enum apx_features): Add apx_ppx, add > it to apx_all. > * config/i386/i386.cc (ix86_emit_restore_reg_using_pop): Add > ppx_p parameter for function declaration. > (gen_push2): Add ppx_p parameter, emit push2p if ppx_p is true. > (ix86_emit_restore_reg_using_pop2): Likewise for pop2p. > (gen_pushp): New function to emit pushp and adjust cfi. > (ix86_emit_save_regs): Emit pushp/push2p under TARGET_APX_PPX. > (ix86_emit_restore_reg_using_pop): Add ppx_p, emit popp insn > and adjust cfi when ppx_p is ture. > (ix86_emit_restore_reg_using_pop2): Add ppx_p and parse to its > callee. > (ix86_emit_restore_regs_using_pop2): Likewise. > (ix86_expand_epilogue): Parse TARGET_APX_PPX to > ix86_emit_restore_reg_using_pop. > * config/i386/i386.h (TARGET_APX_PPX): New. > * config/i386/i386.md (UNSPEC_APXPUSHP): New unspec. > (UNSPEC_APXPOPP): Likewise. > (UNSPEC_APXPUSH2P): Likewise. > (UNSPEC_APXPOP2P_LOW): Likewise. > (UNSPEC_APXPOP2P_HIGH): Likewise. > (pushp_di): New define_insn. > (popp_di): Likewise. > (push2p_di): Likewise. > (pop2p_di): Likewise. > * config/i386/i386.opt: Add apx_ppx enum. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/apx-interrupt-1.c: Adjust option to restrict them > under certain subfeatures. > * gcc.target/i386/apx-push2pop2-1.c: Likewise. > * gcc.target/i386/apx-push2pop2_force_drap-1.c: Likewise. > * gcc.target/i386/apx-push2pop2_interrupt-1.c: Likewise. > * gcc.target/i386/apx-ppx-1.c: New test. > --- > gcc/config/i386/i386-opts.h | 3 +- > gcc/config/i386/i386.cc | 113 ++++++++++++++---- > gcc/config/i386/i386.h | 1 + > gcc/config/i386/i386.md | 47 +++++++- > gcc/config/i386/i386.opt | 3 + > .../gcc.target/i386/apx-interrupt-1.c | 2 +- > gcc/testsuite/gcc.target/i386/apx-ppx-1.c | 9 ++ > .../gcc.target/i386/apx-push2pop2-1.c | 2 +- > .../i386/apx-push2pop2_force_drap-1.c | 2 +- > .../i386/apx-push2pop2_interrupt-1.c | 2 +- > 10 files changed, 158 insertions(+), 26 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-1.c > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h > index 2ec76a16bce..4d293edb399 100644 > --- a/gcc/config/i386/i386-opts.h > +++ b/gcc/config/i386/i386-opts.h > @@ -139,7 +139,8 @@ enum apx_features { > apx_egpr = 1 << 0, > apx_push2pop2 = 1 << 1, > apx_ndd = 1 << 2, > - apx_all = apx_egpr | apx_push2pop2 | apx_ndd, > + apx_ppx = 1 << 3, > + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx, > }; > > #endif > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 683ac643bc8..df2fc236c0a 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -105,7 +105,7 @@ along with GCC; see the file COPYING3. If not see > static rtx legitimize_dllimport_symbol (rtx, bool); > static rtx legitimize_pe_coff_extern_decl (rtx, bool); > static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); > -static void ix86_emit_restore_reg_using_pop (rtx); > +static void ix86_emit_restore_reg_using_pop (rtx, bool = false); > > > #ifndef CHECK_STACK_LIMIT > @@ -6512,7 +6512,7 @@ gen_popfl (void) > > /* Generate a "push2" pattern for input ARG. */ > rtx > -gen_push2 (rtx mem, rtx reg1, rtx reg2) > +gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false) > { > struct machine_function *m = cfun->machine; > const int offset = UNITS_PER_WORD * 2; > @@ -6527,9 +6527,49 @@ gen_push2 (rtx mem, rtx reg1, rtx reg2) > if (REG_P (reg2) && GET_MODE (reg2) != word_mode) > reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); > > - return gen_push2_di (mem, reg1, reg2); > + return ppx_p ? gen_push2p_di (mem, reg1, reg2): > + gen_push2_di (mem, reg1, reg2); > } > > +/* Generate a "pushp" insn for input ARG, and set reg notes. */ > +rtx_insn * > +gen_pushp (rtx arg) > +{ Can we just overload gen_push with an new arg bool ppx just like we did for push2/pop2. > + struct machine_function *m = cfun->machine; > + > + if (m->fs.cfa_reg == stack_pointer_rtx) > + m->fs.cfa_offset += UNITS_PER_WORD; > + m->fs.sp_offset += UNITS_PER_WORD; > + > + if (REG_P (arg) && GET_MODE (arg) != word_mode) > + arg = gen_rtx_REG (word_mode, REGNO (arg)); > + > + rtx pat = gen_rtx_MEM (word_mode, > + gen_rtx_PRE_DEC (Pmode, > + stack_pointer_rtx)); > + rtx_insn *insn = emit_insn (gen_pushp_di (pat, arg)); > + Why do we need .. > + rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); > + > + rtx dwarf_reg = gen_rtx_REG (word_mode, > + REGNO (arg)); > + rtx tmp = gen_rtx_SET (gen_frame_mem (DImode, > + stack_pointer_rtx), > + dwarf_reg); > + RTX_FRAME_RELATED_P (tmp) = 1; > + XVECEXP (dwarf, 0, 1) = tmp; > + rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx, > + plus_constant (Pmode, > + stack_pointer_rtx, > + -UNITS_PER_WORD)); > + RTX_FRAME_RELATED_P (sp_tmp) = 1; > + XVECEXP (dwarf, 0, 0) = sp_tmp; > + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); this. I assume pushp should be different from push only by ppx hint, not dwarf. > + > + return insn; > +} > + > + > /* Return >= 0 if there is an unused call-clobbered register available > for the entire function. */ > > @@ -7369,7 +7409,10 @@ ix86_emit_save_regs (void) > for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) > { > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > + if (TARGET_APX_PPX) > + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); > + else > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > RTX_FRAME_RELATED_P (insn) = 1; > } > } > @@ -7399,7 +7442,8 @@ ix86_emit_save_regs (void) > gen_rtx_REG (word_mode, > regno_list[0]), > gen_rtx_REG (word_mode, > - regno_list[1]))); > + regno_list[1]), > + TARGET_APX_PPX)); > RTX_FRAME_RELATED_P (insn) = 1; > rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); > > @@ -7431,15 +7475,21 @@ ix86_emit_save_regs (void) > } > else > { > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > + if (TARGET_APX_PPX) > + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); > + else > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > RTX_FRAME_RELATED_P (insn) = 1; > aligned = true; > } > } > if (loaded_regnum == 1) > { > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, > - regno_list[0]))); > + if (TARGET_APX_PPX) > + insn = gen_pushp (gen_rtx_REG (word_mode, regno_list[0])); > + else > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, > + regno_list[0]))); > RTX_FRAME_RELATED_P (insn) = 1; > } > } > @@ -9268,15 +9318,30 @@ ix86_expand_prologue (void) > emit_insn (gen_prologue_use (stack_pointer_rtx)); > } > > -/* Emit code to restore REG using a POP insn. */ > +/* Emit code to restore REG using a POP or POPP insn. */ > > static void > -ix86_emit_restore_reg_using_pop (rtx reg) > +ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p) > { > struct machine_function *m = cfun->machine; > - rtx_insn *insn = emit_insn (gen_pop (reg)); > + rtx_insn *insn; > > - ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); > + if (ppx_p) > + { > + rtx stack = gen_rtx_MEM (word_mode, > + gen_rtx_POST_INC (Pmode, > + stack_pointer_rtx)); > + insn = emit_insn (gen_popp_di (reg, stack)); Diddo for .. > + RTX_FRAME_RELATED_P (insn) = 1; > + rtx dwarf = NULL_RTX; > + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); > + REG_NOTES (insn) = dwarf; .. this, why it's different from pop regarding cfa? > + } > + else > + { > + insn = emit_insn (gen_pop (reg)); > + ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); > + } > m->fs.sp_offset -= UNITS_PER_WORD; > > if (m->fs.cfa_reg == crtl->drap_reg > @@ -9328,14 +9393,19 @@ ix86_emit_restore_reg_using_pop (rtx reg) > > /* Emit code to restore REG using a POP2 insn. */ > static void > -ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) > +ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false) > { > struct machine_function *m = cfun->machine; > const int offset = UNITS_PER_WORD * 2; > + rtx_insn *insn; > > rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode, > stack_pointer_rtx)); > - rtx_insn *insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); > + > + if (ppx_p) > + insn = emit_insn (gen_pop2p_di (reg1, mem, reg2)); > + else > + insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); > > RTX_FRAME_RELATED_P (insn) = 1; > > @@ -9397,13 +9467,13 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) > /* Emit code to restore saved registers using POP insns. */ > > static void > -ix86_emit_restore_regs_using_pop (void) > +ix86_emit_restore_regs_using_pop (bool ppx_p) > { > unsigned int regno; > > for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p); > } > > /* Emit code to restore saved registers using POP2 insns. */ > @@ -9432,20 +9502,23 @@ ix86_emit_restore_regs_using_pop2 (void) > ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode, > regno_list[0]), > gen_rtx_REG (word_mode, > - regno_list[1])); > + regno_list[1]), > + TARGET_APX_PPX); > loaded_regnum = 0; > regno_list[0] = regno_list[1] = -1; > } > } > else > { > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), > + TARGET_APX_PPX); > aligned = true; > } > } > > if (loaded_regnum == 1) > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0])); > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]), > + TARGET_APX_PPX); > } > > /* Emit code and notes for the LEAVE instruction. If insn is non-null, > @@ -9990,7 +10063,7 @@ ix86_expand_epilogue (int style) > if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL) > ix86_emit_restore_regs_using_pop2 (); > else > - ix86_emit_restore_regs_using_pop (); > + ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); > } > > /* If we used a stack pointer and haven't already got rid of it, > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 037074758ea..227217a8712 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -54,6 +54,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > #define TARGET_APX_EGPR (ix86_apx_features & apx_egpr) > #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2) > #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) > +#define TARGET_APX_PPX (ix86_apx_features & apx_ppx) > > #include "config/vxworks-dummy.h" > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 1b5a794b9e5..8cce4c11e35 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -210,10 +210,17 @@ (define_c_enum "unspec" [ > ;; For insn_callee_abi: > UNSPEC_CALLEE_ABI > > - ;; For PUSH2/POP2 support > + ;; For APX PUSH2/POP2 support > UNSPEC_APXPUSH2 > UNSPEC_APXPOP2_LOW > UNSPEC_APXPOP2_HIGH > + > + ;; For APX PPX support > + UNSPEC_APXPUSHP > + UNSPEC_APXPOPP > + UNSPEC_APXPUSH2P > + UNSPEC_APXPOP2P_LOW > + UNSPEC_APXPOP2P_HIGH > ]) > > (define_c_enum "unspecv" [ > @@ -3785,6 +3792,44 @@ (define_insn "pop2_di" > [(set_attr "mode" "TI") > (set_attr "prefix" "evex")]) > > +(define_insn "pushp_di" > + [(set (match_operand:DI 0 "push_operand" "=<") > + (unspec: DI [(match_operand:DI 1 "register_operand" "r")] > + UNSPEC_APXPUSHP))] > + "TARGET_64BIT" > + "pushp\t%1" > + [(set_attr "mode" "DI")]) I think you just need to add an extra (unspec [(const_int 0)] UNSPEC_PPX) to origin push_di pattern, then no need for particular handling of dwarf and cfa stuffs. Ditto for popp and push2p and pop2p. > + > +(define_insn "popp_di" > + [(set (match_operand:DI 0 "nonimmediate_operand" "=r") > + (unspec:DI [(match_operand:DI 1 "pop_operand" ">")] > + UNSPEC_APXPOPP))] > + "TARGET_APX_PPX" > + "popp\t%0" > + [(set_attr "mode" "DI")]) > + > +(define_insn "push2p_di" > + [(set (match_operand:TI 0 "push_operand" "=<") > + (unspec:TI [(match_operand:DI 1 "register_operand" "r") > + (match_operand:DI 2 "register_operand" "r")] > + UNSPEC_APXPUSH2P))] > + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" > + "push2p\t%1, %2" > + [(set_attr "mode" "TI") > + (set_attr "type" "multi") > + (set_attr "prefix" "evex")]) > + > +(define_insn "pop2p_di" > + [(parallel [(set (match_operand:DI 0 "register_operand" "=r") > + (unspec:DI [(match_operand:TI 1 "pop_operand" ">")] > + UNSPEC_APXPOP2P_LOW)) > + (set (match_operand:DI 2 "register_operand" "=r") > + (unspec:DI [(const_int 0)] UNSPEC_APXPOP2P_HIGH))])] > + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" > + "pop2p\t%0, %2" > + [(set_attr "mode" "TI") > + (set_attr "prefix" "evex")]) > + > (define_insn "*pushsf_rex64" > [(set (match_operand:SF 0 "push_operand" "=X,X,X") > (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))] > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index 0c3b8f4b621..ee8898e3abb 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1333,6 +1333,9 @@ Enum(apx_features) String(push2pop2) Value(apx_push2pop2) Set(3) > EnumValue > Enum(apx_features) String(ndd) Value(apx_ndd) Set(4) > > +EnumValue > +Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) > + > EnumValue > Enum(apx_features) String(all) Value(apx_all) Set(1) > > diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > index 5f732d3e316..ffcb8fce71c 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! ia32 } } } */ > -/* { dg-options "-mapxf -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > +/* { dg-options "-mapx-features=egpr -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > extern void foo (void *) __attribute__ ((interrupt)); > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-1.c b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c > new file mode 100644 > index 00000000000..e9a595381e3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mapx-features=egpr,push2pop2,ppx" } */ > + > +#include "apx-push2pop2-1.c" > + > +/* { dg-final { scan-assembler "pushp" } } */ > +/* { dg-final { scan-assembler "popp" } } */ > +/* { dg-final { scan-assembler "push2p" } } */ > +/* { dg-final { scan-assembler "pop2p" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > index 089941d3726..c53112758a5 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! ia32 } } } */ > -/* { dg-options "-O2 -mapxf" } */ > +/* { dg-options "-O2 -mapx-features=push2pop2" } */ > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > extern int bar (int); > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > index 656ca91391a..022113bb1e2 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! ia32 } } } */ > -/* { dg-options "-O2 -mapxf -mforce-drap" } */ > +/* { dg-options "-O2 -mapx-features=push2pop2 -mforce-drap" } */ > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > #include "apx-push2pop2-1.c" > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > index 747f7aaf191..a5b46893208 100644 > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile { target { ! ia32 } } } */ > -/* { dg-options "-O2 -mapxf -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > +/* { dg-options "-O2 -mapx-features=egpr,push2pop2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > extern void foo (void *) __attribute__ ((interrupt)); > > -- > 2.31.1 >
> I think you just need to add an extra (unspec [(const_int 0)] > UNSPEC_PPX) to origin push_di pattern, then no need for particular > handling of dwarf and cfa stuffs. > Ditto for popp and push2p and pop2p. Yes, such change also worked and no cfa adjustment required then, thanks for the suggestion. Updated patch with just 1 new UNSPEC and removed cfa handling. Hongtao Liu <crazylht@gmail.com> 于2023年11月20日周一 14:46写道: > > On Fri, Nov 17, 2023 at 3:26 PM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > Intel APX PPX feature has been released in [1]. > > > > PPX stands for Push-Pop Acceleration. PUSH/PUSH2 and its corresponding POP > > can be marked with a 1-bit hint to indicate that the POP reads the > > value written by the PUSH from the stack. The processor tracks these marked > > instructions internally and fast-forwards register data between > > matching PUSH and POP instructions, without going through memory or > > through the training loop of the Fast Store Forwarding Predictor (FSFP). > > This feature can also be adopted to PUSH2/POP2. > > > > For GCC, we emit explicit suffix 'p' (paired) to indicate the push/pop > > pair are marked with PPX hint. To separate form original push/pop, we > > use UNSPEC to restrict the PPX related patterns. So for pushp/popp, the > > cfi is manually adjusted for the UNSPEC PPX insns. > > > > In the first implementation we only emit them under prologue/epilogue > > when saving/restoring callee-saved registers to make sure push/pop are > > paired. So an extra flag was added to check if PPX insns can be emitted > > for those register save/restore interfaces. > > > > The PPX hint is purely a performance hint. If the 'p' suffix is not > > emitted for paired push/pop, the PPX optimization will be disabled, > > while program sematic will not be affected at all. > > > > Bootstrapped/regtest on x86-64-pc-linux-gnu{-m32,}. > > > > Ok for master? > > > > [1].https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.ht > > > > gcc/ChangeLog: > > > > * config/i386/i386-opts.h (enum apx_features): Add apx_ppx, add > > it to apx_all. > > * config/i386/i386.cc (ix86_emit_restore_reg_using_pop): Add > > ppx_p parameter for function declaration. > > (gen_push2): Add ppx_p parameter, emit push2p if ppx_p is true. > > (ix86_emit_restore_reg_using_pop2): Likewise for pop2p. > > (gen_pushp): New function to emit pushp and adjust cfi. > > (ix86_emit_save_regs): Emit pushp/push2p under TARGET_APX_PPX. > > (ix86_emit_restore_reg_using_pop): Add ppx_p, emit popp insn > > and adjust cfi when ppx_p is ture. > > (ix86_emit_restore_reg_using_pop2): Add ppx_p and parse to its > > callee. > > (ix86_emit_restore_regs_using_pop2): Likewise. > > (ix86_expand_epilogue): Parse TARGET_APX_PPX to > > ix86_emit_restore_reg_using_pop. > > * config/i386/i386.h (TARGET_APX_PPX): New. > > * config/i386/i386.md (UNSPEC_APXPUSHP): New unspec. > > (UNSPEC_APXPOPP): Likewise. > > (UNSPEC_APXPUSH2P): Likewise. > > (UNSPEC_APXPOP2P_LOW): Likewise. > > (UNSPEC_APXPOP2P_HIGH): Likewise. > > (pushp_di): New define_insn. > > (popp_di): Likewise. > > (push2p_di): Likewise. > > (pop2p_di): Likewise. > > * config/i386/i386.opt: Add apx_ppx enum. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/apx-interrupt-1.c: Adjust option to restrict them > > under certain subfeatures. > > * gcc.target/i386/apx-push2pop2-1.c: Likewise. > > * gcc.target/i386/apx-push2pop2_force_drap-1.c: Likewise. > > * gcc.target/i386/apx-push2pop2_interrupt-1.c: Likewise. > > * gcc.target/i386/apx-ppx-1.c: New test. > > --- > > gcc/config/i386/i386-opts.h | 3 +- > > gcc/config/i386/i386.cc | 113 ++++++++++++++---- > > gcc/config/i386/i386.h | 1 + > > gcc/config/i386/i386.md | 47 +++++++- > > gcc/config/i386/i386.opt | 3 + > > .../gcc.target/i386/apx-interrupt-1.c | 2 +- > > gcc/testsuite/gcc.target/i386/apx-ppx-1.c | 9 ++ > > .../gcc.target/i386/apx-push2pop2-1.c | 2 +- > > .../i386/apx-push2pop2_force_drap-1.c | 2 +- > > .../i386/apx-push2pop2_interrupt-1.c | 2 +- > > 10 files changed, 158 insertions(+), 26 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-1.c > > > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h > > index 2ec76a16bce..4d293edb399 100644 > > --- a/gcc/config/i386/i386-opts.h > > +++ b/gcc/config/i386/i386-opts.h > > @@ -139,7 +139,8 @@ enum apx_features { > > apx_egpr = 1 << 0, > > apx_push2pop2 = 1 << 1, > > apx_ndd = 1 << 2, > > - apx_all = apx_egpr | apx_push2pop2 | apx_ndd, > > + apx_ppx = 1 << 3, > > + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx, > > }; > > > > #endif > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > index 683ac643bc8..df2fc236c0a 100644 > > --- a/gcc/config/i386/i386.cc > > +++ b/gcc/config/i386/i386.cc > > @@ -105,7 +105,7 @@ along with GCC; see the file COPYING3. If not see > > static rtx legitimize_dllimport_symbol (rtx, bool); > > static rtx legitimize_pe_coff_extern_decl (rtx, bool); > > static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); > > -static void ix86_emit_restore_reg_using_pop (rtx); > > +static void ix86_emit_restore_reg_using_pop (rtx, bool = false); > > > > > > #ifndef CHECK_STACK_LIMIT > > @@ -6512,7 +6512,7 @@ gen_popfl (void) > > > > /* Generate a "push2" pattern for input ARG. */ > > rtx > > -gen_push2 (rtx mem, rtx reg1, rtx reg2) > > +gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false) > > { > > struct machine_function *m = cfun->machine; > > const int offset = UNITS_PER_WORD * 2; > > @@ -6527,9 +6527,49 @@ gen_push2 (rtx mem, rtx reg1, rtx reg2) > > if (REG_P (reg2) && GET_MODE (reg2) != word_mode) > > reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); > > > > - return gen_push2_di (mem, reg1, reg2); > > + return ppx_p ? gen_push2p_di (mem, reg1, reg2): > > + gen_push2_di (mem, reg1, reg2); > > } > > > > +/* Generate a "pushp" insn for input ARG, and set reg notes. */ > > +rtx_insn * > > +gen_pushp (rtx arg) > > +{ > Can we just overload gen_push with an new arg bool ppx just like we > did for push2/pop2. > > + struct machine_function *m = cfun->machine; > > + > > + if (m->fs.cfa_reg == stack_pointer_rtx) > > + m->fs.cfa_offset += UNITS_PER_WORD; > > + m->fs.sp_offset += UNITS_PER_WORD; > > + > > + if (REG_P (arg) && GET_MODE (arg) != word_mode) > > + arg = gen_rtx_REG (word_mode, REGNO (arg)); > > + > > + rtx pat = gen_rtx_MEM (word_mode, > > + gen_rtx_PRE_DEC (Pmode, > > + stack_pointer_rtx)); > > + rtx_insn *insn = emit_insn (gen_pushp_di (pat, arg)); > > + > Why do we need .. > > + rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); > > + > > + rtx dwarf_reg = gen_rtx_REG (word_mode, > > + REGNO (arg)); > > + rtx tmp = gen_rtx_SET (gen_frame_mem (DImode, > > + stack_pointer_rtx), > > + dwarf_reg); > > + RTX_FRAME_RELATED_P (tmp) = 1; > > + XVECEXP (dwarf, 0, 1) = tmp; > > + rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx, > > + plus_constant (Pmode, > > + stack_pointer_rtx, > > + -UNITS_PER_WORD)); > > + RTX_FRAME_RELATED_P (sp_tmp) = 1; > > + XVECEXP (dwarf, 0, 0) = sp_tmp; > > + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); > this. I assume pushp should be different from push only by ppx hint, not dwarf. > > + > > + return insn; > > +} > > + > > + > > /* Return >= 0 if there is an unused call-clobbered register available > > for the entire function. */ > > > > @@ -7369,7 +7409,10 @@ ix86_emit_save_regs (void) > > for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) > > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) > > { > > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > + if (TARGET_APX_PPX) > > + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); > > + else > > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > RTX_FRAME_RELATED_P (insn) = 1; > > } > > } > > @@ -7399,7 +7442,8 @@ ix86_emit_save_regs (void) > > gen_rtx_REG (word_mode, > > regno_list[0]), > > gen_rtx_REG (word_mode, > > - regno_list[1]))); > > + regno_list[1]), > > + TARGET_APX_PPX)); > > RTX_FRAME_RELATED_P (insn) = 1; > > rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); > > > > @@ -7431,15 +7475,21 @@ ix86_emit_save_regs (void) > > } > > else > > { > > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > + if (TARGET_APX_PPX) > > + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); > > + else > > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > RTX_FRAME_RELATED_P (insn) = 1; > > aligned = true; > > } > > } > > if (loaded_regnum == 1) > > { > > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, > > - regno_list[0]))); > > + if (TARGET_APX_PPX) > > + insn = gen_pushp (gen_rtx_REG (word_mode, regno_list[0])); > > + else > > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, > > + regno_list[0]))); > > RTX_FRAME_RELATED_P (insn) = 1; > > } > > } > > @@ -9268,15 +9318,30 @@ ix86_expand_prologue (void) > > emit_insn (gen_prologue_use (stack_pointer_rtx)); > > } > > > > -/* Emit code to restore REG using a POP insn. */ > > +/* Emit code to restore REG using a POP or POPP insn. */ > > > > static void > > -ix86_emit_restore_reg_using_pop (rtx reg) > > +ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p) > > { > > struct machine_function *m = cfun->machine; > > - rtx_insn *insn = emit_insn (gen_pop (reg)); > > + rtx_insn *insn; > > > > - ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); > > + if (ppx_p) > > + { > > + rtx stack = gen_rtx_MEM (word_mode, > > + gen_rtx_POST_INC (Pmode, > > + stack_pointer_rtx)); > > + insn = emit_insn (gen_popp_di (reg, stack)); > Diddo for .. > > + RTX_FRAME_RELATED_P (insn) = 1; > > + rtx dwarf = NULL_RTX; > > + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); > > + REG_NOTES (insn) = dwarf; > .. this, why it's different from pop regarding cfa? > > + } > > + else > > + { > > + insn = emit_insn (gen_pop (reg)); > > + ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); > > + } > > m->fs.sp_offset -= UNITS_PER_WORD; > > > > if (m->fs.cfa_reg == crtl->drap_reg > > @@ -9328,14 +9393,19 @@ ix86_emit_restore_reg_using_pop (rtx reg) > > > > /* Emit code to restore REG using a POP2 insn. */ > > static void > > -ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) > > +ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false) > > { > > struct machine_function *m = cfun->machine; > > const int offset = UNITS_PER_WORD * 2; > > + rtx_insn *insn; > > > > rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode, > > stack_pointer_rtx)); > > - rtx_insn *insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); > > + > > + if (ppx_p) > > + insn = emit_insn (gen_pop2p_di (reg1, mem, reg2)); > > + else > > + insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); > > > > RTX_FRAME_RELATED_P (insn) = 1; > > > > @@ -9397,13 +9467,13 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) > > /* Emit code to restore saved registers using POP insns. */ > > > > static void > > -ix86_emit_restore_regs_using_pop (void) > > +ix86_emit_restore_regs_using_pop (bool ppx_p) > > { > > unsigned int regno; > > > > for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) > > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) > > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); > > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p); > > } > > > > /* Emit code to restore saved registers using POP2 insns. */ > > @@ -9432,20 +9502,23 @@ ix86_emit_restore_regs_using_pop2 (void) > > ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode, > > regno_list[0]), > > gen_rtx_REG (word_mode, > > - regno_list[1])); > > + regno_list[1]), > > + TARGET_APX_PPX); > > loaded_regnum = 0; > > regno_list[0] = regno_list[1] = -1; > > } > > } > > else > > { > > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); > > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), > > + TARGET_APX_PPX); > > aligned = true; > > } > > } > > > > if (loaded_regnum == 1) > > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0])); > > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]), > > + TARGET_APX_PPX); > > } > > > > /* Emit code and notes for the LEAVE instruction. If insn is non-null, > > @@ -9990,7 +10063,7 @@ ix86_expand_epilogue (int style) > > if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL) > > ix86_emit_restore_regs_using_pop2 (); > > else > > - ix86_emit_restore_regs_using_pop (); > > + ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); > > } > > > > /* If we used a stack pointer and haven't already got rid of it, > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > index 037074758ea..227217a8712 100644 > > --- a/gcc/config/i386/i386.h > > +++ b/gcc/config/i386/i386.h > > @@ -54,6 +54,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > > #define TARGET_APX_EGPR (ix86_apx_features & apx_egpr) > > #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2) > > #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) > > +#define TARGET_APX_PPX (ix86_apx_features & apx_ppx) > > > > #include "config/vxworks-dummy.h" > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index 1b5a794b9e5..8cce4c11e35 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -210,10 +210,17 @@ (define_c_enum "unspec" [ > > ;; For insn_callee_abi: > > UNSPEC_CALLEE_ABI > > > > - ;; For PUSH2/POP2 support > > + ;; For APX PUSH2/POP2 support > > UNSPEC_APXPUSH2 > > UNSPEC_APXPOP2_LOW > > UNSPEC_APXPOP2_HIGH > > + > > + ;; For APX PPX support > > + UNSPEC_APXPUSHP > > + UNSPEC_APXPOPP > > + UNSPEC_APXPUSH2P > > + UNSPEC_APXPOP2P_LOW > > + UNSPEC_APXPOP2P_HIGH > > ]) > > > > (define_c_enum "unspecv" [ > > @@ -3785,6 +3792,44 @@ (define_insn "pop2_di" > > [(set_attr "mode" "TI") > > (set_attr "prefix" "evex")]) > > > > +(define_insn "pushp_di" > > + [(set (match_operand:DI 0 "push_operand" "=<") > > + (unspec: DI [(match_operand:DI 1 "register_operand" "r")] > > + UNSPEC_APXPUSHP))] > > + "TARGET_64BIT" > > + "pushp\t%1" > > + [(set_attr "mode" "DI")]) > I think you just need to add an extra (unspec [(const_int 0)] > UNSPEC_PPX) to origin push_di pattern, then no need for particular > handling of dwarf and cfa stuffs. > Ditto for popp and push2p and pop2p. > > + > > +(define_insn "popp_di" > > + [(set (match_operand:DI 0 "nonimmediate_operand" "=r") > > + (unspec:DI [(match_operand:DI 1 "pop_operand" ">")] > > + UNSPEC_APXPOPP))] > > + "TARGET_APX_PPX" > > + "popp\t%0" > > + [(set_attr "mode" "DI")]) > > + > > +(define_insn "push2p_di" > > + [(set (match_operand:TI 0 "push_operand" "=<") > > + (unspec:TI [(match_operand:DI 1 "register_operand" "r") > > + (match_operand:DI 2 "register_operand" "r")] > > + UNSPEC_APXPUSH2P))] > > + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" > > + "push2p\t%1, %2" > > + [(set_attr "mode" "TI") > > + (set_attr "type" "multi") > > + (set_attr "prefix" "evex")]) > > + > > +(define_insn "pop2p_di" > > + [(parallel [(set (match_operand:DI 0 "register_operand" "=r") > > + (unspec:DI [(match_operand:TI 1 "pop_operand" ">")] > > + UNSPEC_APXPOP2P_LOW)) > > + (set (match_operand:DI 2 "register_operand" "=r") > > + (unspec:DI [(const_int 0)] UNSPEC_APXPOP2P_HIGH))])] > > + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" > > + "pop2p\t%0, %2" > > + [(set_attr "mode" "TI") > > + (set_attr "prefix" "evex")]) > > + > > (define_insn "*pushsf_rex64" > > [(set (match_operand:SF 0 "push_operand" "=X,X,X") > > (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))] > > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > > index 0c3b8f4b621..ee8898e3abb 100644 > > --- a/gcc/config/i386/i386.opt > > +++ b/gcc/config/i386/i386.opt > > @@ -1333,6 +1333,9 @@ Enum(apx_features) String(push2pop2) Value(apx_push2pop2) Set(3) > > EnumValue > > Enum(apx_features) String(ndd) Value(apx_ndd) Set(4) > > > > +EnumValue > > +Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) > > + > > EnumValue > > Enum(apx_features) String(all) Value(apx_all) Set(1) > > > > diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > > index 5f732d3e316..ffcb8fce71c 100644 > > --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > > +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile { target { ! ia32 } } } */ > > -/* { dg-options "-mapxf -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > +/* { dg-options "-mapx-features=egpr -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > > > extern void foo (void *) __attribute__ ((interrupt)); > > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-1.c b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c > > new file mode 100644 > > index 00000000000..e9a595381e3 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c > > @@ -0,0 +1,9 @@ > > +/* { dg-do compile { target { ! ia32 } } } */ > > +/* { dg-options "-O2 -mapx-features=egpr,push2pop2,ppx" } */ > > + > > +#include "apx-push2pop2-1.c" > > + > > +/* { dg-final { scan-assembler "pushp" } } */ > > +/* { dg-final { scan-assembler "popp" } } */ > > +/* { dg-final { scan-assembler "push2p" } } */ > > +/* { dg-final { scan-assembler "pop2p" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > > index 089941d3726..c53112758a5 100644 > > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile { target { ! ia32 } } } */ > > -/* { dg-options "-O2 -mapxf" } */ > > +/* { dg-options "-O2 -mapx-features=push2pop2" } */ > > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > > > extern int bar (int); > > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > > index 656ca91391a..022113bb1e2 100644 > > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile { target { ! ia32 } } } */ > > -/* { dg-options "-O2 -mapxf -mforce-drap" } */ > > +/* { dg-options "-O2 -mapx-features=push2pop2 -mforce-drap" } */ > > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > > > #include "apx-push2pop2-1.c" > > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > > index 747f7aaf191..a5b46893208 100644 > > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > > @@ -1,5 +1,5 @@ > > /* { dg-do compile { target { ! ia32 } } } */ > > -/* { dg-options "-O2 -mapxf -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > +/* { dg-options "-O2 -mapx-features=egpr,push2pop2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > > > extern void foo (void *) __attribute__ ((interrupt)); > > > > -- > > 2.31.1 > > > > > -- > BR, > Hongtao
On Tue, Nov 21, 2023 at 1:45 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote: > > > I think you just need to add an extra (unspec [(const_int 0)] > > UNSPEC_PPX) to origin push_di pattern, then no need for particular > > handling of dwarf and cfa stuffs. > > Ditto for popp and push2p and pop2p. > > Yes, such change also worked and no cfa adjustment required then, > thanks for the suggestion. > Updated patch with just 1 new UNSPEC and removed cfa handling. LGTM. > > Hongtao Liu <crazylht@gmail.com> 于2023年11月20日周一 14:46写道: > > > > On Fri, Nov 17, 2023 at 3:26 PM Hongyu Wang <hongyu.wang@intel.com> wrote: > > > > > > Intel APX PPX feature has been released in [1]. > > > > > > PPX stands for Push-Pop Acceleration. PUSH/PUSH2 and its corresponding POP > > > can be marked with a 1-bit hint to indicate that the POP reads the > > > value written by the PUSH from the stack. The processor tracks these marked > > > instructions internally and fast-forwards register data between > > > matching PUSH and POP instructions, without going through memory or > > > through the training loop of the Fast Store Forwarding Predictor (FSFP). > > > This feature can also be adopted to PUSH2/POP2. > > > > > > For GCC, we emit explicit suffix 'p' (paired) to indicate the push/pop > > > pair are marked with PPX hint. To separate form original push/pop, we > > > use UNSPEC to restrict the PPX related patterns. So for pushp/popp, the > > > cfi is manually adjusted for the UNSPEC PPX insns. > > > > > > In the first implementation we only emit them under prologue/epilogue > > > when saving/restoring callee-saved registers to make sure push/pop are > > > paired. So an extra flag was added to check if PPX insns can be emitted > > > for those register save/restore interfaces. > > > > > > The PPX hint is purely a performance hint. If the 'p' suffix is not > > > emitted for paired push/pop, the PPX optimization will be disabled, > > > while program sematic will not be affected at all. > > > > > > Bootstrapped/regtest on x86-64-pc-linux-gnu{-m32,}. > > > > > > Ok for master? > > > > > > [1].https://www.intel.com/content/www/us/en/developer/articles/technical/advanced-performance-extensions-apx.ht > > > > > > gcc/ChangeLog: > > > > > > * config/i386/i386-opts.h (enum apx_features): Add apx_ppx, add > > > it to apx_all. > > > * config/i386/i386.cc (ix86_emit_restore_reg_using_pop): Add > > > ppx_p parameter for function declaration. > > > (gen_push2): Add ppx_p parameter, emit push2p if ppx_p is true. > > > (ix86_emit_restore_reg_using_pop2): Likewise for pop2p. > > > (gen_pushp): New function to emit pushp and adjust cfi. > > > (ix86_emit_save_regs): Emit pushp/push2p under TARGET_APX_PPX. > > > (ix86_emit_restore_reg_using_pop): Add ppx_p, emit popp insn > > > and adjust cfi when ppx_p is ture. > > > (ix86_emit_restore_reg_using_pop2): Add ppx_p and parse to its > > > callee. > > > (ix86_emit_restore_regs_using_pop2): Likewise. > > > (ix86_expand_epilogue): Parse TARGET_APX_PPX to > > > ix86_emit_restore_reg_using_pop. > > > * config/i386/i386.h (TARGET_APX_PPX): New. > > > * config/i386/i386.md (UNSPEC_APXPUSHP): New unspec. > > > (UNSPEC_APXPOPP): Likewise. > > > (UNSPEC_APXPUSH2P): Likewise. > > > (UNSPEC_APXPOP2P_LOW): Likewise. > > > (UNSPEC_APXPOP2P_HIGH): Likewise. > > > (pushp_di): New define_insn. > > > (popp_di): Likewise. > > > (push2p_di): Likewise. > > > (pop2p_di): Likewise. > > > * config/i386/i386.opt: Add apx_ppx enum. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.target/i386/apx-interrupt-1.c: Adjust option to restrict them > > > under certain subfeatures. > > > * gcc.target/i386/apx-push2pop2-1.c: Likewise. > > > * gcc.target/i386/apx-push2pop2_force_drap-1.c: Likewise. > > > * gcc.target/i386/apx-push2pop2_interrupt-1.c: Likewise. > > > * gcc.target/i386/apx-ppx-1.c: New test. > > > --- > > > gcc/config/i386/i386-opts.h | 3 +- > > > gcc/config/i386/i386.cc | 113 ++++++++++++++---- > > > gcc/config/i386/i386.h | 1 + > > > gcc/config/i386/i386.md | 47 +++++++- > > > gcc/config/i386/i386.opt | 3 + > > > .../gcc.target/i386/apx-interrupt-1.c | 2 +- > > > gcc/testsuite/gcc.target/i386/apx-ppx-1.c | 9 ++ > > > .../gcc.target/i386/apx-push2pop2-1.c | 2 +- > > > .../i386/apx-push2pop2_force_drap-1.c | 2 +- > > > .../i386/apx-push2pop2_interrupt-1.c | 2 +- > > > 10 files changed, 158 insertions(+), 26 deletions(-) > > > create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-1.c > > > > > > diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h > > > index 2ec76a16bce..4d293edb399 100644 > > > --- a/gcc/config/i386/i386-opts.h > > > +++ b/gcc/config/i386/i386-opts.h > > > @@ -139,7 +139,8 @@ enum apx_features { > > > apx_egpr = 1 << 0, > > > apx_push2pop2 = 1 << 1, > > > apx_ndd = 1 << 2, > > > - apx_all = apx_egpr | apx_push2pop2 | apx_ndd, > > > + apx_ppx = 1 << 3, > > > + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx, > > > }; > > > > > > #endif > > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > > > index 683ac643bc8..df2fc236c0a 100644 > > > --- a/gcc/config/i386/i386.cc > > > +++ b/gcc/config/i386/i386.cc > > > @@ -105,7 +105,7 @@ along with GCC; see the file COPYING3. If not see > > > static rtx legitimize_dllimport_symbol (rtx, bool); > > > static rtx legitimize_pe_coff_extern_decl (rtx, bool); > > > static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); > > > -static void ix86_emit_restore_reg_using_pop (rtx); > > > +static void ix86_emit_restore_reg_using_pop (rtx, bool = false); > > > > > > > > > #ifndef CHECK_STACK_LIMIT > > > @@ -6512,7 +6512,7 @@ gen_popfl (void) > > > > > > /* Generate a "push2" pattern for input ARG. */ > > > rtx > > > -gen_push2 (rtx mem, rtx reg1, rtx reg2) > > > +gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false) > > > { > > > struct machine_function *m = cfun->machine; > > > const int offset = UNITS_PER_WORD * 2; > > > @@ -6527,9 +6527,49 @@ gen_push2 (rtx mem, rtx reg1, rtx reg2) > > > if (REG_P (reg2) && GET_MODE (reg2) != word_mode) > > > reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); > > > > > > - return gen_push2_di (mem, reg1, reg2); > > > + return ppx_p ? gen_push2p_di (mem, reg1, reg2): > > > + gen_push2_di (mem, reg1, reg2); > > > } > > > > > > +/* Generate a "pushp" insn for input ARG, and set reg notes. */ > > > +rtx_insn * > > > +gen_pushp (rtx arg) > > > +{ > > Can we just overload gen_push with an new arg bool ppx just like we > > did for push2/pop2. > > > + struct machine_function *m = cfun->machine; > > > + > > > + if (m->fs.cfa_reg == stack_pointer_rtx) > > > + m->fs.cfa_offset += UNITS_PER_WORD; > > > + m->fs.sp_offset += UNITS_PER_WORD; > > > + > > > + if (REG_P (arg) && GET_MODE (arg) != word_mode) > > > + arg = gen_rtx_REG (word_mode, REGNO (arg)); > > > + > > > + rtx pat = gen_rtx_MEM (word_mode, > > > + gen_rtx_PRE_DEC (Pmode, > > > + stack_pointer_rtx)); > > > + rtx_insn *insn = emit_insn (gen_pushp_di (pat, arg)); > > > + > > Why do we need .. > > > + rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); > > > + > > > + rtx dwarf_reg = gen_rtx_REG (word_mode, > > > + REGNO (arg)); > > > + rtx tmp = gen_rtx_SET (gen_frame_mem (DImode, > > > + stack_pointer_rtx), > > > + dwarf_reg); > > > + RTX_FRAME_RELATED_P (tmp) = 1; > > > + XVECEXP (dwarf, 0, 1) = tmp; > > > + rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx, > > > + plus_constant (Pmode, > > > + stack_pointer_rtx, > > > + -UNITS_PER_WORD)); > > > + RTX_FRAME_RELATED_P (sp_tmp) = 1; > > > + XVECEXP (dwarf, 0, 0) = sp_tmp; > > > + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); > > this. I assume pushp should be different from push only by ppx hint, not dwarf. > > > + > > > + return insn; > > > +} > > > + > > > + > > > /* Return >= 0 if there is an unused call-clobbered register available > > > for the entire function. */ > > > > > > @@ -7369,7 +7409,10 @@ ix86_emit_save_regs (void) > > > for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) > > > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) > > > { > > > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > > + if (TARGET_APX_PPX) > > > + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); > > > + else > > > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > > RTX_FRAME_RELATED_P (insn) = 1; > > > } > > > } > > > @@ -7399,7 +7442,8 @@ ix86_emit_save_regs (void) > > > gen_rtx_REG (word_mode, > > > regno_list[0]), > > > gen_rtx_REG (word_mode, > > > - regno_list[1]))); > > > + regno_list[1]), > > > + TARGET_APX_PPX)); > > > RTX_FRAME_RELATED_P (insn) = 1; > > > rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); > > > > > > @@ -7431,15 +7475,21 @@ ix86_emit_save_regs (void) > > > } > > > else > > > { > > > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > > + if (TARGET_APX_PPX) > > > + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); > > > + else > > > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); > > > RTX_FRAME_RELATED_P (insn) = 1; > > > aligned = true; > > > } > > > } > > > if (loaded_regnum == 1) > > > { > > > - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, > > > - regno_list[0]))); > > > + if (TARGET_APX_PPX) > > > + insn = gen_pushp (gen_rtx_REG (word_mode, regno_list[0])); > > > + else > > > + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, > > > + regno_list[0]))); > > > RTX_FRAME_RELATED_P (insn) = 1; > > > } > > > } > > > @@ -9268,15 +9318,30 @@ ix86_expand_prologue (void) > > > emit_insn (gen_prologue_use (stack_pointer_rtx)); > > > } > > > > > > -/* Emit code to restore REG using a POP insn. */ > > > +/* Emit code to restore REG using a POP or POPP insn. */ > > > > > > static void > > > -ix86_emit_restore_reg_using_pop (rtx reg) > > > +ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p) > > > { > > > struct machine_function *m = cfun->machine; > > > - rtx_insn *insn = emit_insn (gen_pop (reg)); > > > + rtx_insn *insn; > > > > > > - ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); > > > + if (ppx_p) > > > + { > > > + rtx stack = gen_rtx_MEM (word_mode, > > > + gen_rtx_POST_INC (Pmode, > > > + stack_pointer_rtx)); > > > + insn = emit_insn (gen_popp_di (reg, stack)); > > Diddo for .. > > > + RTX_FRAME_RELATED_P (insn) = 1; > > > + rtx dwarf = NULL_RTX; > > > + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); > > > + REG_NOTES (insn) = dwarf; > > .. this, why it's different from pop regarding cfa? > > > + } > > > + else > > > + { > > > + insn = emit_insn (gen_pop (reg)); > > > + ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); > > > + } > > > m->fs.sp_offset -= UNITS_PER_WORD; > > > > > > if (m->fs.cfa_reg == crtl->drap_reg > > > @@ -9328,14 +9393,19 @@ ix86_emit_restore_reg_using_pop (rtx reg) > > > > > > /* Emit code to restore REG using a POP2 insn. */ > > > static void > > > -ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) > > > +ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false) > > > { > > > struct machine_function *m = cfun->machine; > > > const int offset = UNITS_PER_WORD * 2; > > > + rtx_insn *insn; > > > > > > rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode, > > > stack_pointer_rtx)); > > > - rtx_insn *insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); > > > + > > > + if (ppx_p) > > > + insn = emit_insn (gen_pop2p_di (reg1, mem, reg2)); > > > + else > > > + insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); > > > > > > RTX_FRAME_RELATED_P (insn) = 1; > > > > > > @@ -9397,13 +9467,13 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) > > > /* Emit code to restore saved registers using POP insns. */ > > > > > > static void > > > -ix86_emit_restore_regs_using_pop (void) > > > +ix86_emit_restore_regs_using_pop (bool ppx_p) > > > { > > > unsigned int regno; > > > > > > for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) > > > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) > > > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); > > > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p); > > > } > > > > > > /* Emit code to restore saved registers using POP2 insns. */ > > > @@ -9432,20 +9502,23 @@ ix86_emit_restore_regs_using_pop2 (void) > > > ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode, > > > regno_list[0]), > > > gen_rtx_REG (word_mode, > > > - regno_list[1])); > > > + regno_list[1]), > > > + TARGET_APX_PPX); > > > loaded_regnum = 0; > > > regno_list[0] = regno_list[1] = -1; > > > } > > > } > > > else > > > { > > > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); > > > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), > > > + TARGET_APX_PPX); > > > aligned = true; > > > } > > > } > > > > > > if (loaded_regnum == 1) > > > - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0])); > > > + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]), > > > + TARGET_APX_PPX); > > > } > > > > > > /* Emit code and notes for the LEAVE instruction. If insn is non-null, > > > @@ -9990,7 +10063,7 @@ ix86_expand_epilogue (int style) > > > if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL) > > > ix86_emit_restore_regs_using_pop2 (); > > > else > > > - ix86_emit_restore_regs_using_pop (); > > > + ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); > > > } > > > > > > /* If we used a stack pointer and haven't already got rid of it, > > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > > > index 037074758ea..227217a8712 100644 > > > --- a/gcc/config/i386/i386.h > > > +++ b/gcc/config/i386/i386.h > > > @@ -54,6 +54,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > > > #define TARGET_APX_EGPR (ix86_apx_features & apx_egpr) > > > #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2) > > > #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) > > > +#define TARGET_APX_PPX (ix86_apx_features & apx_ppx) > > > > > > #include "config/vxworks-dummy.h" > > > > > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > > index 1b5a794b9e5..8cce4c11e35 100644 > > > --- a/gcc/config/i386/i386.md > > > +++ b/gcc/config/i386/i386.md > > > @@ -210,10 +210,17 @@ (define_c_enum "unspec" [ > > > ;; For insn_callee_abi: > > > UNSPEC_CALLEE_ABI > > > > > > - ;; For PUSH2/POP2 support > > > + ;; For APX PUSH2/POP2 support > > > UNSPEC_APXPUSH2 > > > UNSPEC_APXPOP2_LOW > > > UNSPEC_APXPOP2_HIGH > > > + > > > + ;; For APX PPX support > > > + UNSPEC_APXPUSHP > > > + UNSPEC_APXPOPP > > > + UNSPEC_APXPUSH2P > > > + UNSPEC_APXPOP2P_LOW > > > + UNSPEC_APXPOP2P_HIGH > > > ]) > > > > > > (define_c_enum "unspecv" [ > > > @@ -3785,6 +3792,44 @@ (define_insn "pop2_di" > > > [(set_attr "mode" "TI") > > > (set_attr "prefix" "evex")]) > > > > > > +(define_insn "pushp_di" > > > + [(set (match_operand:DI 0 "push_operand" "=<") > > > + (unspec: DI [(match_operand:DI 1 "register_operand" "r")] > > > + UNSPEC_APXPUSHP))] > > > + "TARGET_64BIT" > > > + "pushp\t%1" > > > + [(set_attr "mode" "DI")]) > > I think you just need to add an extra (unspec [(const_int 0)] > > UNSPEC_PPX) to origin push_di pattern, then no need for particular > > handling of dwarf and cfa stuffs. > > Ditto for popp and push2p and pop2p. > > > + > > > +(define_insn "popp_di" > > > + [(set (match_operand:DI 0 "nonimmediate_operand" "=r") > > > + (unspec:DI [(match_operand:DI 1 "pop_operand" ">")] > > > + UNSPEC_APXPOPP))] > > > + "TARGET_APX_PPX" > > > + "popp\t%0" > > > + [(set_attr "mode" "DI")]) > > > + > > > +(define_insn "push2p_di" > > > + [(set (match_operand:TI 0 "push_operand" "=<") > > > + (unspec:TI [(match_operand:DI 1 "register_operand" "r") > > > + (match_operand:DI 2 "register_operand" "r")] > > > + UNSPEC_APXPUSH2P))] > > > + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" > > > + "push2p\t%1, %2" > > > + [(set_attr "mode" "TI") > > > + (set_attr "type" "multi") > > > + (set_attr "prefix" "evex")]) > > > + > > > +(define_insn "pop2p_di" > > > + [(parallel [(set (match_operand:DI 0 "register_operand" "=r") > > > + (unspec:DI [(match_operand:TI 1 "pop_operand" ">")] > > > + UNSPEC_APXPOP2P_LOW)) > > > + (set (match_operand:DI 2 "register_operand" "=r") > > > + (unspec:DI [(const_int 0)] UNSPEC_APXPOP2P_HIGH))])] > > > + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" > > > + "pop2p\t%0, %2" > > > + [(set_attr "mode" "TI") > > > + (set_attr "prefix" "evex")]) > > > + > > > (define_insn "*pushsf_rex64" > > > [(set (match_operand:SF 0 "push_operand" "=X,X,X") > > > (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))] > > > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > > > index 0c3b8f4b621..ee8898e3abb 100644 > > > --- a/gcc/config/i386/i386.opt > > > +++ b/gcc/config/i386/i386.opt > > > @@ -1333,6 +1333,9 @@ Enum(apx_features) String(push2pop2) Value(apx_push2pop2) Set(3) > > > EnumValue > > > Enum(apx_features) String(ndd) Value(apx_ndd) Set(4) > > > > > > +EnumValue > > > +Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) > > > + > > > EnumValue > > > Enum(apx_features) String(all) Value(apx_all) Set(1) > > > > > > diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > > > index 5f732d3e316..ffcb8fce71c 100644 > > > --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > > > +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c > > > @@ -1,5 +1,5 @@ > > > /* { dg-do compile { target { ! ia32 } } } */ > > > -/* { dg-options "-mapxf -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > > +/* { dg-options "-mapx-features=egpr -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > > > > > extern void foo (void *) __attribute__ ((interrupt)); > > > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-1.c b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c > > > new file mode 100644 > > > index 00000000000..e9a595381e3 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c > > > @@ -0,0 +1,9 @@ > > > +/* { dg-do compile { target { ! ia32 } } } */ > > > +/* { dg-options "-O2 -mapx-features=egpr,push2pop2,ppx" } */ > > > + > > > +#include "apx-push2pop2-1.c" > > > + > > > +/* { dg-final { scan-assembler "pushp" } } */ > > > +/* { dg-final { scan-assembler "popp" } } */ > > > +/* { dg-final { scan-assembler "push2p" } } */ > > > +/* { dg-final { scan-assembler "pop2p" } } */ > > > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > > > index 089941d3726..c53112758a5 100644 > > > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > > > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c > > > @@ -1,5 +1,5 @@ > > > /* { dg-do compile { target { ! ia32 } } } */ > > > -/* { dg-options "-O2 -mapxf" } */ > > > +/* { dg-options "-O2 -mapx-features=push2pop2" } */ > > > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > > > > > extern int bar (int); > > > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > > > index 656ca91391a..022113bb1e2 100644 > > > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > > > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c > > > @@ -1,5 +1,5 @@ > > > /* { dg-do compile { target { ! ia32 } } } */ > > > -/* { dg-options "-O2 -mapxf -mforce-drap" } */ > > > +/* { dg-options "-O2 -mapx-features=push2pop2 -mforce-drap" } */ > > > /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ > > > > > > #include "apx-push2pop2-1.c" > > > diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > > > index 747f7aaf191..a5b46893208 100644 > > > --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > > > +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c > > > @@ -1,5 +1,5 @@ > > > /* { dg-do compile { target { ! ia32 } } } */ > > > -/* { dg-options "-O2 -mapxf -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > > +/* { dg-options "-O2 -mapx-features=egpr,push2pop2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ > > > > > > extern void foo (void *) __attribute__ ((interrupt)); > > > > > > -- > > > 2.31.1 > > > > > > > > > -- > > BR, > > Hongtao
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index 2ec76a16bce..4d293edb399 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -139,7 +139,8 @@ enum apx_features { apx_egpr = 1 << 0, apx_push2pop2 = 1 << 1, apx_ndd = 1 << 2, - apx_all = apx_egpr | apx_push2pop2 | apx_ndd, + apx_ppx = 1 << 3, + apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx, }; #endif diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 683ac643bc8..df2fc236c0a 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -105,7 +105,7 @@ along with GCC; see the file COPYING3. If not see static rtx legitimize_dllimport_symbol (rtx, bool); static rtx legitimize_pe_coff_extern_decl (rtx, bool); static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); -static void ix86_emit_restore_reg_using_pop (rtx); +static void ix86_emit_restore_reg_using_pop (rtx, bool = false); #ifndef CHECK_STACK_LIMIT @@ -6512,7 +6512,7 @@ gen_popfl (void) /* Generate a "push2" pattern for input ARG. */ rtx -gen_push2 (rtx mem, rtx reg1, rtx reg2) +gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false) { struct machine_function *m = cfun->machine; const int offset = UNITS_PER_WORD * 2; @@ -6527,9 +6527,49 @@ gen_push2 (rtx mem, rtx reg1, rtx reg2) if (REG_P (reg2) && GET_MODE (reg2) != word_mode) reg2 = gen_rtx_REG (word_mode, REGNO (reg2)); - return gen_push2_di (mem, reg1, reg2); + return ppx_p ? gen_push2p_di (mem, reg1, reg2): + gen_push2_di (mem, reg1, reg2); } +/* Generate a "pushp" insn for input ARG, and set reg notes. */ +rtx_insn * +gen_pushp (rtx arg) +{ + struct machine_function *m = cfun->machine; + + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_offset += UNITS_PER_WORD; + m->fs.sp_offset += UNITS_PER_WORD; + + if (REG_P (arg) && GET_MODE (arg) != word_mode) + arg = gen_rtx_REG (word_mode, REGNO (arg)); + + rtx pat = gen_rtx_MEM (word_mode, + gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)); + rtx_insn *insn = emit_insn (gen_pushp_di (pat, arg)); + + rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); + + rtx dwarf_reg = gen_rtx_REG (word_mode, + REGNO (arg)); + rtx tmp = gen_rtx_SET (gen_frame_mem (DImode, + stack_pointer_rtx), + dwarf_reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 1) = tmp; + rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -UNITS_PER_WORD)); + RTX_FRAME_RELATED_P (sp_tmp) = 1; + XVECEXP (dwarf, 0, 0) = sp_tmp; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + + return insn; +} + + /* Return >= 0 if there is an unused call-clobbered register available for the entire function. */ @@ -7369,7 +7409,10 @@ ix86_emit_save_regs (void) for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) { - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); + if (TARGET_APX_PPX) + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); + else + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); RTX_FRAME_RELATED_P (insn) = 1; } } @@ -7399,7 +7442,8 @@ ix86_emit_save_regs (void) gen_rtx_REG (word_mode, regno_list[0]), gen_rtx_REG (word_mode, - regno_list[1]))); + regno_list[1]), + TARGET_APX_PPX)); RTX_FRAME_RELATED_P (insn) = 1; rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); @@ -7431,15 +7475,21 @@ ix86_emit_save_regs (void) } else { - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); + if (TARGET_APX_PPX) + insn = gen_pushp (gen_rtx_REG (word_mode, regno)); + else + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); RTX_FRAME_RELATED_P (insn) = 1; aligned = true; } } if (loaded_regnum == 1) { - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, - regno_list[0]))); + if (TARGET_APX_PPX) + insn = gen_pushp (gen_rtx_REG (word_mode, regno_list[0])); + else + insn = emit_insn (gen_push (gen_rtx_REG (word_mode, + regno_list[0]))); RTX_FRAME_RELATED_P (insn) = 1; } } @@ -9268,15 +9318,30 @@ ix86_expand_prologue (void) emit_insn (gen_prologue_use (stack_pointer_rtx)); } -/* Emit code to restore REG using a POP insn. */ +/* Emit code to restore REG using a POP or POPP insn. */ static void -ix86_emit_restore_reg_using_pop (rtx reg) +ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p) { struct machine_function *m = cfun->machine; - rtx_insn *insn = emit_insn (gen_pop (reg)); + rtx_insn *insn; - ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); + if (ppx_p) + { + rtx stack = gen_rtx_MEM (word_mode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)); + insn = emit_insn (gen_popp_di (reg, stack)); + RTX_FRAME_RELATED_P (insn) = 1; + rtx dwarf = NULL_RTX; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + REG_NOTES (insn) = dwarf; + } + else + { + insn = emit_insn (gen_pop (reg)); + ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); + } m->fs.sp_offset -= UNITS_PER_WORD; if (m->fs.cfa_reg == crtl->drap_reg @@ -9328,14 +9393,19 @@ ix86_emit_restore_reg_using_pop (rtx reg) /* Emit code to restore REG using a POP2 insn. */ static void -ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) +ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false) { struct machine_function *m = cfun->machine; const int offset = UNITS_PER_WORD * 2; + rtx_insn *insn; rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); - rtx_insn *insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); + + if (ppx_p) + insn = emit_insn (gen_pop2p_di (reg1, mem, reg2)); + else + insn = emit_insn (gen_pop2_di (reg1, mem, reg2)); RTX_FRAME_RELATED_P (insn) = 1; @@ -9397,13 +9467,13 @@ ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2) /* Emit code to restore saved registers using POP insns. */ static void -ix86_emit_restore_regs_using_pop (void) +ix86_emit_restore_regs_using_pop (bool ppx_p) { unsigned int regno; for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p); } /* Emit code to restore saved registers using POP2 insns. */ @@ -9432,20 +9502,23 @@ ix86_emit_restore_regs_using_pop2 (void) ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode, regno_list[0]), gen_rtx_REG (word_mode, - regno_list[1])); + regno_list[1]), + TARGET_APX_PPX); loaded_regnum = 0; regno_list[0] = regno_list[1] = -1; } } else { - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), + TARGET_APX_PPX); aligned = true; } } if (loaded_regnum == 1) - ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0])); + ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]), + TARGET_APX_PPX); } /* Emit code and notes for the LEAVE instruction. If insn is non-null, @@ -9990,7 +10063,7 @@ ix86_expand_epilogue (int style) if (TARGET_APX_PUSH2POP2 && m->func_type == TYPE_NORMAL) ix86_emit_restore_regs_using_pop2 (); else - ix86_emit_restore_regs_using_pop (); + ix86_emit_restore_regs_using_pop (TARGET_APX_PPX); } /* If we used a stack pointer and haven't already got rid of it, diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 037074758ea..227217a8712 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -54,6 +54,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_APX_EGPR (ix86_apx_features & apx_egpr) #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2) #define TARGET_APX_NDD (ix86_apx_features & apx_ndd) +#define TARGET_APX_PPX (ix86_apx_features & apx_ppx) #include "config/vxworks-dummy.h" diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1b5a794b9e5..8cce4c11e35 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -210,10 +210,17 @@ (define_c_enum "unspec" [ ;; For insn_callee_abi: UNSPEC_CALLEE_ABI - ;; For PUSH2/POP2 support + ;; For APX PUSH2/POP2 support UNSPEC_APXPUSH2 UNSPEC_APXPOP2_LOW UNSPEC_APXPOP2_HIGH + + ;; For APX PPX support + UNSPEC_APXPUSHP + UNSPEC_APXPOPP + UNSPEC_APXPUSH2P + UNSPEC_APXPOP2P_LOW + UNSPEC_APXPOP2P_HIGH ]) (define_c_enum "unspecv" [ @@ -3785,6 +3792,44 @@ (define_insn "pop2_di" [(set_attr "mode" "TI") (set_attr "prefix" "evex")]) +(define_insn "pushp_di" + [(set (match_operand:DI 0 "push_operand" "=<") + (unspec: DI [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_APXPUSHP))] + "TARGET_64BIT" + "pushp\t%1" + [(set_attr "mode" "DI")]) + +(define_insn "popp_di" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r") + (unspec:DI [(match_operand:DI 1 "pop_operand" ">")] + UNSPEC_APXPOPP))] + "TARGET_APX_PPX" + "popp\t%0" + [(set_attr "mode" "DI")]) + +(define_insn "push2p_di" + [(set (match_operand:TI 0 "push_operand" "=<") + (unspec:TI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_APXPUSH2P))] + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" + "push2p\t%1, %2" + [(set_attr "mode" "TI") + (set_attr "type" "multi") + (set_attr "prefix" "evex")]) + +(define_insn "pop2p_di" + [(parallel [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:TI 1 "pop_operand" ">")] + UNSPEC_APXPOP2P_LOW)) + (set (match_operand:DI 2 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_APXPOP2P_HIGH))])] + "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX" + "pop2p\t%0, %2" + [(set_attr "mode" "TI") + (set_attr "prefix" "evex")]) + (define_insn "*pushsf_rex64" [(set (match_operand:SF 0 "push_operand" "=X,X,X") (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))] diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 0c3b8f4b621..ee8898e3abb 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1333,6 +1333,9 @@ Enum(apx_features) String(push2pop2) Value(apx_push2pop2) Set(3) EnumValue Enum(apx_features) String(ndd) Value(apx_ndd) Set(4) +EnumValue +Enum(apx_features) String(ppx) Value(apx_ppx) Set(5) + EnumValue Enum(apx_features) String(all) Value(apx_all) Set(1) diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c index 5f732d3e316..ffcb8fce71c 100644 --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-mapxf -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ +/* { dg-options "-mapx-features=egpr -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ extern void foo (void *) __attribute__ ((interrupt)); diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-1.c b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c new file mode 100644 index 00000000000..e9a595381e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mapx-features=egpr,push2pop2,ppx" } */ + +#include "apx-push2pop2-1.c" + +/* { dg-final { scan-assembler "pushp" } } */ +/* { dg-final { scan-assembler "popp" } } */ +/* { dg-final { scan-assembler "push2p" } } */ +/* { dg-final { scan-assembler "pop2p" } } */ diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c index 089941d3726..c53112758a5 100644 --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mapxf" } */ +/* { dg-options "-O2 -mapx-features=push2pop2" } */ /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ extern int bar (int); diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c index 656ca91391a..022113bb1e2 100644 --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mapxf -mforce-drap" } */ +/* { dg-options "-O2 -mapx-features=push2pop2 -mforce-drap" } */ /* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ #include "apx-push2pop2-1.c" diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c index 747f7aaf191..a5b46893208 100644 --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_interrupt-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -mapxf -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ +/* { dg-options "-O2 -mapx-features=egpr,push2pop2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ extern void foo (void *) __attribute__ ((interrupt));