diff mbox series

[v2,1/8,APX,NF] : Support APX NF add

Message ID BN9PR11MB5483892E10834975E862CB8AECEB2@BN9PR11MB5483.namprd11.prod.outlook.com
State New
Headers show
Series [v2,1/8,APX,NF] : Support APX NF add | expand

Commit Message

Kong, Lingling May 22, 2024, 8:29 a.m. UTC
> I wonder if we can use "define_subst" to conditionally add flags clobber
> for !TARGET_APX_NF targets. Even the example for "Define Subst" uses the insn
> w/ and w/o the clobber, so I think it is worth considering this approach.
> 
> Uros.

Good Suggestion, I defined new subst for no flags, and Bootstrapped and regtested on x86_64-linux-gnu. Also supported SPEC 2017 run normally on Intel software development emulator.
Ok for trunk?

Thanks,
Lingling

Subject: [PATCH v2 1/8] [APX NF]: Support APX NF add
APX NF(no flags) feature implements suppresses the update of status flags
for arithmetic operations.

For NF add, it is not clear whether nf add can be faster than lea. If so,
the pattern needs to be adjusted to perfer lea generation.

gcc/ChangeLog:

	* config/i386/i386-opts.h (enum apx_features): Add nf
	enumeration.
	* config/i386/i386.h (TARGET_APX_NF): New.
	* config/i386/i386.md (nf_subst): New define_subst.
	(nf_name): New subst_attr.
	(nf_prefix): Ditto.
	(nf_condition): Ditto.
	(nf_mem_constraint): Ditto.
	(nf_applied): Ditto.
	(*add<mode>_1_nf): New define_insn.
	(addhi_1_nf): Ditto.
	(addqi_1_nf): Ditto.
	* config/i386/i386.opt: Add apx_nf enumeration.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-ndd.c: Fixed test.
	* gcc.target/i386/apx-nf.c: New test.

Co-authored-by: Lingling Kong <lingling.kong@intel.com>
---
 gcc/config/i386/i386-opts.h             |   3 +-
 gcc/config/i386/i386.h                  |   1 +
 gcc/config/i386/i386.md                 | 179 +++++++++++++++---------
 gcc/config/i386/i386.opt                |   3 +
 gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
 gcc/testsuite/gcc.target/i386/apx-nf.c  |   6 +
 6 files changed, 126 insertions(+), 68 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c

Comments

Uros Bizjak May 22, 2024, 8:35 a.m. UTC | #1
On Wed, May 22, 2024 at 10:29 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> > I wonder if we can use "define_subst" to conditionally add flags clobber
> > for !TARGET_APX_NF targets. Even the example for "Define Subst" uses the insn
> > w/ and w/o the clobber, so I think it is worth considering this approach.
> >
> > Uros.
>
> Good Suggestion, I defined new subst for no flags, and Bootstrapped and regtested on x86_64-linux-gnu. Also supported SPEC 2017 run normally on Intel software development emulator.
> Ok for trunk?
>
> Thanks,
> Lingling
>
> Subject: [PATCH v2 1/8] [APX NF]: Support APX NF add
> APX NF(no flags) feature implements suppresses the update of status flags
> for arithmetic operations.
>
> For NF add, it is not clear whether nf add can be faster than lea. If so,
> the pattern needs to be adjusted to perfer lea generation.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-opts.h (enum apx_features): Add nf
>         enumeration.
>         * config/i386/i386.h (TARGET_APX_NF): New.
>         * config/i386/i386.md (nf_subst): New define_subst.
>         (nf_name): New subst_attr.
>         (nf_prefix): Ditto.
>         (nf_condition): Ditto.
>         (nf_mem_constraint): Ditto.
>         (nf_applied): Ditto.
>         (*add<mode>_1_nf): New define_insn.
>         (addhi_1_nf): Ditto.
>         (addqi_1_nf): Ditto.
>         * config/i386/i386.opt: Add apx_nf enumeration.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ndd.c: Fixed test.
>         * gcc.target/i386/apx-nf.c: New test.

LGTM, but I'll leave the final approval to Hongtao.

Thanks,
Uros.

>
> Co-authored-by: Lingling Kong <lingling.kong@intel.com>
> ---
>  gcc/config/i386/i386-opts.h             |   3 +-
>  gcc/config/i386/i386.h                  |   1 +
>  gcc/config/i386/i386.md                 | 179 +++++++++++++++---------
>  gcc/config/i386/i386.opt                |   3 +
>  gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
>  gcc/testsuite/gcc.target/i386/apx-nf.c  |   6 +
>  6 files changed, 126 insertions(+), 68 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-nf.c
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index ef2825803b3..60176ce609f 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -140,7 +140,8 @@ enum apx_features {
>    apx_push2pop2 = 1 << 1,
>    apx_ndd = 1 << 2,
>    apx_ppx = 1 << 3,
> -  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
> +  apx_nf = 1<< 4,
> +  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
>  };
>
>  #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 529edff93a4..f20ae4726da 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>  #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
>  #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
>  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
> +#define TARGET_APX_NF (ix86_apx_features & apx_nf)
>
>  #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 764bfe20ff2..bae344518bd 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6233,28 +6233,6 @@
>      }
>  })
>
>
> -;; Load effective address instructions
> -
> -(define_insn "*lea<mode>"
> -  [(set (match_operand:SWI48 0 "register_operand" "=r")
> -       (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
> -  "ix86_hardreg_mov_ok (operands[0], operands[1])"
> -{
> -  if (SImode_address_operand (operands[1], VOIDmode))
> -    {
> -      gcc_assert (TARGET_64BIT);
> -      return "lea{l}\t{%E1, %k0|%k0, %E1}";
> -    }
> -  else
> -    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
> -}
> -  [(set_attr "type" "lea")
> -   (set (attr "mode")
> -     (if_then_else
> -       (match_operand 1 "SImode_address_operand")
> -       (const_string "SI")
> -       (const_string "<MODE>")))])
> -
>  (define_peephole2
>    [(set (match_operand:SWI48 0 "register_operand")
>         (match_operand:SWI48 1 "address_no_seg_operand"))]
> @@ -6290,6 +6268,13 @@
>    [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
>                (clobber (reg:CC FLAGS_REG))])]
>    "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
> +
> +(define_split
> +  [(set (match_operand:SWI48 0 "general_reg_operand")
> +       (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
> +  "TARGET_APX_NF && reload_completed"
> +  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
> +  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
>
>
>  ;; Add instructions
>
> @@ -6437,48 +6422,65 @@
>               (clobber (reg:CC FLAGS_REG))])]
>   "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
>
> -(define_insn "*add<mode>_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
> +(define_subst_attr "nf_name" "nf_subst" "_nf" "")
> +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
> +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
> +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
> +(define_subst_attr "nf_applied" "nf_subst" "true" "false")
> +
> +(define_subst "nf_subst"
> +  [(set (match_operand:SWI 0)
> +        (match_operand:SWI 1))]
> +  ""
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +       (clobber (reg:CC FLAGS_REG))])
> +
> +(define_insn "*add<mode>_1<nf_name>"
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
>         (plus:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
> -         (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
> +         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
> +         (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
> +  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                     : "inc{<imodesuffix>}\t%0";
> +        return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                     : "<nf_prefix>inc{<imodesuffix>}\t%0";
>        else
>          {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                       : "dec{<imodesuffix>}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                       : "<nf_prefix>dec{<imodesuffix>}\t%0";
>         }
>
>      default:
>        /* For most processors, ADD is faster than LEA.  This alternative
>          was added to use ADD as much as possible.  */
> -      if (which_alternative == 2)
> +      if (which_alternative == 3)
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
> -        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                     : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
> +        return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> +                     : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                   : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> +                   : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
> +  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "3")
> +     (cond [(eq_attr "alternative" "4")
>                (const_string "lea")
>             (match_operand:SWI48 2 "incdec_operand")
>               (const_string "incdec")
> @@ -6491,6 +6493,28 @@
>         (const_string "*")))
>     (set_attr "mode" "<MODE>")])
>
> +;; Load effective address instructions
> +
> +(define_insn "*lea<mode>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r")
> +       (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
> +  "ix86_hardreg_mov_ok (operands[0], operands[1])"
> +{
> +  if (SImode_address_operand (operands[1], VOIDmode))
> +    {
> +      gcc_assert (TARGET_64BIT);
> +      return "lea{l}\t{%E1, %k0|%k0, %E1}";
> +    }
> +  else
> +    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
> +}
> +  [(set_attr "type" "lea")
> +   (set (attr "mode")
> +     (if_then_else
> +       (match_operand 1 "SImode_address_operand")
> +       (const_string "SI")
> +       (const_string "<MODE>")))])
> +
>  ;; It may seem that nonimmediate operand is proper one for operand 1.
>  ;; The addsi_1 pattern allows nonimmediate operand at that place and
>  ;; we take care in ix86_binary_operator_ok to not allow two memory
> @@ -6552,26 +6576,29 @@
>         (const_string "*")))
>     (set_attr "mode" "SI")])
>
> -(define_insn "*addhi_1"
> +(define_insn "*addhi_1<nf_name>"
>    [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
>         (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
> -                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
> +                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -       return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
> +       return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
>         }
>
>      default:
> @@ -6581,11 +6608,11 @@
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], HImode))
> -       return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
> -                      : "sub{w}\t{%2, %0|%0, %2}";
> +       return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
> +                      : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
> -                    : "add{w}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
> +                    : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
> @@ -6603,33 +6630,36 @@
>         (const_string "*")))
>     (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
>
> -(define_insn "*addqi_1"
> +(define_insn "*addqi_1<nf_name>"
>    [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
>         (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
> -                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
> +                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool widen = (get_attr_mode (insn) != MODE_QI);
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
>         if (use_ndd)
> -         return "inc{b}\t{%1, %0|%0, %1}";
> +         return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
>         else
> -         return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
> +         return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
>           if (use_ndd)
> -           return "dec{b}\t{%1, %0|%0, %1}";
> +           return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
>           else
> -           return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
> +           return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
>         }
>
>      default:
> @@ -6641,16 +6671,16 @@
>        if (x86_maybe_negate_const_int (&operands[2], QImode))
>         {
>           if (use_ndd)
> -           return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
> +           return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
>           else
> -           return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
> -                        : "sub{b}\t{%2, %0|%0, %2}";
> +           return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
> +                        : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
>         }
>        if (use_ndd)
> -       return "add{b}\t{%2, %1, %0|%0, %1, %2}";
> +       return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
>        else
> -       return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
> -                    : "add{b}\t{%2, %0|%0, %2}";
> +       return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
> +                    : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
> @@ -6824,6 +6854,23 @@
>      }
>  })
>
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (plus:SWI (match_operand:SWI 1 "register_operand")
> +                 (match_operand:SWI 2 "<nonmemory_operand>")))]
> +  "TARGET_APX_NF && reload_completed
> +   && ix86_lea_for_add_ok (insn, operands)"
> +  [(set (match_dup 0)
> +       (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
> +{
> +  if (<MODE>mode != <LEAMODE>mode)
> +    {
> +      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
> +      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
> +      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
> +    }
> +})
> +
>  ;; Convert add to the lea pattern to avoid flags dependency.
>  (define_split
>    [(set (match_operand:DI 0 "register_operand")
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index d5f793a9e8b..66021d59d4e 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1356,6 +1356,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
>  EnumValue
>  Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
>
> +EnumValue
> +Enum(apx_features) String(nf) Value(apx_nf) Set(6)
> +
>  EnumValue
>  Enum(apx_features) String(all) Value(apx_all) Set(1)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index 0eb751ad225..0ff4df0780c 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
>  /* { dg-final { scan-assembler-not "movl"} } */
>
>  #include <stdint.h>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
> new file mode 100644
> index 00000000000..3adc7a27902
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
> +/* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
> +
> +#include "apx-ndd.c"
> +
> --
> 2.31.1
>
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index ef2825803b3..60176ce609f 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,7 +140,8 @@  enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
+  apx_nf = 1<< 4,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 529edff93a4..f20ae4726da 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,7 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
 #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
+#define TARGET_APX_NF (ix86_apx_features & apx_nf)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 764bfe20ff2..bae344518bd 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6233,28 +6233,6 @@ 
     }
 })
 

-;; Load effective address instructions
-
-(define_insn "*lea<mode>"
-  [(set (match_operand:SWI48 0 "register_operand" "=r")
-	(match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
-  "ix86_hardreg_mov_ok (operands[0], operands[1])"
-{
-  if (SImode_address_operand (operands[1], VOIDmode))
-    {
-      gcc_assert (TARGET_64BIT);
-      return "lea{l}\t{%E1, %k0|%k0, %E1}";
-    }
-  else
-    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
-}
-  [(set_attr "type" "lea")
-   (set (attr "mode")
-     (if_then_else
-       (match_operand 1 "SImode_address_operand")
-       (const_string "SI")
-       (const_string "<MODE>")))])
-
 (define_peephole2
   [(set (match_operand:SWI48 0 "register_operand")
 	(match_operand:SWI48 1 "address_no_seg_operand"))]
@@ -6290,6 +6268,13 @@ 
   [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
               (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_split
+  [(set (match_operand:SWI48 0 "general_reg_operand")
+	(mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
+  "TARGET_APX_NF && reload_completed"
+  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
+  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
 

 ;; Add instructions
 
@@ -6437,48 +6422,65 @@ 
 	      (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
 
-(define_insn "*add<mode>_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
+(define_subst_attr "nf_name" "nf_subst" "_nf" "")
+(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
+(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
+(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
+(define_subst_attr "nf_applied" "nf_subst" "true" "false")
+
+(define_subst "nf_subst"
+  [(set (match_operand:SWI 0)
+        (match_operand:SWI 1))]
+  ""
+  [(set (match_dup 0)
+	(match_dup 1))
+	(clobber (reg:CC FLAGS_REG))])
+
+(define_insn "*add<mode>_1<nf_name>"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
 	(plus:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
-	  (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+	  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
+	  (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
-		      : "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+		      : "<nf_prefix>inc{<imodesuffix>}\t%0";
       else
         {
 	  gcc_assert (operands[2] == constm1_rtx);
-	  return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
-			: "dec{<imodesuffix>}\t%0";
+	  return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+			: "<nf_prefix>dec{<imodesuffix>}\t%0";
 	}
 
     default:
       /* For most processors, ADD is faster than LEA.  This alternative
 	 was added to use ADD as much as possible.  */
-      if (which_alternative == 2)
+      if (which_alternative == 3)
         std::swap (operands[1], operands[2]);
         
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		      : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+        return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		      : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		    : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		    : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
+  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
    (set (attr "type")
-     (cond [(eq_attr "alternative" "3")
+     (cond [(eq_attr "alternative" "4")
               (const_string "lea")
 	    (match_operand:SWI48 2 "incdec_operand")
 	      (const_string "incdec")
@@ -6491,6 +6493,28 @@ 
 	(const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+;; Load effective address instructions
+
+(define_insn "*lea<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
+  "ix86_hardreg_mov_ok (operands[0], operands[1])"
+{
+  if (SImode_address_operand (operands[1], VOIDmode))
+    {
+      gcc_assert (TARGET_64BIT);
+      return "lea{l}\t{%E1, %k0|%k0, %E1}";
+    }
+  else
+    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
+}
+  [(set_attr "type" "lea")
+   (set (attr "mode")
+     (if_then_else
+       (match_operand 1 "SImode_address_operand")
+       (const_string "SI")
+       (const_string "<MODE>")))])
+
 ;; It may seem that nonimmediate operand is proper one for operand 1.
 ;; The addsi_1 pattern allows nonimmediate operand at that place and
 ;; we take care in ix86_binary_operator_ok to not allow two memory
@@ -6552,26 +6576,29 @@ 
 	(const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_insn "*addhi_1"
+(define_insn "*addhi_1<nf_name>"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
 	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
-		 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
+		 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
+  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-	return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
+	return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
       else
 	{
 	  gcc_assert (operands[2] == constm1_rtx);
-	  return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
+	  return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
 	}
 
     default:
@@ -6581,11 +6608,11 @@ 
         std::swap (operands[1], operands[2]);
 
       if (x86_maybe_negate_const_int (&operands[2], HImode))
-	return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
-		       : "sub{w}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
 
-      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
-		     : "add{w}\t{%2, %0|%0, %2}";
+      return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
+		     : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
@@ -6603,33 +6630,36 @@ 
 	(const_string "*")))
    (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
 
-(define_insn "*addqi_1"
+(define_insn "*addqi_1<nf_name>"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
 	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
-		 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
+		 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool widen = (get_attr_mode (insn) != MODE_QI);
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
 	if (use_ndd)
-	  return "inc{b}\t{%1, %0|%0, %1}";
+	  return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
 	else
-	  return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+	  return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
       else
 	{
 	  gcc_assert (operands[2] == constm1_rtx);
 	  if (use_ndd)
-	    return "dec{b}\t{%1, %0|%0, %1}";
+	    return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
 	  else
-	    return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+	    return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
 	}
 
     default:
@@ -6641,16 +6671,16 @@ 
       if (x86_maybe_negate_const_int (&operands[2], QImode))
 	{
 	  if (use_ndd)
-	    return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
+	    return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
 	  else
-	    return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
-			 : "sub{b}\t{%2, %0|%0, %2}";
+	    return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
+			 : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
 	}
       if (use_ndd)
-	return "add{b}\t{%2, %1, %0|%0, %1, %2}";
+	return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
       else
-	return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
-		     : "add{b}\t{%2, %0|%0, %2}";
+	return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
+		     : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
@@ -6824,6 +6854,23 @@ 
     }
 })
 
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(plus:SWI (match_operand:SWI 1 "register_operand")
+		  (match_operand:SWI 2 "<nonmemory_operand>")))]
+  "TARGET_APX_NF && reload_completed
+   && ix86_lea_for_add_ok (insn, operands)"
+  [(set (match_dup 0)
+	(plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+  if (<MODE>mode != <LEAMODE>mode)
+    {
+      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
+    }
+})
+
 ;; Convert add to the lea pattern to avoid flags dependency.
 (define_split
   [(set (match_operand:DI 0 "register_operand")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index d5f793a9e8b..66021d59d4e 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1356,6 +1356,9 @@  Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
 EnumValue
 Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
 
+EnumValue
+Enum(apx_features) String(nf) Value(apx_nf) Set(6)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 0eb751ad225..0ff4df0780c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-mapxf -march=x86-64 -O2" } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
 /* { dg-final { scan-assembler-not "movl"} } */
 
 #include <stdint.h>
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c b/gcc/testsuite/gcc.target/i386/apx-nf.c
new file mode 100644
index 00000000000..3adc7a27902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-nf.c
@@ -0,0 +1,6 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx,nf -march=x86-64 -O2" } */
+/* { dg-final { scan-assembler-times "\{nf\} add" 4 } } */
+
+#include "apx-ndd.c"
+