diff mbox series

[v3,1/8,APX,NF] : Support APX NF add

Message ID DM4PR11MB5487CBCE904AF22EDDE6DC53ECF22@DM4PR11MB5487.namprd11.prod.outlook.com
State New
Headers show
Series [v3,1/8,APX,NF] : Support APX NF add | expand

Commit Message

Kong, Lingling May 29, 2024, 5:10 a.m. UTC
Hi, compared with v2, these patches restored the original lea patten position and addressed hongtao's comment. 

APX NF(no flags) feature implements suppresses the update of status flags
for arithmetic operations.

For NF add, it is not clear whether nf add can be faster than lea. If so,
the pattern needs to be adjusted to perfer lea generation.

gcc/ChangeLog:

        * config/i386/i386-opts.h (enum apx_features): Add nf
        enumeration.
        * config/i386/i386.h (TARGET_APX_NF): New.
        * config/i386/i386.md (*add<mode>_1_nf): New define_insn.
        * config/i386/i386.opt: Add apx_nf enumeration.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-ndd.c: Fixed test.

Co-authored-by: Lingling Kong <lingling.kong@intel.com>
---
 gcc/config/i386/i386-opts.h             |   3 +-
 gcc/config/i386/i386.h                  |   1 +
 gcc/config/i386/i386.md                 | 135 ++++++++++++++++--------
 gcc/config/i386/i386.opt                |   3 +
 gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
 5 files changed, 98 insertions(+), 46 deletions(-)

Comments

Hongtao Liu June 3, 2024, 1:08 a.m. UTC | #1
On Wed, May 29, 2024 at 1:11 PM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Hi, compared with v2, these patches restored the original lea patten position and addressed hongtao's comment.
>
> APX NF(no flags) feature implements suppresses the update of status flags
> for arithmetic operations.
Ok for the patch and the remaining.

[PATCH v3 1/8] [APX NF]: Support APX NF add   Kong, Lingling
[PATCH v3 2/8] [APX NF] Support APX NF for {sub/and/or/xor/neg}   Kong, Lingling
[PATCH v3 3/8] [APX NF] Support APX NF for left shift insns   Kong, Lingling
[PATCH v3 4/8] [APX NF] Support APX NF for right shift insns   Kong, Lingling
[PATCH v3 5/8] [APX NF] Support APX NF for rotate insns   Kong, Lingling
[PATCH v3 6/8] [APX NF] Support APX NF for shld/shrd   Kong, Lingling
[PATCH v3 7/8] [APX NF] Support APX NF for mul/div   Kong, Lingling
[PATCH v3 8/8] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt   Kong, Lingling

>
> For NF add, it is not clear whether nf add can be faster than lea. If so,
> the pattern needs to be adjusted to perfer lea generation.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-opts.h (enum apx_features): Add nf
>         enumeration.
>         * config/i386/i386.h (TARGET_APX_NF): New.
>         * config/i386/i386.md (*add<mode>_1_nf): New define_insn.
>         * config/i386/i386.opt: Add apx_nf enumeration.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-ndd.c: Fixed test.
>
> Co-authored-by: Lingling Kong <lingling.kong@intel.com>
> ---
>  gcc/config/i386/i386-opts.h             |   3 +-
>  gcc/config/i386/i386.h                  |   1 +
>  gcc/config/i386/i386.md                 | 135 ++++++++++++++++--------
>  gcc/config/i386/i386.opt                |   3 +
>  gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
>  5 files changed, 98 insertions(+), 46 deletions(-)
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index ef2825803b3..60176ce609f 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -140,7 +140,8 @@ enum apx_features {
>    apx_push2pop2 = 1 << 1,
>    apx_ndd = 1 << 2,
>    apx_ppx = 1 << 3,
> -  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
> +  apx_nf = 1<< 4,
> +  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
>  };
>
>  #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 359a8408263..969391d3013 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>  #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
>  #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
>  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
> +#define TARGET_APX_NF (ix86_apx_features & apx_nf)
>
>  #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index e8073f5a200..1eeadaddeba 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -6290,6 +6290,13 @@
>    [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
>                (clobber (reg:CC FLAGS_REG))])]
>    "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
> +
> +(define_split
> +  [(set (match_operand:SWI48 0 "general_reg_operand")
> +       (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
> +  "TARGET_APX_NF && reload_completed"
> +  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
> +  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
>
>
>  ;; Add instructions
>
> @@ -6437,48 +6444,65 @@
>               (clobber (reg:CC FLAGS_REG))])]
>   "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
>
> -(define_insn "*add<mode>_1"
> -  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
> +(define_subst_attr "nf_name" "nf_subst" "_nf" "")
> +(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
> +(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
> +(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
> +(define_subst_attr "nf_applied" "nf_subst" "true" "false")
> +
> +(define_subst "nf_subst"
> +  [(set (match_operand:SWI 0)
> +        (match_operand:SWI 1))]
> +  ""
> +  [(set (match_dup 0)
> +       (match_dup 1))
> +       (clobber (reg:CC FLAGS_REG))])
> +
> +(define_insn "*add<mode>_1<nf_name>"
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
>         (plus:SWI48
> -         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
> -         (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
> +         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
> +         (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
> +  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                     : "inc{<imodesuffix>}\t%0";
> +        return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                     : "<nf_prefix>inc{<imodesuffix>}\t%0";
>        else
>          {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> -                       : "dec{<imodesuffix>}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
> +                       : "<nf_prefix>dec{<imodesuffix>}\t%0";
>         }
>
>      default:
>        /* For most processors, ADD is faster than LEA.  This alternative
>          was added to use ADD as much as possible.  */
> -      if (which_alternative == 2)
> +      if (which_alternative == 3)
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
> -        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                     : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
> +        return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> +                     : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> -                   : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
> +                   : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
> +  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "3")
> +     (cond [(eq_attr "alternative" "4")
>                (const_string "lea")
>             (match_operand:SWI48 2 "incdec_operand")
>               (const_string "incdec")
> @@ -6552,26 +6576,29 @@
>         (const_string "*")))
>     (set_attr "mode" "SI")])
>
> -(define_insn "*addhi_1"
> +(define_insn "*addhi_1<nf_name>"
>    [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
>         (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
> -                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
> +                (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
> -       return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
> +       return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
> -         return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
> +         return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
>         }
>
>      default:
> @@ -6581,11 +6608,11 @@
>          std::swap (operands[1], operands[2]);
>
>        if (x86_maybe_negate_const_int (&operands[2], HImode))
> -       return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
> -                      : "sub{w}\t{%2, %0|%0, %2}";
> +       return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
> +                      : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
>
> -      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
> -                    : "add{w}\t{%2, %0|%0, %2}";
> +      return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
> +                    : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
> @@ -6603,33 +6630,36 @@
>         (const_string "*")))
>     (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
>
> -(define_insn "*addqi_1"
> +(define_insn "*addqi_1<nf_name>"
>    [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
>         (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
> -                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
> -   (clobber (reg:CC FLAGS_REG))]
> -  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
> +                (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
> +  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
> +  && <nf_condition>"
>  {
>    bool widen = (get_attr_mode (insn) != MODE_QI);
>    bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
>    switch (get_attr_type (insn))
>      {
>      case TYPE_LEA:
> -      return "#";
> +      if (TARGET_APX_NDD && <nf_applied>)
> +       return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
> +      else
> +       return "#";
>
>      case TYPE_INCDEC:
>        if (operands[2] == const1_rtx)
>         if (use_ndd)
> -         return "inc{b}\t{%1, %0|%0, %1}";
> +         return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
>         else
> -         return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
> +         return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
>        else
>         {
>           gcc_assert (operands[2] == constm1_rtx);
>           if (use_ndd)
> -           return "dec{b}\t{%1, %0|%0, %1}";
> +           return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
>           else
> -           return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
> +           return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
>         }
>
>      default:
> @@ -6641,16 +6671,16 @@
>        if (x86_maybe_negate_const_int (&operands[2], QImode))
>         {
>           if (use_ndd)
> -           return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
> +           return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
>           else
> -           return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
> -                        : "sub{b}\t{%2, %0|%0, %2}";
> +           return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
> +                        : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
>         }
>        if (use_ndd)
> -       return "add{b}\t{%2, %1, %0|%0, %1, %2}";
> +       return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
>        else
> -       return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
> -                    : "add{b}\t{%2, %0|%0, %2}";
> +       return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
> +                    : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
>      }
>  }
>    [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
> @@ -6824,6 +6854,23 @@
>      }
>  })
>
> +(define_split
> +  [(set (match_operand:SWI 0 "register_operand")
> +       (plus:SWI (match_operand:SWI 1 "register_operand")
> +                 (match_operand:SWI 2 "<nonmemory_operand>")))]
> +  "TARGET_APX_NF && reload_completed
> +   && ix86_lea_for_add_ok (insn, operands)"
> +  [(set (match_dup 0)
> +       (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
> +{
> +  if (<MODE>mode != <LEAMODE>mode)
> +    {
> +      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
> +      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
> +      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
> +    }
> +})
> +
>  ;; Convert add to the lea pattern to avoid flags dependency.
>  (define_split
>    [(set (match_operand:DI 0 "register_operand")
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 7151fb1b147..b6f28a2b4bd 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1336,6 +1336,9 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
>  EnumValue
>  Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
>
> +EnumValue
> +Enum(apx_features) String(nf) Value(apx_nf) Set(6)
> +
>  EnumValue
>  Enum(apx_features) String(all) Value(apx_all) Set(1)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> index 0eb751ad225..0ff4df0780c 100644
> --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
> +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile { target { ! ia32 } } } */
> -/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
>  /* { dg-final { scan-assembler-not "movl"} } */
>
>  #include <stdint.h>
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index ef2825803b3..60176ce609f 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,7 +140,8 @@  enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
+  apx_nf = 1<< 4,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 359a8408263..969391d3013 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,7 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
 #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
+#define TARGET_APX_NF (ix86_apx_features & apx_nf)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e8073f5a200..1eeadaddeba 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6290,6 +6290,13 @@ 
   [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
               (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_split
+  [(set (match_operand:SWI48 0 "general_reg_operand")
+	(mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
+  "TARGET_APX_NF && reload_completed"
+  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
+  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
 

 ;; Add instructions
 
@@ -6437,48 +6444,65 @@ 
 	      (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[5]);")
 
-(define_insn "*add<mode>_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
+(define_subst_attr "nf_name" "nf_subst" "_nf" "")
+(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
+(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
+(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
+(define_subst_attr "nf_applied" "nf_subst" "true" "false")
+
+(define_subst "nf_subst"
+  [(set (match_operand:SWI 0)
+        (match_operand:SWI 1))]
+  ""
+  [(set (match_dup 0)
+	(match_dup 1))
+	(clobber (reg:CC FLAGS_REG))])
+
+(define_insn "*add<mode>_1<nf_name>"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r<nf_mem_constraint>,r,r,r,r,r,r")
 	(plus:SWI48
-	  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
-	  (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)"
+	  (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
+	  (match_operand:SWI48 2 "x86_64_general_operand" "r,e,BM,0,le,r,e,BM")))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-        return use_ndd ? "inc{<imodesuffix>}\t{%1, %0|%0, %1}"
-		      : "inc{<imodesuffix>}\t%0";
+        return use_ndd ? "<nf_prefix>inc{<imodesuffix>}\t{%1, %0|%0, %1}"
+		      : "<nf_prefix>inc{<imodesuffix>}\t%0";
       else
         {
 	  gcc_assert (operands[2] == constm1_rtx);
-	  return use_ndd ? "dec{<imodesuffix>}\t{%1, %0|%0, %1}"
-			: "dec{<imodesuffix>}\t%0";
+	  return use_ndd ? "<nf_prefix>dec{<imodesuffix>}\t{%1, %0|%0, %1}"
+			: "<nf_prefix>dec{<imodesuffix>}\t%0";
 	}
 
     default:
       /* For most processors, ADD is faster than LEA.  This alternative
 	 was added to use ADD as much as possible.  */
-      if (which_alternative == 2)
+      if (which_alternative == 3)
         std::swap (operands[1], operands[2]);
         
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
-        return use_ndd ? "sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		      : "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+        return use_ndd ? "<nf_prefix>sub{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		      : "<nf_prefix>sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return use_ndd ? "add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
-		    : "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      return use_ndd ? "<nf_prefix>add{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+		    : "<nf_prefix>add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
+  [(set_attr "isa" "*,*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
    (set (attr "type")
-     (cond [(eq_attr "alternative" "3")
+     (cond [(eq_attr "alternative" "4")
               (const_string "lea")
 	    (match_operand:SWI48 2 "incdec_operand")
 	      (const_string "incdec")
@@ -6552,26 +6576,29 @@ 
 	(const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_insn "*addhi_1"
+(define_insn "*addhi_1<nf_name>"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
 	(plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
-		 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
+		 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))]
+  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{w}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
-	return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
+	return use_ndd ? "<nf_prefix>inc{w}\t{%1, %0|%0, %1}" : "<nf_prefix>inc{w}\t%0";
       else
 	{
 	  gcc_assert (operands[2] == constm1_rtx);
-	  return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
+	  return use_ndd ? "<nf_prefix>dec{w}\t{%1, %0|%0, %1}" : "<nf_prefix>dec{w}\t%0";
 	}
 
     default:
@@ -6581,11 +6608,11 @@ 
         std::swap (operands[1], operands[2]);
 
       if (x86_maybe_negate_const_int (&operands[2], HImode))
-	return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}"
-		       : "sub{w}\t{%2, %0|%0, %2}";
+	return use_ndd ? "<nf_prefix>sub{w}\t{%2, %1, %0|%0, %1, %2}"
+		       : "<nf_prefix>sub{w}\t{%2, %0|%0, %2}";
 
-      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}"
-		     : "add{w}\t{%2, %0|%0, %2}";
+      return use_ndd ? "<nf_prefix>add{w}\t{%2, %1, %0|%0, %1, %2}"
+		     : "<nf_prefix>add{w}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
@@ -6603,33 +6630,36 @@ 
 	(const_string "*")))
    (set_attr "mode" "HI,HI,HI,SI,HI,HI")])
 
-(define_insn "*addqi_1"
+(define_insn "*addqi_1<nf_name>"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
 	(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
-		 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
+		 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)
+  && <nf_condition>"
 {
   bool widen = (get_attr_mode (insn) != MODE_QI);
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
-      return "#";
+      if (TARGET_APX_NDD && <nf_applied>)
+	return "%{nf%} add{b}\t{%2, %1, %0|%0, %1, %2}";
+      else
+	return "#";
 
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
 	if (use_ndd)
-	  return "inc{b}\t{%1, %0|%0, %1}";
+	  return "<nf_prefix>inc{b}\t{%1, %0|%0, %1}";
 	else
-	  return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+	  return widen ? "<nf_prefix>inc{l}\t%k0" : "<nf_prefix>inc{b}\t%0";
       else
 	{
 	  gcc_assert (operands[2] == constm1_rtx);
 	  if (use_ndd)
-	    return "dec{b}\t{%1, %0|%0, %1}";
+	    return "<nf_prefix>dec{b}\t{%1, %0|%0, %1}";
 	  else
-	    return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+	    return widen ? "<nf_prefix>dec{l}\t%k0" : "<nf_prefix>dec{b}\t%0";
 	}
 
     default:
@@ -6641,16 +6671,16 @@ 
       if (x86_maybe_negate_const_int (&operands[2], QImode))
 	{
 	  if (use_ndd)
-	    return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
+	    return "<nf_prefix>sub{b}\t{%2, %1, %0|%0, %1, %2}";
 	  else
-	    return widen ? "sub{l}\t{%2, %k0|%k0, %2}"
-			 : "sub{b}\t{%2, %0|%0, %2}";
+	    return widen ? "<nf_prefix>sub{l}\t{%2, %k0|%k0, %2}"
+			 : "<nf_prefix>sub{b}\t{%2, %0|%0, %2}";
 	}
       if (use_ndd)
-	return "add{b}\t{%2, %1, %0|%0, %1, %2}";
+	return "<nf_prefix>add{b}\t{%2, %1, %0|%0, %1, %2}";
       else
-	return widen ? "add{l}\t{%k2, %k0|%k0, %k2}"
-		     : "add{b}\t{%2, %0|%0, %2}";
+	return widen ? "<nf_prefix>add{l}\t{%k2, %k0|%k0, %k2}"
+		     : "<nf_prefix>add{b}\t{%2, %0|%0, %2}";
     }
 }
   [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
@@ -6824,6 +6854,23 @@ 
     }
 })
 
+(define_split
+  [(set (match_operand:SWI 0 "register_operand")
+	(plus:SWI (match_operand:SWI 1 "register_operand")
+		  (match_operand:SWI 2 "<nonmemory_operand>")))]
+  "TARGET_APX_NF && reload_completed
+   && ix86_lea_for_add_ok (insn, operands)"
+  [(set (match_dup 0)
+	(plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+  if (<MODE>mode != <LEAMODE>mode)
+    {
+      operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+      operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+      operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
+    }
+})
+
 ;; Convert add to the lea pattern to avoid flags dependency.
 (define_split
   [(set (match_operand:DI 0 "register_operand")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 7151fb1b147..b6f28a2b4bd 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1336,6 +1336,9 @@  Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
 EnumValue
 Enum(apx_features) String(ppx) Value(apx_ppx) Set(5)
 
+EnumValue
+Enum(apx_features) String(nf) Value(apx_nf) Set(6)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 0eb751ad225..0ff4df0780c 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-mapxf -march=x86-64 -O2" } */
+/* { dg-options "-mapx-features=egpr,push2pop2,ndd,ppx -march=x86-64 -O2" } */
 /* { dg-final { scan-assembler-not "movl"} } */
 
 #include <stdint.h>