diff mbox series

[APX,ZU] Support APX zero-upper

Message ID DM4PR11MB5487EF70AFE491DD40B364DDECFA2@DM4PR11MB5487.namprd11.prod.outlook.com
State New
Headers show
Series [APX,ZU] Support APX zero-upper | expand

Commit Message

Kong, Lingling June 6, 2024, 8:48 a.m. UTC
Enable ZU for IMUL (opcodes 0x69 and 0x6B) and SETcc.

gcc/ChangeLog:

        * config/i386/i386-opts.h (enum apx_features): Add apx_zu.
        * config/i386/i386.h (TARGET_APX_ZU): Define.
        * config/i386/i386.md (*imulhi<mode>zu): New define_insn.
        (*setcc_<mode>_zu): Ditto.
        * config/i386/i386.opt: Add enum value for zu.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/apx-zu-1.c: New test.
        * gcc.target/i386/apx-zu-2.c: Ditto.

Bootstrapped & regtested on x86-64-pc-linux-gnu with binutils 2.42 branch.
OK for trunk?

---
 gcc/config/i386/i386-opts.h              |  3 +-
 gcc/config/i386/i386.h                   |  1 +
 gcc/config/i386/i386.md                  | 25 ++++++++++++++--
 gcc/config/i386/i386.opt                 |  3 ++
 gcc/testsuite/gcc.target/i386/apx-zu-1.c | 38 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/apx-zu-2.c | 19 ++++++++++++
 6 files changed, 86 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-zu-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-zu-2.c

--
2.31.1

Comments

Hongtao Liu June 13, 2024, 12:50 a.m. UTC | #1
On Thu, Jun 6, 2024 at 4:49 PM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Enable ZU for IMUL (opcodes 0x69 and 0x6B) and SETcc.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-opts.h (enum apx_features):Add apx_zu.
>         * config/i386/i386.h (TARGET_APX_ZU): Define.
>         * config/i386/i386.md (*imulhi<mode>zu): New define_insn.
>         (*setcc_<mode>_zu): Ditto.
>         * config/i386/i386.opt: Add enum value for zu.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/apx-zu-1.c: New test.
>         * gcc.target/i386/apx-zu-2.c: Ditto.
>
> Bootstrapped & regtested on x86-64-pc-linux-gnu with binutils 2.42 branch.
> OK for trunk?
Ok.
>
> ---
>  gcc/config/i386/i386-opts.h              |  3 +-
>  gcc/config/i386/i386.h                   |  1 +
>  gcc/config/i386/i386.md                  | 25 ++++++++++++++--
>  gcc/config/i386/i386.opt                 |  3 ++
>  gcc/testsuite/gcc.target/i386/apx-zu-1.c | 38 ++++++++++++++++++++++++  gcc/testsuite/gcc.target/i386/apx-zu-2.c | 19 ++++++++++++
>  6 files changed, 86 insertions(+), 3 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/apx-zu-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/apx-zu-2.c
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index 5fcc4927978..c7ec0d9fd39 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -142,8 +142,9 @@ enum apx_features {
>    apx_ppx = 1 << 3,
>    apx_nf = 1 << 4,
>    apx_ccmp = 1 << 5,
> +  apx_zu = 1 << 6,
>    apx_all = apx_egpr | apx_push2pop2 | apx_ndd
> -           | apx_ppx | apx_nf | apx_ccmp,
> +           | apx_ppx | apx_nf | apx_ccmp | apx_zu,
>  };
>
>  #endif
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7051c6c13e4..dc1a1f44320 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -57,6 +57,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see  #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)  #define TARGET_APX_NF (ix86_apx_features & apx_nf)
> #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
> +#define TARGET_APX_ZU (ix86_apx_features & apx_zu)
>
>  #include "config/vxworks-dummy.h"
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ffcf63e1cba..a2765f65754 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -9967,6 +9967,19 @@
>             (const_string "direct")))
>     (set_attr "mode" "<MODE>")])
>
> +(define_insn "*imulhi<mode>zu"
> +  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
> +       (zero_extend:SWI48x
> +         (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
> +                  (match_operand:HI 2 "immediate_operand" "K,n"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_APX_ZU"
> +  "@
> +   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
> +   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
> +  [(set_attr "type" "imul")
> +   (set_attr "mode" "HI")])
> +
>  (define_insn "*mulsi3_1_zext"
>    [(set (match_operand:DI 0 "register_operand" "=r,r,r")
>         (zero_extend:DI
> @@ -18354,11 +18367,19 @@
>  ;; For all sCOND expanders, also expand the compare or test insn that  ;; generates cc0.  Generate an equality comparison if `seq' or `sne'.
>
> +(define_insn "*setcc_<mode>_zu"
> +  [(set (match_operand:SWI248 0 "register_operand" "=r")
> +       (match_operator:SWI248 1 "ix86_comparison_operator"
> +         [(reg FLAGS_REG) (const_int 0)]))]
> +  "TARGET_APX_ZU"
> +  "setzu%C1\t%b0"
> +  [(set_attr "type" "setcc")])
> +
>  (define_insn_and_split "*setcc_di_1"
>    [(set (match_operand:DI 0 "register_operand" "=q")
>         (match_operator:DI 1 "ix86_comparison_operator"
>           [(reg FLAGS_REG) (const_int 0)]))]
> -  "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
> +  "!TARGET_APX_ZU && TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
>    "#"
>    "&& reload_completed"
>    [(set (match_dup 2) (match_dup 1))
> @@ -18391,7 +18412,7 @@
>    [(set (match_operand:SWI24 0 "register_operand" "=q")
>         (match_operator:SWI24 1 "ix86_comparison_operator"
>           [(reg FLAGS_REG) (const_int 0)]))]
> -  "!TARGET_PARTIAL_REG_STALL
> +  "!TARGET_APX_ZU && !TARGET_PARTIAL_REG_STALL
>     && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
>    "#"
>    "&& reload_completed"
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 7017cc87cec..353fffb2343 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -1342,6 +1342,9 @@ Enum(apx_features) String(nf) Value(apx_nf) Set(6)  EnumValue
>  Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
>
> +EnumValue
> +Enum(apx_features) String(zu) Value(apx_zu) Set(8)
> +
>  EnumValue
>  Enum(apx_features) String(all) Value(apx_all) Set(1)
>
> diff --git a/gcc/testsuite/gcc.target/i386/apx-zu-1.c b/gcc/testsuite/gcc.target/i386/apx-zu-1.c
> new file mode 100644
> index 00000000000..927a87673a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-zu-1.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +/* { dg-final { scan-assembler-not "setle"} } */
> +/* { dg-final { scan-assembler-not "setge"} } */
> +/* { dg-final { scan-assembler-not "sete"} } */
> +/* { dg-final { scan-assembler-not "xor"} } */
> +/* { dg-final { scan-assembler-times "setzune" 1} } */
> +/* { dg-final { scan-assembler-times "setzule" 1} } */
> +/* { dg-final { scan-assembler-times "setzue" 1} } */
> +/* { dg-final { scan-assembler-times "setzuge" 1} } */
> +/* { dg-final { scan-assembler "imulzu"} } */
> +long long foo0 (int a) {
> +  return a == 0 ? 0 : 1;
> +}
> +
> +long foo1 (int a, int b)
> +{
> +  return a > b ? 0 : 1;
> +}
> +
> +int foo2 (int a, int b)
> +{
> +  return a != b ? 0 : 1;
> +}
> +
> +short foo3 (int a, int b)
> +{
> +  return a < b ? 0 : 1;
> +}
> +
> +unsigned long
> +f1(unsigned short x)
> +{
> +  unsigned short a;
> +  a = x * 1000;
> +  return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/apx-zu-2.c b/gcc/testsuite/gcc.target/i386/apx-zu-2.c
> new file mode 100644
> index 00000000000..3ee04495d98
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-zu-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-require-effective-target apxf } */
> +/* { dg-options "-mapxf -march=x86-64 -O2" } */
> +#include "apx-zu-1.c"
> +
> +int main(void)
> +{
> +  if (foo0 (0))
> +    __builtin_abort ();
> +  if (foo1 (3, 2))
> +    __builtin_abort ();
> +  if (foo2 (3, 2))
> +    __builtin_abort ();
> +  if (foo3 (2, 3))
> +    __builtin_abort ();
> +  if (f1 (2) != 2000)
> +    __builtin_abort ();
> +  return 0;
> +}
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 5fcc4927978..c7ec0d9fd39 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -142,8 +142,9 @@  enum apx_features {
   apx_ppx = 1 << 3,
   apx_nf = 1 << 4,
   apx_ccmp = 1 << 5,
+  apx_zu = 1 << 6,
   apx_all = apx_egpr | apx_push2pop2 | apx_ndd
-	    | apx_ppx | apx_nf | apx_ccmp,
+	    | apx_ppx | apx_nf | apx_ccmp | apx_zu,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7051c6c13e4..dc1a1f44320 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -57,6 +57,7 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
 #define TARGET_APX_NF (ix86_apx_features & apx_nf)
 #define TARGET_APX_CCMP (ix86_apx_features & apx_ccmp)
+#define TARGET_APX_ZU (ix86_apx_features & apx_zu)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ffcf63e1cba..a2765f65754 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9967,6 +9967,19 @@ 
 	    (const_string "direct")))
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*imulhi<mode>zu"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
+	(zero_extend:SWI48x
+	  (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
+		   (match_operand:HI 2 "immediate_operand" "K,n"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_APX_ZU"
+  "@
+   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
+   imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "HI")])
+
 (define_insn "*mulsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
@@ -18354,11 +18367,19 @@ 
 ;; For all sCOND expanders, also expand the compare or test insn that
 ;; generates cc0.  Generate an equality comparison if `seq' or `sne'.
 
+(define_insn "*setcc_<mode>_zu"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(match_operator:SWI248 1 "ix86_comparison_operator"
+	  [(reg FLAGS_REG) (const_int 0)]))]
+  "TARGET_APX_ZU"
+  "setzu%C1\t%b0"
+  [(set_attr "type" "setcc")])
+
 (define_insn_and_split "*setcc_di_1"
   [(set (match_operand:DI 0 "register_operand" "=q")
 	(match_operator:DI 1 "ix86_comparison_operator"
 	  [(reg FLAGS_REG) (const_int 0)]))]
-  "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
+  "!TARGET_APX_ZU && TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
   "#"
   "&& reload_completed"
   [(set (match_dup 2) (match_dup 1))
@@ -18391,7 +18412,7 @@ 
   [(set (match_operand:SWI24 0 "register_operand" "=q")
 	(match_operator:SWI24 1 "ix86_comparison_operator"
 	  [(reg FLAGS_REG) (const_int 0)]))]
-  "!TARGET_PARTIAL_REG_STALL
+  "!TARGET_APX_ZU && !TARGET_PARTIAL_REG_STALL
    && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
   "#"
   "&& reload_completed"
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 7017cc87cec..353fffb2343 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1342,6 +1342,9 @@  Enum(apx_features) String(nf) Value(apx_nf) Set(6)
 EnumValue
 Enum(apx_features) String(ccmp) Value(apx_ccmp) Set(7)
 
+EnumValue
+Enum(apx_features) String(zu) Value(apx_zu) Set(8)
+
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
 
diff --git a/gcc/testsuite/gcc.target/i386/apx-zu-1.c b/gcc/testsuite/gcc.target/i386/apx-zu-1.c
new file mode 100644
index 00000000000..927a87673a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-zu-1.c
@@ -0,0 +1,38 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapxf -march=x86-64 -O2" } */
+/* { dg-final { scan-assembler-not "setle"} } */
+/* { dg-final { scan-assembler-not "setge"} } */
+/* { dg-final { scan-assembler-not "sete"} } */
+/* { dg-final { scan-assembler-not "xor"} } */
+/* { dg-final { scan-assembler-times "setzune" 1} } */
+/* { dg-final { scan-assembler-times "setzule" 1} } */
+/* { dg-final { scan-assembler-times "setzue" 1} } */
+/* { dg-final { scan-assembler-times "setzuge" 1} } */
+/* { dg-final { scan-assembler "imulzu"} } */
+long long foo0 (int a) {
+  return a == 0 ? 0 : 1;
+}
+
+long foo1 (int a, int b)
+{
+  return a > b ? 0 : 1;
+}
+
+int foo2 (int a, int b)
+{
+  return a != b ? 0 : 1;
+}
+
+short foo3 (int a, int b)
+{
+  return a < b ? 0 : 1;
+}
+
+unsigned long
+f1(unsigned short x)
+{
+  unsigned short a;
+  a = x * 1000;
+  return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-zu-2.c b/gcc/testsuite/gcc.target/i386/apx-zu-2.c
new file mode 100644
index 00000000000..3ee04495d98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-zu-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target apxf } */
+/* { dg-options "-mapxf -march=x86-64 -O2" } */ 
+#include "apx-zu-1.c"
+
+int main(void)
+{
+  if (foo0 (0))
+    __builtin_abort ();
+  if (foo1 (3, 2))
+    __builtin_abort ();
+  if (foo2 (3, 2))
+    __builtin_abort ();
+  if (foo3 (2, 3))
+    __builtin_abort ();
+  if (f1 (2) != 2000)
+    __builtin_abort ();
+  return 0;
+}