diff mbox series

[2/7] AVX512FP16: Add expander for fmahf4

Message ID 20210923054640.1406227-3-hongtao.liu@intel.com
State New
Headers show
Series AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes | expand

Commit Message

liuhongt Sept. 23, 2021, 5:46 a.m. UTC
gcc/ChangeLog:

	* config/i386/sse.md (FMAMODEM): Extend to handle FP16.
	(VFH_SF_AVX512VL): Extend to handle HFmode.
	(VF_SF_AVX512VL): Deleted.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-fma-1.c: New test.
	* gcc.target/i386/avx512fp16vl-fma-1.c: New test.
	* gcc.target/i386/avx512fp16vl-fma-vectorize-1.c: New test.
---
 gcc/config/i386/sse.md                        | 11 +--
 .../gcc.target/i386/avx512fp16-fma-1.c        | 69 ++++++++++++++++++
 .../gcc.target/i386/avx512fp16vl-fma-1.c      | 70 +++++++++++++++++++
 .../i386/avx512fp16vl-fma-vectorize-1.c       | 45 ++++++++++++
 4 files changed, 190 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c
diff mbox series

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9079613e829..1ca95984afc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4650,7 +4650,11 @@  (define_mode_iterator FMAMODEM
    (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
    (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
    (V16SF "TARGET_AVX512F")
-   (V8DF "TARGET_AVX512F")])
+   (V8DF "TARGET_AVX512F")
+   (HF "TARGET_AVX512FP16")
+   (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (V32HF "TARGET_AVX512FP16")])
 
 (define_expand "fma<mode>4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
@@ -4758,14 +4762,11 @@  (define_insn "*fma_fmadd_<mode>"
    (set_attr "mode" "<MODE>")])
 
 ;; Suppose AVX-512F as baseline
-(define_mode_iterator VF_SF_AVX512VL
-  [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
-   DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
-
 (define_mode_iterator VFH_SF_AVX512VL
   [(V32HF "TARGET_AVX512FP16")
    (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
    (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+   (HF "TARGET_AVX512FP16")
    SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
    DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c
new file mode 100644
index 00000000000..d78d7629838
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c
@@ -0,0 +1,69 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16" } */
+
+typedef _Float16 v32hf __attribute__ ((__vector_size__ (64)));
+
+_Float16
+foo1 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+_Float16
+foo2 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+_Float16
+foo3 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+_Float16
+foo4 (_Float16 a, _Float16 b, _Float16 c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v32hf
+foo5 (v32hf a, v32hf b, v32hf c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
+v32hf
+foo6 (v32hf a, v32hf b, v32hf c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
+v32hf
+foo7 (v32hf a, v32hf b, v32hf c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
+v32hf
+foo8 (v32hf a, v32hf b, v32hf c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c
new file mode 100644
index 00000000000..1a832f37d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c
@@ -0,0 +1,70 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */
+
+typedef _Float16 v8hf __attribute__ ((__vector_size__ (16)));
+typedef _Float16 v16hf __attribute__ ((__vector_size__ (32)));
+
+v8hf
+foo1 (v8hf a, v8hf b, v8hf c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v8hf
+foo2 (v8hf a, v8hf b, v8hf c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v8hf
+foo3 (v8hf a, v8hf b, v8hf c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v8hf
+foo4 (v8hf a, v8hf b, v8hf c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+v16hf
+foo5 (v16hf a, v16hf b, v16hf c)
+{
+  return a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+v16hf
+foo6 (v16hf a, v16hf b, v16hf c)
+{
+  return -a * b + c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+v16hf
+foo7 (v16hf a, v16hf b, v16hf c)
+{
+  return a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
+v16hf
+foo8 (v16hf a, v16hf b, v16hf c)
+{
+  return -a * b - c;
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c
new file mode 100644
index 00000000000..d0b8bec34f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c
@@ -0,0 +1,45 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */
+
+typedef _Float16 v8hf __attribute__ ((__vector_size__ (16)));
+typedef _Float16 v16hf __attribute__ ((__vector_size__ (32)));
+
+void
+foo1 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+  for (int i = 0; i != 8; i++)
+    pd[i] = pa[i] * pb[i] + pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+void
+foo2 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+  for (int i = 0; i != 8; i++)
+    pd[i] = -pa[i] * pb[i] + pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+void
+foo3 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+  for (int i = 0; i != 8; i++)
+    pd[i] = pa[i] * pb[i] - pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */
+
+void
+foo4 (_Float16* __restrict pa, _Float16* __restrict pb,
+      _Float16* __restrict pc, _Float16* __restrict pd)
+{
+  for (int i = 0; i != 8; i++)
+    pd[i] = -pa[i] * pb[i] - pc[i];
+}
+
+/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */