diff mbox series

[28/62] AVX512FP16: Add vcvtuw2ph/vcvtw2ph/vcvtdq2ph/vcvtudq2ph/vcvtqq2ph/vcvtuqq2ph

Message ID 20210701061648.9447-29-hongtao.liu@intel.com
State New
Headers show
Series Support all AVX512FP16 intrinsics. | expand

Commit Message

liuhongt July 1, 2021, 6:16 a.m. UTC
gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h (_mm512_cvtepi32_ph): New
	intrinsic.
	(_mm512_mask_cvtepi32_ph): Likewise.
	(_mm512_maskz_cvtepi32_ph): Likewise.
	(_mm512_cvt_roundepi32_ph): Likewise.
	(_mm512_mask_cvt_roundepi32_ph): Likewise.
	(_mm512_maskz_cvt_roundepi32_ph): Likewise.
	(_mm512_cvtepu32_ph): Likewise.
	(_mm512_mask_cvtepu32_ph): Likewise.
	(_mm512_maskz_cvtepu32_ph): Likewise.
	(_mm512_cvt_roundepu32_ph): Likewise.
	(_mm512_mask_cvt_roundepu32_ph): Likewise.
	(_mm512_maskz_cvt_roundepu32_ph): Likewise.
	(_mm512_cvtepi64_ph): Likewise.
	(_mm512_mask_cvtepi64_ph): Likewise.
	(_mm512_maskz_cvtepi64_ph): Likewise.
	(_mm512_cvt_roundepi64_ph): Likewise.
	(_mm512_mask_cvt_roundepi64_ph): Likewise.
	(_mm512_maskz_cvt_roundepi64_ph): Likewise.
	(_mm512_cvtepu64_ph): Likewise.
	(_mm512_mask_cvtepu64_ph): Likewise.
	(_mm512_maskz_cvtepu64_ph): Likewise.
	(_mm512_cvt_roundepu64_ph): Likewise.
	(_mm512_mask_cvt_roundepu64_ph): Likewise.
	(_mm512_maskz_cvt_roundepu64_ph): Likewise.
	(_mm512_cvtepi16_ph): Likewise.
	(_mm512_mask_cvtepi16_ph): Likewise.
	(_mm512_maskz_cvtepi16_ph): Likewise.
	(_mm512_cvt_roundepi16_ph): Likewise.
	(_mm512_mask_cvt_roundepi16_ph): Likewise.
	(_mm512_maskz_cvt_roundepi16_ph): Likewise.
	(_mm512_cvtepu16_ph): Likewise.
	(_mm512_mask_cvtepu16_ph): Likewise.
	(_mm512_maskz_cvtepu16_ph): Likewise.
	(_mm512_cvt_roundepu16_ph): Likewise.
	(_mm512_mask_cvt_roundepu16_ph): Likewise.
	(_mm512_maskz_cvt_roundepu16_ph): Likewise.
	* config/i386/avx512fp16vlintrin.h (_mm_cvtepi32_ph): New
	intrinsic.
	(_mm_mask_cvtepi32_ph): Likewise.
	(_mm_maskz_cvtepi32_ph): Likewise.
	(_mm256_cvtepi32_ph): Likewise.
	(_mm256_mask_cvtepi32_ph): Likewise.
	(_mm256_maskz_cvtepi32_ph): Likewise.
	(_mm_cvtepu32_ph): Likewise.
	(_mm_mask_cvtepu32_ph): Likewise.
	(_mm_maskz_cvtepu32_ph): Likewise.
	(_mm256_cvtepu32_ph): Likewise.
	(_mm256_mask_cvtepu32_ph): Likewise.
	(_mm256_maskz_cvtepu32_ph): Likewise.
	(_mm_cvtepi64_ph): Likewise.
	(_mm_mask_cvtepi64_ph): Likewise.
	(_mm_maskz_cvtepi64_ph): Likewise.
	(_mm256_cvtepi64_ph): Likewise.
	(_mm256_mask_cvtepi64_ph): Likewise.
	(_mm256_maskz_cvtepi64_ph): Likewise.
	(_mm_cvtepu64_ph): Likewise.
	(_mm_mask_cvtepu64_ph): Likewise.
	(_mm_maskz_cvtepu64_ph): Likewise.
	(_mm256_cvtepu64_ph): Likewise.
	(_mm256_mask_cvtepu64_ph): Likewise.
	(_mm256_maskz_cvtepu64_ph): Likewise.
	(_mm_cvtepi16_ph): Likewise.
	(_mm_mask_cvtepi16_ph): Likewise.
	(_mm_maskz_cvtepi16_ph): Likewise.
	(_mm256_cvtepi16_ph): Likewise.
	(_mm256_mask_cvtepi16_ph): Likewise.
	(_mm256_maskz_cvtepi16_ph): Likewise.
	(_mm_cvtepu16_ph): Likewise.
	(_mm_mask_cvtepu16_ph): Likewise.
	(_mm_maskz_cvtepu16_ph): Likewise.
	(_mm256_cvtepu16_ph): Likewise.
	(_mm256_mask_cvtepu16_ph): Likewise.
	(_mm256_maskz_cvtepu16_ph): Likewise.
	* config/i386/i386-builtin-types.def: Add corresponding builtin types.
	* config/i386/i386-builtin.def: Add corresponding new builtins.
	* config/i386/i386-expand.c
	(ix86_expand_args_builtin): Handle new builtin types.
	(ix86_expand_round_builtin): Ditto.
	* config/i386/i386-modes.def: Declare V2HF and V6HF.
	* config/i386/sse.md (VI2H_AVX512VL): New.
	(qq2phsuff): Ditto.
	(sseintvecmode): Add HF vector modes.
	(avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode><mask_name><round_name>):
	New.
	(avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>): Ditto.
	(*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>): Ditto.
	(avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask): Ditto.
	(*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask): Ditto.
	(*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask_1): Ditto.
	(avx512fp16_vcvt<floatsuffix>qq2ph_v2di): Ditto.
	(*avx512fp16_vcvt<floatsuffix>qq2ph_v2di): Ditto.
	(avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask): Ditto.
	(*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask): Ditto.
	(*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1): Ditto.
	* config/i386/subst.md (round_qq2phsuff): New subst_attr.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add test for new builtins.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
---
 gcc/config/i386/avx512fp16intrin.h     | 492 +++++++++++++++++++++++++
 gcc/config/i386/avx512fp16vlintrin.h   | 312 ++++++++++++++++
 gcc/config/i386/i386-builtin-types.def |   9 +
 gcc/config/i386/i386-builtin.def       |  18 +
 gcc/config/i386/i386-expand.c          |   9 +
 gcc/config/i386/i386-modes.def         |   2 +
 gcc/config/i386/sse.md                 | 153 +++++++-
 gcc/config/i386/subst.md               |   1 +
 gcc/testsuite/gcc.target/i386/avx-1.c  |   6 +
 gcc/testsuite/gcc.target/i386/sse-13.c |   6 +
 gcc/testsuite/gcc.target/i386/sse-14.c |  18 +
 gcc/testsuite/gcc.target/i386/sse-22.c |  18 +
 gcc/testsuite/gcc.target/i386/sse-23.c |   6 +
 13 files changed, 1047 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 42576c4ae2e..bd801942365 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -2702,6 +2702,172 @@  _mm512_maskz_cvt_roundph_epu32 (__mmask16 __A, __m256h __B, int __C)
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vcvtdq2ph.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi32_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si) __A,
+						    _mm256_setzero_ph (),
+						    (__mmask16) -1,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi32_ph (__m256h __A, __mmask16 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si) __C,
+						    __A,
+						    __B,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi32_ph (__mmask16 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si) __B,
+						    _mm256_setzero_ph (),
+						    __A,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi32_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si) __A,
+						    _mm256_setzero_ph (),
+						    (__mmask16) -1,
+						    __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si) __C,
+						    __A,
+						    __B,
+						    __D);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi32_ph (__mmask16 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si) __B,
+						    _mm256_setzero_ph (),
+						    __A,
+						    __C);
+}
+
+#else
+#define _mm512_cvt_roundepi32_ph(A, B)					\
+  (__builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si)(A),		\
+					      _mm256_setzero_ph (),	\
+					      (__mmask16)-1,		\
+					      (B)))
+
+#define _mm512_mask_cvt_roundepi32_ph(A, B, C, D)		\
+  (__builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si)(C),	\
+					      (A),		\
+					      (B),		\
+					      (D)))
+
+#define _mm512_maskz_cvt_roundepi32_ph(A, B, C)				\
+  (__builtin_ia32_vcvtdq2ph_v16si_mask_round ((__v16si)(B),		\
+					      _mm256_setzero_ph (),	\
+					      (A),			\
+					      (C)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vcvtudq2ph.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu32_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si) __A,
+						     _mm256_setzero_ph (),
+						     (__mmask16) -1,
+						     _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu32_ph (__m256h __A, __mmask16 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si) __C,
+						     __A,
+						     __B,
+						     _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu32_ph (__mmask16 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si) __B,
+						     _mm256_setzero_ph (),
+						     __A,
+						     _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu32_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si) __A,
+						     _mm256_setzero_ph (),
+						     (__mmask16) -1,
+						     __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu32_ph (__m256h __A, __mmask16 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si) __C,
+						     __A,
+						     __B,
+						     __D);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu32_ph (__mmask16 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si) __B,
+						     _mm256_setzero_ph (),
+						     __A,
+						     __C);
+}
+
+#else
+#define _mm512_cvt_roundepu32_ph(A, B)					\
+  (__builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si)(A),		\
+					       _mm256_setzero_ph (),	\
+					       (__mmask16)-1,		\
+					       (B)))
+
+#define _mm512_mask_cvt_roundepu32_ph(A, B, C, D)		\
+  (__builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si)(C),	\
+					       (A),		\
+					       (B),		\
+					       (D)))
+
+#define _mm512_maskz_cvt_roundepu32_ph(A, B, C)				\
+  (__builtin_ia32_vcvtudq2ph_v16si_mask_round ((__v16si)(B),		\
+					       _mm256_setzero_ph (),	\
+					       (A),			\
+					       (C)))
+
+#endif /* __OPTIMIZE__ */
+
 /* Intrinsics vcvtph2qq.  */
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -2853,6 +3019,166 @@  _mm512_maskz_cvt_roundph_epu64 (__mmask8 __A, __m128h __B, int __C)
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vcvtqq2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di) __A,
+						   _mm_setzero_ph (),
+						   (__mmask8) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di) __C,
+						   __A,
+						   __B,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_ph (__mmask8 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di) __B,
+						   _mm_setzero_ph (),
+						   __A,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi64_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di) __A,
+						   _mm_setzero_ph (),
+						   (__mmask8) -1,
+						   __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di) __C,
+						   __A,
+						   __B,
+						   __D);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi64_ph (__mmask8 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di) __B,
+						   _mm_setzero_ph (),
+						   __A,
+						   __C);
+}
+
+#else
+#define _mm512_cvt_roundepi64_ph(A, B)				\
+  (__builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di)(A),	\
+					     _mm_setzero_ph (),	\
+					     (__mmask8)-1,	\
+					     (B)))
+
+#define _mm512_mask_cvt_roundepi64_ph(A, B, C, D)			\
+  (__builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di)(C), (A), (B), (D)))
+
+#define _mm512_maskz_cvt_roundepi64_ph(A, B, C)			\
+  (__builtin_ia32_vcvtqq2ph_v8di_mask_round ((__v8di)(B),	\
+					     _mm_setzero_ph (),	\
+					     (A),		\
+					     (C)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vcvtuqq2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu64_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di) __A,
+						    _mm_setzero_ph (),
+						    (__mmask8) -1,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di) __C,
+						    __A,
+						    __B,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu64_ph (__mmask8 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di) __B,
+						    _mm_setzero_ph (),
+						    __A,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu64_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di) __A,
+						    _mm_setzero_ph (),
+						    (__mmask8) -1,
+						    __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu64_ph (__m128h __A, __mmask8 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di) __C,
+						    __A,
+						    __B,
+						    __D);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu64_ph (__mmask8 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di) __B,
+						    _mm_setzero_ph (),
+						    __A,
+						    __C);
+}
+
+#else
+#define _mm512_cvt_roundepu64_ph(A, B)					\
+  (__builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di)(A),		\
+					      _mm_setzero_ph (),	\
+					      (__mmask8)-1,		\
+					      (B)))
+
+#define _mm512_mask_cvt_roundepu64_ph(A, B, C, D)			\
+  (__builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di)(C), (A), (B), (D)))
+
+#define _mm512_maskz_cvt_roundepu64_ph(A, B, C)				\
+  (__builtin_ia32_vcvtuqq2ph_v8di_mask_round ((__v8di)(B),		\
+					      _mm_setzero_ph (),	\
+					      (A),			\
+					      (C)))
+
+#endif /* __OPTIMIZE__ */
+
 /* Intrinsics vcvtph2w.  */
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -3037,6 +3363,172 @@  _mm512_maskz_cvt_roundph_epu16 (__mmask32 __A, __m512h __B, int __C)
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vcvtw2ph.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi) __A,
+						   _mm512_setzero_ph (),
+						   (__mmask32) -1,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_ph (__m512h __A, __mmask32 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi) __C,
+						   __A,
+						   __B,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_ph (__mmask32 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi) __B,
+						   _mm512_setzero_ph (),
+						   __A,
+						   _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi16_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi) __A,
+						   _mm512_setzero_ph (),
+						   (__mmask32) -1,
+						   __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi) __C,
+						   __A,
+						   __B,
+						   __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi16_ph (__mmask32 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi) __B,
+						   _mm512_setzero_ph (),
+						   __A,
+						   __C);
+}
+
+#else
+#define _mm512_cvt_roundepi16_ph(A, B)					\
+  (__builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi)(A),		\
+					     _mm512_setzero_ph (),	\
+					     (__mmask32)-1,		\
+					     (B)))
+
+#define _mm512_mask_cvt_roundepi16_ph(A, B, C, D)		\
+  (__builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi)(C),	\
+					     (A),		\
+					     (B),		\
+					     (D)))
+
+#define _mm512_maskz_cvt_roundepi16_ph(A, B, C)				\
+  (__builtin_ia32_vcvtw2ph_v32hi_mask_round ((__v32hi)(B),		\
+					     _mm512_setzero_ph (),	\
+					     (A),			\
+					     (C)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vcvtuw2ph.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu16_ph (__m512i __A)
+{
+  return __builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi) __A,
+						    _mm512_setzero_ph (),
+						    (__mmask32) -1,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu16_ph (__m512h __A, __mmask32 __B, __m512i __C)
+{
+  return __builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi) __C,
+						    __A,
+						    __B,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu16_ph (__mmask32 __A, __m512i __B)
+{
+  return __builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi) __B,
+						    _mm512_setzero_ph (),
+						    __A,
+						    _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu16_ph (__m512i __A, int __B)
+{
+  return __builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi) __A,
+						    _mm512_setzero_ph (),
+						    (__mmask32) -1,
+						    __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu16_ph (__m512h __A, __mmask32 __B, __m512i __C, int __D)
+{
+  return __builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi) __C,
+						    __A,
+						    __B,
+						    __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
+{
+  return __builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi) __B,
+						    _mm512_setzero_ph (),
+						    __A,
+						    __C);
+}
+
+#else
+#define _mm512_cvt_roundepu16_ph(A, B)					\
+  (__builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi)(A),		\
+					      _mm512_setzero_ph (),	\
+					      (__mmask32)-1,		\
+					      (B)))
+
+#define _mm512_mask_cvt_roundepu16_ph(A, B, C, D)		\
+  (__builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi)(C),	\
+					      (A),		\
+					      (B),		\
+					      (D)))
+
+#define _mm512_maskz_cvt_roundepu16_ph(A, B, C)				\
+  (__builtin_ia32_vcvtuw2ph_v32hi_mask_round ((__v32hi)(B),		\
+					      _mm512_setzero_ph (),	\
+					      (A),			\
+					      (C)))
+
+#endif /* __OPTIMIZE__ */
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 8a7e0aaa6b1..93d9ff8bf3c 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -1050,6 +1050,110 @@  _mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
 					 __A);
 }
 
+/* Intrinsics vcvtdq2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtdq2ph_v4si_mask ((__v4si) __A,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtdq2ph_v4si_mask ((__v4si) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtdq2ph_v4si_mask ((__v4si) __B,
+					     _mm_setzero_ph (),
+					     __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtdq2ph_v8si_mask ((__v8si) __A,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtdq2ph_v8si_mask ((__v8si) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtdq2ph_v8si_mask ((__v8si) __B,
+					     _mm_setzero_ph (),
+					     __A);
+}
+
+/* Intrinsics vcvtudq2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtudq2ph_v4si_mask ((__v4si) __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtudq2ph_v4si_mask ((__v4si) __C,
+					      __A,
+					      __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtudq2ph_v4si_mask ((__v4si) __B,
+					      _mm_setzero_ph (),
+					      __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtudq2ph_v8si_mask ((__v8si) __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtudq2ph_v8si_mask ((__v8si) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtudq2ph_v8si_mask ((__v8si) __B,
+					      _mm_setzero_ph (),
+					      __A);
+}
+
 /* Intrinsics vcvtph2qq.  */
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1153,6 +1257,108 @@  _mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
 					      __A);
 }
 
+/* Intrinsics vcvtqq2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtqq2ph_v2di_mask ((__v2di) __A,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtqq2ph_v2di_mask ((__v2di) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtqq2ph_v2di_mask ((__v2di) __B,
+					     _mm_setzero_ph (),
+					     __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtqq2ph_v4di_mask ((__v4di) __A,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtqq2ph_v4di_mask ((__v4di) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtqq2ph_v4di_mask ((__v4di) __B,
+					     _mm_setzero_ph (),
+					     __A);
+}
+
+/* Intrinsics vcvtuqq2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu64_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtuqq2ph_v2di_mask ((__v2di) __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtuqq2ph_v2di_mask ((__v2di) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtuqq2ph_v2di_mask ((__v2di) __B,
+					      _mm_setzero_ph (),
+					      __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu64_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtuqq2ph_v4di_mask ((__v4di) __A,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtuqq2ph_v4di_mask ((__v4di) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtuqq2ph_v4di_mask ((__v4di) __B,
+					      _mm_setzero_ph (),
+					      __A);
+}
+
 /* Intrinsics vcvtph2w.  */
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1275,6 +1481,112 @@  _mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B)
 					 __A);
 }
 
+/* Intrinsics vcvtw2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtw2ph_v8hi_mask ((__v8hi) __A,
+					    _mm_setzero_ph (),
+					    (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtw2ph_v8hi_mask ((__v8hi) __C,
+					    __A,
+					    __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtw2ph_v8hi_mask ((__v8hi) __B,
+					    _mm_setzero_ph (),
+					    __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtw2ph_v16hi_mask ((__v16hi) __A,
+					     _mm256_setzero_ph (),
+					     (__mmask16) -1);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtw2ph_v16hi_mask ((__v16hi) __C,
+					     __A,
+					     __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtw2ph_v16hi_mask ((__v16hi) __B,
+					     _mm256_setzero_ph (),
+					     __A);
+}
+
+/* Intrinsics vcvtuw2ph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_ph (__m128i __A)
+{
+  return __builtin_ia32_vcvtuw2ph_v8hi_mask ((__v8hi) __A,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C)
+{
+  return __builtin_ia32_vcvtuw2ph_v8hi_mask ((__v8hi) __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B)
+{
+  return __builtin_ia32_vcvtuw2ph_v8hi_mask ((__v8hi) __B,
+					     _mm_setzero_ph (),
+					     __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu16_ph (__m256i __A)
+{
+  return __builtin_ia32_vcvtuw2ph_v16hi_mask ((__v16hi) __A,
+					      _mm256_setzero_ph (),
+					      (__mmask16) -1);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C)
+{
+  return __builtin_ia32_vcvtuw2ph_v16hi_mask ((__v16hi) __C, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B)
+{
+  return __builtin_ia32_vcvtuw2ph_v16hi_mask ((__v16hi) __B,
+					      _mm256_setzero_ph (),
+					      __A);
+}
+
 #ifdef __DISABLE_AVX512FP16VL__
 #undef __DISABLE_AVX512FP16VL__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index c430dc9ab48..57b9ea786e1 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1316,6 +1316,11 @@  DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI)
 DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI)
 DEF_FUNCTION_TYPE (V8SI, V8HF, V8SI, UQI)
 DEF_FUNCTION_TYPE (V8HI, V8HF, V8HI, UQI)
+DEF_FUNCTION_TYPE (V8HF, V4SI, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, V8SI, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, V2DI, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, V4DI, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, V8HI, V8HF, UQI)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, UQI)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, INT, V8HF, UQI)
@@ -1323,18 +1328,22 @@  DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI)
 DEF_FUNCTION_TYPE (UQI, V8HF, V8HF, INT, UQI, INT)
 DEF_FUNCTION_TYPE (V8DI, V8HF, V8DI, UQI, INT)
+DEF_FUNCTION_TYPE (V8HF, V8DI, V8HF, UQI, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, INT, V8HF, UQI, INT)
 DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF)
 DEF_FUNCTION_TYPE (V16HI, V16HF, V16HI, UHI)
+DEF_FUNCTION_TYPE (V16HF, V16HI, V16HF, UHI)
 DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, UHI)
 DEF_FUNCTION_TYPE (V16SI, V16HF, V16SI, UHI, INT)
 DEF_FUNCTION_TYPE (V16HF, V16HF, INT, V16HF, UHI)
 DEF_FUNCTION_TYPE (UHI, V16HF, V16HF, INT, UHI)
+DEF_FUNCTION_TYPE (V16HF, V16SI, V16HF, UHI, INT)
 DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT)
 DEF_FUNCTION_TYPE (V32HI, V32HF, V32HI, USI, INT)
+DEF_FUNCTION_TYPE (V32HF, V32HI, V32HF, USI, INT)
 DEF_FUNCTION_TYPE (USI, V32HF, V32HF, INT, USI)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, USI, INT)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index dde8af53ff0..44c55876e48 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2843,6 +2843,18 @@  BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v16hi_mask, "__builtin_ia32_vcvtph2w_v16hi_mask", IX86_BUILTIN_VCVTPH2W_V16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v8hi_mask, "__builtin_ia32_vcvtph2uw_v8hi_mask", IX86_BUILTIN_VCVTPH2UW_V8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v16hi_mask, "__builtin_ia32_vcvtph2uw_v16hi_mask", IX86_BUILTIN_VCVTPH2UW_V16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HF_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v8hi_mask, "__builtin_ia32_vcvtw2ph_v8hi_mask", IX86_BUILTIN_VCVTW2PH_V8HI_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v16hi_mask, "__builtin_ia32_vcvtw2ph_v16hi_mask", IX86_BUILTIN_VCVTW2PH_V16HI_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HI_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v8hi_mask, "__builtin_ia32_vcvtuw2ph_v8hi_mask", IX86_BUILTIN_VCVTUW2PH_V8HI_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v16hi_mask, "__builtin_ia32_vcvtuw2ph_v16hi_mask", IX86_BUILTIN_VCVTUW2PH_V16HI_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HI_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v4si_mask, "__builtin_ia32_vcvtdq2ph_v4si_mask", IX86_BUILTIN_VCVTDQ2PH_V4SI_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v8si_mask, "__builtin_ia32_vcvtdq2ph_v8si_mask", IX86_BUILTIN_VCVTDQ2PH_V8SI_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v4si_mask, "__builtin_ia32_vcvtudq2ph_v4si_mask", IX86_BUILTIN_VCVTUDQ2PH_V4SI_MASK, UNKNOWN, (int) V8HF_FTYPE_V4SI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v8si_mask, "__builtin_ia32_vcvtudq2ph_v8si_mask", IX86_BUILTIN_VCVTUDQ2PH_V8SI_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v2di_mask, "__builtin_ia32_vcvtqq2ph_v2di_mask", IX86_BUILTIN_VCVTQQ2PH_V2DI_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v4di_mask, "__builtin_ia32_vcvtqq2ph_v4di_mask", IX86_BUILTIN_VCVTQQ2PH_V4DI_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v2di_mask, "__builtin_ia32_vcvtuqq2ph_v2di_mask", IX86_BUILTIN_VCVTUQQ2PH_V2DI_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DI_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v4di_mask, "__builtin_ia32_vcvtuqq2ph_v4di_mask", IX86_BUILTIN_VCVTUQQ2PH_V4DI_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DI_V8HF_UQI)
 
 /* Builtins with rounding support.  */
 BDESC_END (ARGS, ROUND_ARGS)
@@ -3076,6 +3088,12 @@  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_r
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq_v8di_mask_round", IX86_BUILTIN_VCVTPH2UQQ_V8DI_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w_v32hi_mask_round", IX86_BUILTIN_VCVTPH2W_V32HI_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw_v32hi_mask_round", IX86_BUILTIN_VCVTPH2UW_V32HI_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph_v32hi_mask_round", IX86_BUILTIN_VCVTW2PH_V32HI_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph_v32hi_mask_round", IX86_BUILTIN_VCVTUW2PH_V32HI_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph_v16si_mask_round", IX86_BUILTIN_VCVTDQ2PH_V16SI_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph_v16si_mask_round", IX86_BUILTIN_VCVTUDQ2PH_V16SI_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTUQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
 
 BDESC_END (ROUND_ARGS, MULTI_ARG)
 
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 59d1f4f5eea..7d9e1bd6a2d 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -9574,6 +9574,11 @@  ix86_expand_args_builtin (const struct builtin_description *d,
     case V2DI_FTYPE_V8HF_V2DI_UQI:
     case V2DI_FTYPE_V4SF_V2DI_UQI:
     case V8HF_FTYPE_V8HF_V8HF_UQI:
+    case V8HF_FTYPE_V8HI_V8HF_UQI:
+    case V8HF_FTYPE_V8SI_V8HF_UQI:
+    case V8HF_FTYPE_V4SI_V8HF_UQI:
+    case V8HF_FTYPE_V4DI_V8HF_UQI:
+    case V8HF_FTYPE_V2DI_V8HF_UQI:
     case V4SF_FTYPE_V4DI_V4SF_UQI:
     case V4SF_FTYPE_V2DI_V4SF_UQI:
     case V4DF_FTYPE_V4DI_V4DF_UQI:
@@ -9640,6 +9645,7 @@  ix86_expand_args_builtin (const struct builtin_description *d,
     case V8DI_FTYPE_DI_V8DI_UQI:
     case V16SF_FTYPE_V8SF_V16SF_UHI:
     case V16SI_FTYPE_V8SI_V16SI_UHI:
+    case V16HF_FTYPE_V16HI_V16HF_UHI:
     case V16HI_FTYPE_V16HF_V16HI_UHI:
     case V16HI_FTYPE_V16HI_V16HI_UHI:
     case V8HI_FTYPE_V16QI_V8HI_UQI:
@@ -10513,16 +10519,19 @@  ix86_expand_round_builtin (const struct builtin_description *d,
     case V8DI_FTYPE_V8DF_V8DI_QI_INT:
     case V8SF_FTYPE_V8DI_V8SF_QI_INT:
     case V8DF_FTYPE_V8DI_V8DF_QI_INT:
+    case V32HF_FTYPE_V32HI_V32HF_USI_INT:
     case V32HF_FTYPE_V32HF_V32HF_USI_INT:
     case V16SF_FTYPE_V16SF_V16SF_HI_INT:
     case V8DI_FTYPE_V8SF_V8DI_QI_INT:
     case V16SF_FTYPE_V16SI_V16SF_HI_INT:
     case V16SI_FTYPE_V16SF_V16SI_HI_INT:
     case V16SI_FTYPE_V16HF_V16SI_UHI_INT:
+    case V16HF_FTYPE_V16SI_V16HF_UHI_INT:
     case V8DF_FTYPE_V8SF_V8DF_QI_INT:
     case V16SF_FTYPE_V16HI_V16SF_HI_INT:
     case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
     case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
+    case V8HF_FTYPE_V8DI_V8HF_UQI_INT:
       nargs = 4;
       break;
     case V4SF_FTYPE_V4SF_V4SF_INT_INT:
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index fcadfcd4c94..699f9a234c9 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -90,6 +90,8 @@  VECTOR_MODES (FLOAT, 32);     /*   V16HF V8SF V4DF V2TF */
 VECTOR_MODES (FLOAT, 64);     /*  V32HF V16SF V8DF V4TF */
 VECTOR_MODES (FLOAT, 128);    /* V64HF V32SF V16DF V8TF */
 VECTOR_MODES (FLOAT, 256);    /* V128HF V64SF V32DF V16TF */
+VECTOR_MODE (FLOAT, HF, 2);   /*                   V2HF */
+VECTOR_MODE (FLOAT, HF, 6);   /*                   V6HF */
 VECTOR_MODE (INT, TI, 1);     /*                   V1TI */
 VECTOR_MODE (INT, DI, 1);     /*                   V1DI */
 VECTOR_MODE (INT, SI, 1);     /*                   V1SI */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7b705422396..8b23048a232 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -494,6 +494,11 @@  (define_mode_iterator VI48_AVX512F_AVX512VL
 (define_mode_iterator VI2_AVX512VL
   [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
 
+;; Integer source modes iterated by the generic vcvt*2ph conversion insn.
+(define_mode_iterator VI2H_AVX512VL
+  [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
+   (V8SI "TARGET_AVX512VL") V16SI V8DI])
+
 (define_mode_iterator VI1_AVX512VL_F
   [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
 
@@ -895,9 +900,9 @@  (define_mode_attr avx512fmaskhalfmode
 
 ;; Mapping of vector float modes to an integer mode of the same size
 (define_mode_attr sseintvecmode
-  [(V16SF "V16SI") (V8DF  "V8DI")
-   (V8SF  "V8SI")  (V4DF  "V4DI")
-   (V4SF  "V4SI")  (V2DF  "V2DI")
+  [(V32HF "V32HI") (V16SF "V16SI") (V8DF  "V8DI")
+   (V16HF "V16HI") (V8SF  "V8SI")  (V4DF  "V4DI")
+   (V8HF "V8HI") (V4SF  "V4SI")  (V2DF  "V2DI")
    (V16SI "V16SI") (V8DI  "V8DI")
    (V8SI  "V8SI")  (V4DI  "V4DI")
    (V4SI  "V4SI")  (V2DI  "V2DI")
@@ -5432,6 +5437,11 @@  (define_int_attr sseintconvertsignprefix
 	[(UNSPEC_UNSIGNED_FIX_NOTRUNC "u")
 	 (UNSPEC_FIX_NOTRUNC "")])
 
+;; Mnemonic suffix appended to vcvt*2ph, keyed by the integer source mode.
+(define_mode_attr qq2phsuff
+  [(V32HI "") (V16HI "") (V8HI "") (V16SI "") (V8SI "{y}") (V4SI "{x}")
+   (V8DI "{z}") (V4DI "{y}") (V2DI "{x}")])
+
 (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><mask_name><round_name>"
   [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
         (unspec:VI248_AVX512VL
@@ -5443,6 +5453,149 @@  (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Integer to half-precision conversion for the VI2H_AVX512VL source modes.
+(define_insn "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode><mask_name><round_name>"
+  [(set (match_operand:<ssePHmode> 0 "register_operand" "=v")
+	(any_float:<ssePHmode>
+	  (match_operand:VI2H_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")))]
+  "TARGET_AVX512FP16"
+  "vcvt<floatsuffix><sseintconvert>2ph<round_qq2phsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; VI4_128_8_256 sources yield only a V4HF result; the expander supplies the
+;; zero V4HF that the insn concatenates with it to form the V8HF destination.
+(define_expand "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>"
+  [(set (match_operand:V8HF 0 "register_operand")
+	(vec_concat:V8HF
+	  (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand"))
+	  (match_dup 2)))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "operands[2] = CONST0_RTX (V4HFmode);")
+
+(define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_concat:V8HF
+	  (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm"))
+	  (match_operand:V4HF 2 "const0_operand" "C")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Masked form: the converted V4HF is merged, under mask operand 3, with the
+;; first four elements of operand 2 before being concatenated with zero.
+(define_expand "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask"
+  [(set (match_operand:V8HF 0 "register_operand")
+	(vec_concat:V8HF
+	  (vec_merge:V4HF
+	    (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand"))
+	    (vec_select:V4HF
+	      (match_operand:V8HF 2 "nonimm_or_0_operand")
+	      (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
+	    (match_operand:QI 3 "register_operand"))
+	  (match_dup 4)))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "operands[4] = CONST0_RTX (V4HFmode);")
+
+(define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_concat:V8HF
+	  (vec_merge:V4HF
+	    (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm"))
+	    (vec_select:V4HF
+	      (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
+	      (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
+	    (match_operand:QI 3 "register_operand" "Yk"))
+	  (match_operand:V4HF 4 "const0_operand" "C")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Masked form whose merge source is constant zero; the template emits {z}.
+(define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask_1"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_concat:V8HF
+	  (vec_merge:V4HF
+	    (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm"))
+	    (match_operand:V4HF 3 "const0_operand" "C")
+	    (match_operand:QI 2 "register_operand" "Yk"))
+	  (match_operand:V4HF 4 "const0_operand" "C")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; V2DI sources yield a V2HF result, which is concatenated with a zero V6HF.
+(define_expand "avx512fp16_vcvt<floatsuffix>qq2ph_v2di"
+  [(set (match_operand:V8HF 0 "register_operand")
+	(vec_concat:V8HF
+	  (any_float:V2HF (match_operand:V2DI 1 "vector_operand"))
+	  (match_dup 2)))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "operands[2] = CONST0_RTX (V6HFmode);")
+
+(define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_concat:V8HF
+	  (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm"))
+	  (match_operand:V6HF 2 "const0_operand" "C")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "vcvt<floatsuffix>qq2ph{x}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
+;; Masked V2DI form; only the first two elements of operand 2 are selected.
+(define_expand "avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask"
+  [(set (match_operand:V8HF 0 "register_operand")
+	(vec_concat:V8HF
+	  (vec_merge:V2HF
+	    (any_float:V2HF (match_operand:V2DI 1 "vector_operand"))
+	    (vec_select:V2HF
+	      (match_operand:V8HF 2 "nonimm_or_0_operand")
+	      (parallel [(const_int 0) (const_int 1)]))
+	    (match_operand:QI 3 "register_operand"))
+	  (match_dup 4)))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "operands[4] = CONST0_RTX (V6HFmode);")
+
+(define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_concat:V8HF
+	  (vec_merge:V2HF
+	    (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm"))
+	    (vec_select:V2HF
+	      (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
+	      (parallel [(const_int 0) (const_int 1)]))
+	    (match_operand:QI 3 "register_operand" "Yk"))
+	  (match_operand:V6HF 4 "const0_operand" "C")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "vcvt<floatsuffix>qq2ph{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
+;; Masked V2DI form whose merge source is constant zero; emits {z}.
+(define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1"
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_concat:V8HF
+	  (vec_merge:V2HF
+	    (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm"))
+	    (match_operand:V2HF 3 "const0_operand" "C")
+	    (match_operand:QI 2 "register_operand" "Yk"))
+	  (match_operand:V6HF 4 "const0_operand" "C")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+  "vcvt<floatsuffix>qq2ph{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index ecb158f07e5..2e9c2b38e25 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -134,6 +134,7 @@  (define_subst_attr "round_mask_op3" "round" "" "<round_mask_operand3>")
 (define_subst_attr "round_mask_op4" "round" "" "<round_mask_operand4>")
 (define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>")
 (define_subst_attr "round_constraint" "round" "vm" "v")
+(define_subst_attr "round_qq2phsuff" "round" "<qq2phsuff>" "")
 (define_subst_attr "bcst_round_constraint" "round" "vmBr" "v")
 (define_subst_attr "round_constraint2" "round" "m" "v")
 (define_subst_attr "round_constraint3" "round" "rm" "r")
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index cdfc2e3b69f..b569cc0bdd9 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -725,6 +725,12 @@ 
 #define __builtin_ia32_vcvtph2uqq_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq_v8di_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtph2w_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w_v32hi_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtph2uw_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtw2ph_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtuw2ph_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtdq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph_v16si_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 5e4aaf8ce9b..07e59118438 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -742,6 +742,12 @@ 
 #define __builtin_ia32_vcvtph2uqq_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq_v8di_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtph2w_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w_v32hi_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtph2uw_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtw2ph_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtuw2ph_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtdq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph_v16si_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 32aa4518703..0530192d97e 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -684,6 +684,12 @@  test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8)
 test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8)
 test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8)
 test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8)
+test_1 (_mm512_cvt_roundepi16_ph, __m512h, __m512i, 8)
+test_1 (_mm512_cvt_roundepu16_ph, __m512h, __m512i, 8)
+test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8)
+test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
+test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
+test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
 test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
@@ -722,6 +728,12 @@  test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8)
 test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8)
 test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8)
 test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8)
+test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
 test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
 test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -766,6 +778,12 @@  test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8)
 test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8)
 test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8)
 test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8)
+test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepu32_ph, __m256h, __m256h, __mmask16, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 44ac10d602f..04e6340516b 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -789,6 +789,12 @@  test_1 (_mm512_cvt_roundph_epi32, __m512i, __m256h, 8)
 test_1 (_mm512_cvt_roundph_epu32, __m512i, __m256h, 8)
 test_1 (_mm512_cvt_roundph_epi64, __m512i, __m128h, 8)
 test_1 (_mm512_cvt_roundph_epu64, __m512i, __m128h, 8)
+test_1 (_mm512_cvt_roundepi16_ph, __m512h, __m512i, 8)
+test_1 (_mm512_cvt_roundepu16_ph, __m512h, __m512i, 8)
+test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8)
+test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
+test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
+test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
 test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
@@ -826,6 +832,12 @@  test_2 (_mm512_maskz_cvt_roundph_epi32, __m512i, __mmask16, __m256h, 8)
 test_2 (_mm512_maskz_cvt_roundph_epu32, __m512i, __mmask16, __m256h, 8)
 test_2 (_mm512_maskz_cvt_roundph_epi64, __m512i, __mmask8, __m128h, 8)
 test_2 (_mm512_maskz_cvt_roundph_epu64, __m512i, __mmask8, __m128h, 8)
+test_2 (_mm512_maskz_cvt_roundepi16_ph, __m512h, __mmask32, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepu16_ph, __m512h, __mmask32, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
+test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
 test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
 test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -869,6 +881,12 @@  test_3 (_mm512_mask_cvt_roundph_epi32, __m512i, __m512i, __mmask16, __m256h, 8)
 test_3 (_mm512_mask_cvt_roundph_epu32, __m512i, __m512i, __mmask16, __m256h, 8)
 test_3 (_mm512_mask_cvt_roundph_epi64, __m512i, __m512i, __mmask8, __m128h, 8)
 test_3 (_mm512_mask_cvt_roundph_epu64, __m512i, __m512i, __mmask8, __m128h, 8)
+test_3 (_mm512_mask_cvt_roundepi16_ph, __m512h, __m512h, __mmask32, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepu16_ph, __m512h, __m512h, __mmask32, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepi32_ph, __m256h, __m256h, __mmask16, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepu32_ph, __m256h, __m256h, __mmask16, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepi64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
+test_3 (_mm512_mask_cvt_roundepu64_ph, __m128h, __m128h, __mmask8, __m512i, 8)
 test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
 test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
 test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index ae6151b4a61..684891cc98b 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -743,6 +743,12 @@ 
 #define __builtin_ia32_vcvtph2uqq_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uqq_v8di_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtph2w_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtph2w_v32hi_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtph2uw_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtph2uw_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtw2ph_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtw2ph_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtuw2ph_v32hi_mask_round(A, B, C, D) __builtin_ia32_vcvtuw2ph_v32hi_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtdq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtdq2ph_v16si_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)