
AVX512FP16: Adjust builtin name for FP16 builtins to match AVX512F style

Message ID 20210915090807.17351-1-hongyu.wang@intel.com
State New
Headers show
Series AVX512FP16: Adjust builtin name for FP16 builtins to match AVX512F style

Commit Message

Hongyu Wang Sept. 15, 2021, 9:08 a.m. UTC
Hi,

The AVX512FP16 builtins currently use names like vaddph_v8hf, while
AVX512F builtins use names like addps128, following the SSE/AVX style.
Adjust the AVX512FP16 builtin names to match that format.
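
For example (taken from the diff below), the 512-bit masked add
intrinsic now calls the 512-suffixed builtin instead of the
V32HF-suffixed one:

  /* before */
  return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B);
  /* after */
  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);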

Bootstrapped/regtested on x86_64-*-linux, ok for master?

gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h: Adjust all builtin calls.
	* config/i386/avx512fp16vlintrin.h: Likewise.
	* config/i386/i386-builtin.def: Adjust builtin name to match
	AVX512F style.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Adjust builtin macros.
	* gcc.target/i386/sse-13.c: Likewise.
	* gcc.target/i386/sse-23.c: Likewise.
---
 gcc/config/i386/avx512fp16intrin.h     | 1236 ++++++++++++------------
 gcc/config/i386/avx512fp16vlintrin.h   |  454 ++++-----
 gcc/config/i386/i386-builtin.def       |  130 +--
 gcc/testsuite/gcc.target/i386/avx-1.c  |   58 +-
 gcc/testsuite/gcc.target/i386/sse-13.c |   58 +-
 gcc/testsuite/gcc.target/i386/sse-23.c |   58 +-
 6 files changed, 997 insertions(+), 997 deletions(-)

Comments

Hongtao Liu Sept. 15, 2021, 9:21 a.m. UTC | #1
On Wed, Sep 15, 2021 at 5:08 PM Hongyu Wang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> The AVX512FP16 builtins currently use names like vaddph_v8hf, while
> AVX512F builtins use names like addps128, following the SSE/AVX style.
> Adjust the AVX512FP16 builtin names to match that format.
>
> Bootstrapped/regtested on x86_64-*-linux, ok for master?
>
> gcc/ChangeLog:
>
>         * config/i386/avx512fp16intrin.h: Adjust all builtin calls.
>         * config/i386/avx512fp16vlintrin.h: Likewise.
>         * config/i386/i386-builtin.def: Adjust builtin name to match
>         AVX512F style.
>

+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
Could you also change IX86_BUILTIN_VSCALEFPH_V16HF_MASK to
IX86_BUILTIN_VSCALEFPH256_MASK, and similarly for the others?
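
For instance, with that rename the scalefph256 entry above would become
(just a sketch of the requested change; everything but the enum name
stays the same):

BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_VSCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)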

> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx-1.c: Adjust builtin macros.
>         * gcc.target/i386/sse-13.c: Likewise.
>         * gcc.target/i386/sse-23.c: Likewise.
> ---
>  gcc/config/i386/avx512fp16intrin.h     | 1236 ++++++++++++------------
>  gcc/config/i386/avx512fp16vlintrin.h   |  454 ++++-----
>  gcc/config/i386/i386-builtin.def       |  130 +--
>  gcc/testsuite/gcc.target/i386/avx-1.c  |   58 +-
>  gcc/testsuite/gcc.target/i386/sse-13.c |   58 +-
>  gcc/testsuite/gcc.target/i386/sse-23.c |   58 +-
>  6 files changed, 997 insertions(+), 997 deletions(-)
>
> diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
> index 2fbfc140c44..5d66ca5c820 100644
> --- a/gcc/config/i386/avx512fp16intrin.h
> +++ b/gcc/config/i386/avx512fp16intrin.h
> @@ -229,15 +229,15 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
>  {
> -  return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
>  {
> -  return __builtin_ia32_vaddph_v32hf_mask (__B, __C,
> -                                          _mm512_setzero_ph (), __A);
> +  return __builtin_ia32_addph512_mask (__B, __C,
> +                                      _mm512_setzero_ph (), __A);
>  }
>
>  extern __inline __m512h
> @@ -251,15 +251,15 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
>  {
> -  return __builtin_ia32_vsubph_v32hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
>  {
> -  return __builtin_ia32_vsubph_v32hf_mask (__B, __C,
> -                                          _mm512_setzero_ph (), __A);
> +  return __builtin_ia32_subph512_mask (__B, __C,
> +                                      _mm512_setzero_ph (), __A);
>  }
>
>  extern __inline __m512h
> @@ -273,15 +273,15 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
>  {
> -  return __builtin_ia32_vmulph_v32hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
>  {
> -  return __builtin_ia32_vmulph_v32hf_mask (__B, __C,
> -                                          _mm512_setzero_ph (), __A);
> +  return __builtin_ia32_mulph512_mask (__B, __C,
> +                                      _mm512_setzero_ph (), __A);
>  }
>
>  extern __inline __m512h
> @@ -295,15 +295,15 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
>  {
> -  return __builtin_ia32_vdivph_v32hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
>  {
> -  return __builtin_ia32_vdivph_v32hf_mask (__B, __C,
> -                                          _mm512_setzero_ph (), __A);
> +  return __builtin_ia32_divph512_mask (__B, __C,
> +                                      _mm512_setzero_ph (), __A);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -311,9 +311,9 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vaddph_v32hf_mask_round (__A, __B,
> -                                                _mm512_setzero_ph (),
> -                                                (__mmask32) -1, __C);
> +  return __builtin_ia32_addph512_mask_round (__A, __B,
> +                                            _mm512_setzero_ph (),
> +                                            (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -321,7 +321,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                           __m512h __D, const int __E)
>  {
> -  return __builtin_ia32_vaddph_v32hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m512h
> @@ -329,18 +329,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vaddph_v32hf_mask_round (__B, __C,
> -                                                _mm512_setzero_ph (),
> -                                                __A, __D);
> +  return __builtin_ia32_addph512_mask_round (__B, __C,
> +                                            _mm512_setzero_ph (),
> +                                            __A, __D);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vsubph_v32hf_mask_round (__A, __B,
> -                                                _mm512_setzero_ph (),
> -                                                (__mmask32) -1, __C);
> +  return __builtin_ia32_subph512_mask_round (__A, __B,
> +                                            _mm512_setzero_ph (),
> +                                            (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -348,7 +348,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                           __m512h __D, const int __E)
>  {
> -  return __builtin_ia32_vsubph_v32hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m512h
> @@ -356,18 +356,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vsubph_v32hf_mask_round (__B, __C,
> -                                                _mm512_setzero_ph (),
> -                                                __A, __D);
> +  return __builtin_ia32_subph512_mask_round (__B, __C,
> +                                            _mm512_setzero_ph (),
> +                                            __A, __D);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vmulph_v32hf_mask_round (__A, __B,
> -                                                _mm512_setzero_ph (),
> -                                                (__mmask32) -1, __C);
> +  return __builtin_ia32_mulph512_mask_round (__A, __B,
> +                                            _mm512_setzero_ph (),
> +                                            (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -375,7 +375,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                           __m512h __D, const int __E)
>  {
> -  return __builtin_ia32_vmulph_v32hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m512h
> @@ -383,18 +383,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vmulph_v32hf_mask_round (__B, __C,
> -                                                _mm512_setzero_ph (),
> -                                                __A, __D);
> +  return __builtin_ia32_mulph512_mask_round (__B, __C,
> +                                            _mm512_setzero_ph (),
> +                                            __A, __D);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vdivph_v32hf_mask_round (__A, __B,
> -                                                _mm512_setzero_ph (),
> -                                                (__mmask32) -1, __C);
> +  return __builtin_ia32_divph512_mask_round (__A, __B,
> +                                            _mm512_setzero_ph (),
> +                                            (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -402,7 +402,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                           __m512h __D, const int __E)
>  {
> -  return __builtin_ia32_vdivph_v32hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m512h
> @@ -410,67 +410,67 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vdivph_v32hf_mask_round (__B, __C,
> -                                                _mm512_setzero_ph (),
> -                                                __A, __D);
> +  return __builtin_ia32_divph512_mask_round (__B, __C,
> +                                            _mm512_setzero_ph (),
> +                                            __A, __D);
>  }
>  #else
>  #define _mm512_add_round_ph(A, B, C)                                   \
> -  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((A), (B),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (__mmask32)-1, (C)))
> +  ((__m512h)__builtin_ia32_addph512_mask_round((A), (B),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (__mmask32)-1, (C)))
>
> -#define _mm512_mask_add_round_ph(A, B, C, D, E)                        \
> -  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((C), (D), (A), (B), (E)))
> +#define _mm512_mask_add_round_ph(A, B, C, D, E)                                \
> +  ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
>
>  #define _mm512_maskz_add_round_ph(A, B, C, D)                          \
> -  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((B), (C),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (A), (D)))
> +  ((__m512h)__builtin_ia32_addph512_mask_round((B), (C),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (A), (D)))
>
>  #define _mm512_sub_round_ph(A, B, C)                                   \
> -  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((A), (B),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (__mmask32)-1, (C)))
> +  ((__m512h)__builtin_ia32_subph512_mask_round((A), (B),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (__mmask32)-1, (C)))
>
> -#define _mm512_mask_sub_round_ph(A, B, C, D, E)                        \
> -  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((C), (D), (A), (B), (E)))
> +#define _mm512_mask_sub_round_ph(A, B, C, D, E)                                \
> +  ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
>
>  #define _mm512_maskz_sub_round_ph(A, B, C, D)                          \
> -  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((B), (C),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (A), (D)))
> +  ((__m512h)__builtin_ia32_subph512_mask_round((B), (C),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (A), (D)))
>
>  #define _mm512_mul_round_ph(A, B, C)                                   \
> -  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((A), (B),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (__mmask32)-1, (C)))
> +  ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (__mmask32)-1, (C)))
>
> -#define _mm512_mask_mul_round_ph(A, B, C, D, E)                        \
> -  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((C), (D), (A), (B), (E)))
> +#define _mm512_mask_mul_round_ph(A, B, C, D, E)                                \
> +  ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
>
>  #define _mm512_maskz_mul_round_ph(A, B, C, D)                          \
> -  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((B), (C),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (A), (D)))
> +  ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (A), (D)))
>
>  #define _mm512_div_round_ph(A, B, C)                                   \
> -  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((A), (B),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (__mmask32)-1, (C)))
> +  ((__m512h)__builtin_ia32_divph512_mask_round((A), (B),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (__mmask32)-1, (C)))
>
> -#define _mm512_mask_div_round_ph(A, B, C, D, E)                        \
> -  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((C), (D), (A), (B), (E)))
> +#define _mm512_mask_div_round_ph(A, B, C, D, E)                                \
> +  ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
>
>  #define _mm512_maskz_div_round_ph(A, B, C, D)                          \
> -  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((B), (C),           \
> -                                                  _mm512_setzero_ph (),\
> -                                                  (A), (D)))
> +  ((__m512h)__builtin_ia32_divph512_mask_round((B), (C),               \
> +                                              _mm512_setzero_ph (),    \
> +                                              (A), (D)))
>  #endif  /* __OPTIMIZE__  */
>
>  /* Intrinsics of v[add,sub,mul,div]sh.  */
>  extern __inline __m128h
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_add_sh (__m128h __A, __m128h __B)
>  {
>    __A[0] += __B[0];
> @@ -481,15 +481,15 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vaddsh_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vaddsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
> +                                   __A);
>  }
>
>  extern __inline __m128h
> @@ -504,15 +504,15 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vsubsh_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vsubsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
> +                                   __A);
>  }
>
>  extern __inline __m128h
> @@ -527,14 +527,14 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vmulsh_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vmulsh_v8hf_mask (__B, __C, _mm_setzero_ph (), __A);
> +  return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
>  }
>
>  extern __inline __m128h
> @@ -549,15 +549,15 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vdivsh_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vdivsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
> +                                   __A);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -565,9 +565,9 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vaddsh_v8hf_mask_round (__A, __B,
> -                                               _mm_setzero_ph (),
> -                                               (__mmask8) -1, __C);
> +  return __builtin_ia32_addsh_mask_round (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -575,7 +575,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                        __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vaddsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m128h
> @@ -583,18 +583,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                         const int __D)
>  {
> -  return __builtin_ia32_vaddsh_v8hf_mask_round (__B, __C,
> -                                               _mm_setzero_ph (),
> -                                               __A, __D);
> +  return __builtin_ia32_addsh_mask_round (__B, __C,
> +                                         _mm_setzero_ph (),
> +                                         __A, __D);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vsubsh_v8hf_mask_round (__A, __B,
> -                                               _mm_setzero_ph (),
> -                                               (__mmask8) -1, __C);
> +  return __builtin_ia32_subsh_mask_round (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -602,7 +602,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                        __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vsubsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m128h
> @@ -610,18 +610,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                         const int __D)
>  {
> -  return __builtin_ia32_vsubsh_v8hf_mask_round (__B, __C,
> -                                               _mm_setzero_ph (),
> -                                               __A, __D);
> +  return __builtin_ia32_subsh_mask_round (__B, __C,
> +                                         _mm_setzero_ph (),
> +                                         __A, __D);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vmulsh_v8hf_mask_round (__A, __B,
> -                                               _mm_setzero_ph (),
> -                                               (__mmask8) -1, __C);
> +  return __builtin_ia32_mulsh_mask_round (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -629,7 +629,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                        __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vmulsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m128h
> @@ -637,18 +637,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                         const int __D)
>  {
> -  return __builtin_ia32_vmulsh_v8hf_mask_round (__B, __C,
> -                                               _mm_setzero_ph (),
> -                                               __A, __D);
> +  return __builtin_ia32_mulsh_mask_round (__B, __C,
> +                                         _mm_setzero_ph (),
> +                                         __A, __D);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vdivsh_v8hf_mask_round (__A, __B,
> -                                               _mm_setzero_ph (),
> -                                               (__mmask8) -1, __C);
> +  return __builtin_ia32_divsh_mask_round (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -656,7 +656,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                        __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vdivsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m128h
> @@ -664,62 +664,62 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                         const int __D)
>  {
> -  return __builtin_ia32_vdivsh_v8hf_mask_round (__B, __C,
> -                                               _mm_setzero_ph (),
> -                                               __A, __D);
> +  return __builtin_ia32_divsh_mask_round (__B, __C,
> +                                         _mm_setzero_ph (),
> +                                         __A, __D);
>  }
>  #else
>  #define _mm_add_round_sh(A, B, C)                                      \
> -  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((A), (B),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (__mmask8)-1, (C)))
> +  ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B),                 \
> +                                            _mm_setzero_ph (),         \
> +                                            (__mmask8)-1, (C)))
>
>  #define _mm_mask_add_round_sh(A, B, C, D, E)                           \
> -  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> +  ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm_maskz_add_round_sh(A, B, C, D)                             \
> -  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((B), (C),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (A), (D)))
> +#define _mm_maskz_add_round_sh(A, B, C, D)                     \
> +  ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C),         \
> +                                            _mm_setzero_ph (), \
> +                                            (A), (D)))
>
>  #define _mm_sub_round_sh(A, B, C)                                      \
> -  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((A), (B),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (__mmask8)-1, (C)))
> +  ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B),                 \
> +                                            _mm_setzero_ph (),         \
> +                                            (__mmask8)-1, (C)))
>
>  #define _mm_mask_sub_round_sh(A, B, C, D, E)                           \
> -  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> +  ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm_maskz_sub_round_sh(A, B, C, D)                             \
> -  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((B), (C),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (A), (D)))
> +#define _mm_maskz_sub_round_sh(A, B, C, D)                     \
> +  ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C),         \
> +                                            _mm_setzero_ph (), \
> +                                            (A), (D)))
>
>  #define _mm_mul_round_sh(A, B, C)                                      \
> -  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((A), (B),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (__mmask8)-1, (C)))
> +  ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B),                 \
> +                                            _mm_setzero_ph (),         \
> +                                            (__mmask8)-1, (C)))
>
>  #define _mm_mask_mul_round_sh(A, B, C, D, E)                           \
> -  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> +  ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm_maskz_mul_round_sh(A, B, C, D)                             \
> -  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((B), (C),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (A), (D)))
> +#define _mm_maskz_mul_round_sh(A, B, C, D)                     \
> +  ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C),         \
> +                                            _mm_setzero_ph (), \
> +                                            (A), (D)))
>
>  #define _mm_div_round_sh(A, B, C)                                      \
> -  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((A), (B),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (__mmask8)-1, (C)))
> +  ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B),                 \
> +                                            _mm_setzero_ph (),         \
> +                                            (__mmask8)-1, (C)))
>
>  #define _mm_mask_div_round_sh(A, B, C, D, E)                           \
> -  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> +  ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm_maskz_div_round_sh(A, B, C, D)                             \
> -  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((B), (C),           \
> -                                                  _mm_setzero_ph (),   \
> -                                                  (A), (D)))
> +#define _mm_maskz_div_round_sh(A, B, C, D)                     \
> +  ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C),         \
> +                                            _mm_setzero_ph (), \
> +                                            (A), (D)))
>  #endif /* __OPTIMIZE__ */
>
>  /* Intrinsic vmaxph vminph.  */
> @@ -727,48 +727,48 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_max_ph (__m512h __A, __m512h __B)
>  {
> -  return __builtin_ia32_vmaxph_v32hf_mask (__A, __B,
> -                                          _mm512_setzero_ph (),
> -                                          (__mmask32) -1);
> +  return __builtin_ia32_maxph512_mask (__A, __B,
> +                                      _mm512_setzero_ph (),
> +                                      (__mmask32) -1);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
>  {
> -  return __builtin_ia32_vmaxph_v32hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
>  {
> -  return __builtin_ia32_vmaxph_v32hf_mask (__B, __C,
> -                                          _mm512_setzero_ph (), __A);
> +  return __builtin_ia32_maxph512_mask (__B, __C,
> +                                      _mm512_setzero_ph (), __A);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_min_ph (__m512h __A, __m512h __B)
>  {
> -  return __builtin_ia32_vminph_v32hf_mask (__A, __B,
> -                                          _mm512_setzero_ph (),
> -                                          (__mmask32) -1);
> +  return __builtin_ia32_minph512_mask (__A, __B,
> +                                      _mm512_setzero_ph (),
> +                                      (__mmask32) -1);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
>  {
> -  return __builtin_ia32_vminph_v32hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
>  {
> -  return __builtin_ia32_vminph_v32hf_mask (__B, __C,
> -                                          _mm512_setzero_ph (), __A);
> +  return __builtin_ia32_minph512_mask (__B, __C,
> +                                      _mm512_setzero_ph (), __A);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -776,9 +776,9 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vmaxph_v32hf_mask_round (__A, __B,
> -                                                _mm512_setzero_ph (),
> -                                                (__mmask32) -1, __C);
> +  return __builtin_ia32_maxph512_mask_round (__A, __B,
> +                                            _mm512_setzero_ph (),
> +                                            (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -786,7 +786,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                           __m512h __D, const int __E)
>  {
> -  return __builtin_ia32_vmaxph_v32hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m512h
> @@ -794,18 +794,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vmaxph_v32hf_mask_round (__B, __C,
> -                                                _mm512_setzero_ph (),
> -                                                __A, __D);
> +  return __builtin_ia32_maxph512_mask_round (__B, __C,
> +                                            _mm512_setzero_ph (),
> +                                            __A, __D);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vminph_v32hf_mask_round (__A, __B,
> -                                                _mm512_setzero_ph (),
> -                                                (__mmask32) -1, __C);
> +  return __builtin_ia32_minph512_mask_round (__A, __B,
> +                                            _mm512_setzero_ph (),
> +                                            (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -813,7 +813,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                           __m512h __D, const int __E)
>  {
> -  return __builtin_ia32_vminph_v32hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m512h
> @@ -821,37 +821,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vminph_v32hf_mask_round (__B, __C,
> -                                                _mm512_setzero_ph (),
> -                                                __A, __D);
> +  return __builtin_ia32_minph512_mask_round (__B, __C,
> +                                            _mm512_setzero_ph (),
> +                                            __A, __D);
>  }
>
>  #else
> -#define _mm512_max_round_ph(A, B, C)                                   \
> -  (__builtin_ia32_vmaxph_v32hf_mask_round ((A), (B),                   \
> -                                          _mm512_setzero_ph (),        \
> -                                          (__mmask32)-1, (C)))
> +#define _mm512_max_round_ph(A, B, C)                           \
> +  (__builtin_ia32_maxph512_mask_round ((A), (B),               \
> +                                      _mm512_setzero_ph (),    \
> +                                      (__mmask32)-1, (C)))
>
>  #define _mm512_mask_max_round_ph(A, B, C, D, E)                                \
> -  (__builtin_ia32_vmaxph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> +  (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm512_maskz_max_round_ph(A, B, C, D)                          \
> -  (__builtin_ia32_vmaxph_v32hf_mask_round ((B), (C),                   \
> -                                          _mm512_setzero_ph (),        \
> -                                          (A), (D)))
> +#define _mm512_maskz_max_round_ph(A, B, C, D)                  \
> +  (__builtin_ia32_maxph512_mask_round ((B), (C),               \
> +                                      _mm512_setzero_ph (),    \
> +                                      (A), (D)))
>
> -#define _mm512_min_round_ph(A, B, C)                                   \
> -  (__builtin_ia32_vminph_v32hf_mask_round ((A), (B),                   \
> -                                          _mm512_setzero_ph (),        \
> -                                          (__mmask32)-1, (C)))
> +#define _mm512_min_round_ph(A, B, C)                           \
> +  (__builtin_ia32_minph512_mask_round ((A), (B),               \
> +                                      _mm512_setzero_ph (),    \
> +                                      (__mmask32)-1, (C)))
>
>  #define _mm512_mask_min_round_ph(A, B, C, D, E)                                \
> -  (__builtin_ia32_vminph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> +  (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm512_maskz_min_round_ph(A, B, C, D)                          \
> -  (__builtin_ia32_vminph_v32hf_mask_round ((B), (C),                   \
> -                                          _mm512_setzero_ph (),        \
> -                                          (A), (D)))
> +#define _mm512_maskz_min_round_ph(A, B, C, D)                  \
> +  (__builtin_ia32_minph512_mask_round ((B), (C),               \
> +                                      _mm512_setzero_ph (),    \
> +                                      (A), (D)))
>  #endif /* __OPTIMIZE__ */
>
>  /* Intrinsic vmaxsh vminsh.  */
> @@ -867,15 +867,15 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vmaxsh_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vmaxsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
> +                                   __A);
>  }
>
>  extern __inline __m128h
> @@ -890,15 +890,15 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vminsh_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vminsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
> +                                   __A);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -906,9 +906,9 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vmaxsh_v8hf_mask_round (__A, __B,
> -                                               _mm_setzero_ph (),
> -                                               (__mmask8) -1, __C);
> +  return __builtin_ia32_maxsh_mask_round (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -916,7 +916,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                        __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vmaxsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m128h
> @@ -924,18 +924,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                         const int __D)
>  {
> -  return __builtin_ia32_vmaxsh_v8hf_mask_round (__B, __C,
> -                                               _mm_setzero_ph (),
> -                                               __A, __D);
> +  return __builtin_ia32_maxsh_mask_round (__B, __C,
> +                                         _mm_setzero_ph (),
> +                                         __A, __D);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vminsh_v8hf_mask_round (__A, __B,
> -                                               _mm_setzero_ph (),
> -                                               (__mmask8) -1, __C);
> +  return __builtin_ia32_minsh_mask_round (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -943,7 +943,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                        __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vminsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> +  return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
>  }
>
>  extern __inline __m128h
> @@ -951,37 +951,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                         const int __D)
>  {
> -  return __builtin_ia32_vminsh_v8hf_mask_round (__B, __C,
> -                                               _mm_setzero_ph (),
> -                                               __A, __D);
> +  return __builtin_ia32_minsh_mask_round (__B, __C,
> +                                         _mm_setzero_ph (),
> +                                         __A, __D);
>  }
>
>  #else
> -#define _mm_max_round_sh(A, B, C)                                      \
> -  (__builtin_ia32_vmaxsh_v8hf_mask_round ((A), (B),                    \
> -                                         _mm_setzero_ph (),            \
> -                                         (__mmask8)-1, (C)))
> +#define _mm_max_round_sh(A, B, C)                      \
> +  (__builtin_ia32_maxsh_mask_round ((A), (B),          \
> +                                   _mm_setzero_ph (),  \
> +                                   (__mmask8)-1, (C)))
>
> -#define _mm_mask_max_round_sh(A, B, C, D, E)                           \
> -  (__builtin_ia32_vmaxsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> +#define _mm_mask_max_round_sh(A, B, C, D, E)                   \
> +  (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm_maskz_max_round_sh(A, B, C, D)                             \
> -  (__builtin_ia32_vmaxsh_v8hf_mask_round ((B), (C),                    \
> -                                         _mm_setzero_ph (),            \
> -                                         (A), (D)))
> +#define _mm_maskz_max_round_sh(A, B, C, D)             \
> +  (__builtin_ia32_maxsh_mask_round ((B), (C),          \
> +                                   _mm_setzero_ph (),  \
> +                                   (A), (D)))
>
> -#define _mm_min_round_sh(A, B, C)                                      \
> -  (__builtin_ia32_vminsh_v8hf_mask_round ((A), (B),                    \
> -                                         _mm_setzero_ph (),            \
> -                                         (__mmask8)-1, (C)))
> +#define _mm_min_round_sh(A, B, C)                      \
> +  (__builtin_ia32_minsh_mask_round ((A), (B),          \
> +                                   _mm_setzero_ph (),  \
> +                                   (__mmask8)-1, (C)))
>
> -#define _mm_mask_min_round_sh(A, B, C, D, E)                           \
> -  (__builtin_ia32_vminsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> +#define _mm_mask_min_round_sh(A, B, C, D, E)                   \
> +  (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm_maskz_min_round_sh(A, B, C, D)                             \
> -  (__builtin_ia32_vminsh_v8hf_mask_round ((B), (C),                    \
> -                                         _mm_setzero_ph (),            \
> -                                         (A), (D)))
> +#define _mm_maskz_min_round_sh(A, B, C, D)             \
> +  (__builtin_ia32_minsh_mask_round ((B), (C),          \
> +                                   _mm_setzero_ph (),  \
> +                                   (A), (D)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -991,8 +991,8 @@ extern __inline __mmask32
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
>  {
> -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__A, __B, __C,
> -                                                      (__mmask32) -1);
> +  return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
> +                                                  (__mmask32) -1);
>  }
>
>  extern __inline __mmask32
> @@ -1000,8 +1000,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
>                          const int __D)
>  {
> -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__B, __C, __D,
> -                                                      __A);
> +  return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
> +                                                  __A);
>  }
>
>  extern __inline __mmask32
> @@ -1009,9 +1009,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
>                           const int __D)
>  {
> -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__A, __B,
> -                                                            __C, (__mmask32) -1,
> -                                                            __D);
> +  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
> +                                                        __C, (__mmask32) -1,
> +                                                        __D);
>  }
>
>  extern __inline __mmask32
> @@ -1019,23 +1019,23 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
>                                const int __D, const int __E)
>  {
> -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__B, __C,
> -                                                            __D, __A,
> -                                                            __E);
> +  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
> +                                                        __D, __A,
> +                                                        __E);
>  }
>
>  #else
>  #define _mm512_cmp_ph_mask(A, B, C)                    \
> -  (__builtin_ia32_vcmpph_v32hf_mask ((A), (B), (C), (-1)))
> +  (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
>
>  #define _mm512_mask_cmp_ph_mask(A, B, C, D)            \
> -  (__builtin_ia32_vcmpph_v32hf_mask ((B), (C), (D), (A)))
> +  (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
>
> -#define _mm512_cmp_round_ph_mask(A, B, C, D)           \
> -  (__builtin_ia32_vcmpph_v32hf_mask_round ((A), (B), (C), (-1), (D)))
> +#define _mm512_cmp_round_ph_mask(A, B, C, D)                           \
> +  (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
>
> -#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)   \
> -  (__builtin_ia32_vcmpph_v32hf_mask_round ((B), (C), (D), (A), (E)))
> +#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)                   \
> +  (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -1046,9 +1046,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
>  {
>    return (__mmask8)
> -    __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B,
> -                                          __C, (__mmask8) -1,
> -                                          _MM_FROUND_CUR_DIRECTION);
> +    __builtin_ia32_cmpsh_mask_round (__A, __B,
> +                                    __C, (__mmask8) -1,
> +                                    _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __mmask8
> @@ -1057,9 +1057,9 @@ _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
>                       const int __D)
>  {
>    return (__mmask8)
> -    __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C,
> -                                          __D, __A,
> -                                          _MM_FROUND_CUR_DIRECTION);
> +    __builtin_ia32_cmpsh_mask_round (__B, __C,
> +                                    __D, __A,
> +                                    _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __mmask8
> @@ -1067,9 +1067,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
>                        const int __D)
>  {
> -  return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B,
> -                                                          __C, (__mmask8) -1,
> -                                                          __D);
> +  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
> +                                                    __C, (__mmask8) -1,
> +                                                    __D);
>  }
>
>  extern __inline __mmask8
> @@ -1077,25 +1077,25 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
>                             const int __D, const int __E)
>  {
> -  return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C,
> -                                                          __D, __A,
> -                                                          __E);
> +  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
> +                                                    __D, __A,
> +                                                    __E);
>  }
>
>  #else
> -#define _mm_cmp_sh_mask(A, B, C)               \
> -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), \
> -                                         (_MM_FROUND_CUR_DIRECTION)))
> +#define _mm_cmp_sh_mask(A, B, C)                                       \
> +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1),               \
> +                                   (_MM_FROUND_CUR_DIRECTION)))
>
> -#define _mm_mask_cmp_sh_mask(A, B, C, D)       \
> -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A),          \
> -                                         (_MM_FROUND_CUR_DIRECTION)))
> +#define _mm_mask_cmp_sh_mask(A, B, C, D)                               \
> +  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A),                        \
> +                                   (_MM_FROUND_CUR_DIRECTION)))
>
> -#define _mm_cmp_round_sh_mask(A, B, C, D)                              \
> -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), (D)))
> +#define _mm_cmp_round_sh_mask(A, B, C, D)                      \
> +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
>
> -#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)      \
> -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A), (E)))
> +#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)              \
> +  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -1104,134 +1104,134 @@ extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_comieq_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OS,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_comilt_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OS,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_comile_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OS,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_comigt_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OS,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_comige_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OS,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_comineq_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_US,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ucomieq_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OQ,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ucomilt_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OQ,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ucomile_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OQ,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ucomigt_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OQ,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ucomige_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OQ,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ucomineq_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_UQ,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  #ifdef __OPTIMIZE__
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -  _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
> +_mm_comi_sh (__m128h __A, __m128h __B, const int __P)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P,
> -                                               (__mmask8) -1,
> -                                               _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
> +                                         (__mmask8) -1,
> +                                         _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline int
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
>  {
> -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P,
> -                                               (__mmask8) -1,__R);
> +  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
> +                                         (__mmask8) -1,__R);
>  }
>
>  #else
> -#define _mm_comi_round_sh(A, B, P, R)          \
> -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
> -#define _mm_comi_sh(A, B, P)           \
> -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), \
> -                                         _MM_FROUND_CUR_DIRECTION))
> +#define _mm_comi_round_sh(A, B, P, R)                                  \
> +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
> +#define _mm_comi_sh(A, B, P)                                           \
> +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1),    \
> +                                   _MM_FROUND_CUR_DIRECTION))
>
>  #endif /* __OPTIMIZE__  */
>
> @@ -1240,28 +1240,28 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_sqrt_ph (__m512h __A)
>  {
> -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__A,
> -                                                 _mm512_setzero_ph(),
> -                                                 (__mmask32) -1,
> -                                                 _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_sqrtph512_mask_round (__A,
> +                                             _mm512_setzero_ph(),
> +                                             (__mmask32) -1,
> +                                             _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
>  {
> -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__C, __A, __B,
> -                                                 _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
> +                                             _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
>  {
> -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__B,
> -                                                 _mm512_setzero_ph (),
> -                                                 __A,
> -                                                 _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_sqrtph512_mask_round (__B,
> +                                             _mm512_setzero_ph (),
> +                                             __A,
> +                                             _MM_FROUND_CUR_DIRECTION);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -1269,9 +1269,9 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_sqrt_round_ph (__m512h __A, const int __B)
>  {
> -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__A,
> -                                                 _mm512_setzero_ph(),
> -                                                 (__mmask32) -1, __B);
> +  return __builtin_ia32_sqrtph512_mask_round (__A,
> +                                             _mm512_setzero_ph(),
> +                                             (__mmask32) -1, __B);
>  }
>
>  extern __inline __m512h
> @@ -1279,31 +1279,31 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__C, __A, __B, __D);
> +  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__B,
> -                                                 _mm512_setzero_ph (),
> -                                                 __A, __C);
> +  return __builtin_ia32_sqrtph512_mask_round (__B,
> +                                             _mm512_setzero_ph (),
> +                                             __A, __C);
>  }
>
>  #else
> -#define _mm512_sqrt_round_ph(A, B)                                     \
> -  (__builtin_ia32_vsqrtph_v32hf_mask_round ((A),                       \
> -                                           _mm512_setzero_ph (),       \
> -                                           (__mmask32)-1, (B)))
> +#define _mm512_sqrt_round_ph(A, B)                             \
> +  (__builtin_ia32_sqrtph512_mask_round ((A),                   \
> +                                       _mm512_setzero_ph (),   \
> +                                       (__mmask32)-1, (B)))
>
> -#define _mm512_mask_sqrt_round_ph(A, B, C, D)                          \
> -  (__builtin_ia32_vsqrtph_v32hf_mask_round ((C), (A), (B), (D)))
> +#define _mm512_mask_sqrt_round_ph(A, B, C, D)                  \
> +  (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
>
> -#define _mm512_maskz_sqrt_round_ph(A, B, C)                            \
> -  (__builtin_ia32_vsqrtph_v32hf_mask_round ((B),                       \
> -                                           _mm512_setzero_ph (),       \
> -                                           (A), (C)))
> +#define _mm512_maskz_sqrt_round_ph(A, B, C)                    \
> +  (__builtin_ia32_sqrtph512_mask_round ((B),                   \
> +                                       _mm512_setzero_ph (),   \
> +                                       (A), (C)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -1312,23 +1312,23 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_rsqrt_ph (__m512h __A)
>  {
> -  return __builtin_ia32_vrsqrtph_v32hf_mask (__A, _mm512_setzero_ph (),
> -                                            (__mmask32) -1);
> +  return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
> +                                        (__mmask32) -1);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
>  {
> -  return __builtin_ia32_vrsqrtph_v32hf_mask (__C, __A, __B);
> +  return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
>  {
> -  return __builtin_ia32_vrsqrtph_v32hf_mask (__B, _mm512_setzero_ph (),
> -                                            __A);
> +  return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
> +                                        __A);
>  }
>
>  /* Intrinsics vrsqrtsh.  */
> @@ -1336,23 +1336,23 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_rsqrt_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vrsqrtsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
> -                                           (__mmask8) -1);
> +  return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
> +                                     (__mmask8) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vrsqrtsh_v8hf_mask (__D, __C, __A, __B);
> +  return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vrsqrtsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
> -                                           __A);
> +  return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
> +                                     __A);
>  }
>
>  /* Intrinsics vsqrtsh.  */
> @@ -1360,27 +1360,27 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_sqrt_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__B, __A,
> -                                                _mm_setzero_ph (),
> -                                                (__mmask8) -1,
> -                                                _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
> +                                          _mm_setzero_ph (),
> +                                          (__mmask8) -1,
> +                                          _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__D, __C, __A, __B,
> -                                                _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
> +                                          _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__C, __B,
> -                                                _mm_setzero_ph (),
> -                                                __A, _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
> +                                          _mm_setzero_ph (),
> +                                          __A, _MM_FROUND_CUR_DIRECTION);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -1388,9 +1388,9 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__B, __A,
> -                                                _mm_setzero_ph (),
> -                                                (__mmask8) -1, __C);
> +  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
> +                                          _mm_setzero_ph (),
> +                                          (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -1398,8 +1398,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                         __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__D, __C, __A, __B,
> -                                                __E);
> +  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
> +                                          __E);
>  }
>
>  extern __inline __m128h
> @@ -1407,24 +1407,24 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                          const int __D)
>  {
> -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__C, __B,
> -                                                _mm_setzero_ph (),
> -                                                __A, __D);
> +  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
> +                                          _mm_setzero_ph (),
> +                                          __A, __D);
>  }
>
>  #else
>  #define _mm_sqrt_round_sh(A, B, C)                             \
> -  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((B), (A),           \
> -                                          _mm_setzero_ph (),   \
> -                                          (__mmask8)-1, (C)))
> +  (__builtin_ia32_sqrtsh_mask_round ((B), (A),                 \
> +                                    _mm_setzero_ph (),         \
> +                                    (__mmask8)-1, (C)))
>
>  #define _mm_mask_sqrt_round_sh(A, B, C, D, E)                  \
> -  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((D), (C), (A), (B), (E)))
> +  (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
>
> -#define _mm_maskz_sqrt_round_sh(A, B, C, D)                    \
> -  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((C), (B),           \
> -                                          _mm_setzero_ph (),   \
> -                                          (A), (D)))
> +#define _mm_maskz_sqrt_round_sh(A, B, C, D)            \
> +  (__builtin_ia32_sqrtsh_mask_round ((C), (B),         \
> +                                    _mm_setzero_ph (), \
> +                                    (A), (D)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -1433,23 +1433,23 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_rcp_ph (__m512h __A)
>  {
> -  return __builtin_ia32_vrcpph_v32hf_mask (__A, _mm512_setzero_ph (),
> -                                          (__mmask32) -1);
> +  return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
> +                                      (__mmask32) -1);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
>  {
> -  return __builtin_ia32_vrcpph_v32hf_mask (__C, __A, __B);
> +  return __builtin_ia32_rcpph512_mask (__C, __A, __B);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
>  {
> -  return __builtin_ia32_vrcpph_v32hf_mask (__B, _mm512_setzero_ph (),
> -                                          __A);
> +  return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
> +                                      __A);
>  }
>
>  /* Intrinsics vrcpsh.  */
> @@ -1457,23 +1457,23 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_rcp_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vrcpsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
> -                                         (__mmask8) -1);
> +  return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
> +                                   (__mmask8) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vrcpsh_v8hf_mask (__D, __C, __A, __B);
> +  return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vrcpsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
> +                                   __A);
>  }
>
>  /* Intrinsics vscalefph.  */
> @@ -1481,28 +1481,28 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_scalef_ph (__m512h __A, __m512h __B)
>  {
> -  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
> -                                                   _mm512_setzero_ph (),
> -                                                   (__mmask32) -1,
> -                                                   _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_scalefph512_mask_round (__A, __B,
> +                                               _mm512_setzero_ph (),
> +                                               (__mmask32) -1,
> +                                               _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
>  {
> -  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
> -                                                   _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
> +                                               _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
>  {
> -  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
> -                                                   _mm512_setzero_ph (),
> -                                                   __A,
> -                                                   _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_scalefph512_mask_round (__B, __C,
> +                                               _mm512_setzero_ph (),
> +                                               __A,
> +                                               _MM_FROUND_CUR_DIRECTION);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -1510,9 +1510,9 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
>  {
> -  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
> -                                                   _mm512_setzero_ph (),
> -                                                   (__mmask32) -1, __C);
> +  return __builtin_ia32_scalefph512_mask_round (__A, __B,
> +                                               _mm512_setzero_ph (),
> +                                               (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -1520,8 +1520,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                              __m512h __D, const int __E)
>  {
> -  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
> -                                                   __E);
> +  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
> +                                               __E);
>  }
>
>  extern __inline __m512h
> @@ -1529,24 +1529,24 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
>                               const int __D)
>  {
> -  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
> -                                                   _mm512_setzero_ph (),
> -                                                   __A, __D);
> +  return __builtin_ia32_scalefph512_mask_round (__B, __C,
> +                                               _mm512_setzero_ph (),
> +                                               __A, __D);
>  }
>
>  #else
> -#define _mm512_scalef_round_ph(A, B, C)                                        \
> -  (__builtin_ia32_vscalefph_v32hf_mask_round ((A), (B),                        \
> -                                             _mm512_setzero_ph (),     \
> -                                             (__mmask32)-1, (C)))
> +#define _mm512_scalef_round_ph(A, B, C)                                \
> +  (__builtin_ia32_scalefph512_mask_round ((A), (B),            \
> +                                         _mm512_setzero_ph (), \
> +                                         (__mmask32)-1, (C)))
>
>  #define _mm512_mask_scalef_round_ph(A, B, C, D, E)                     \
> -  (__builtin_ia32_vscalefph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> +  (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm512_maskz_scalef_round_ph(A, B, C, D)                       \
> -  (__builtin_ia32_vscalefph_v32hf_mask_round ((B), (C),                        \
> -                                             _mm512_setzero_ph (),     \
> -                                             (A), (D)))
> +#define _mm512_maskz_scalef_round_ph(A, B, C, D)               \
> +  (__builtin_ia32_scalefph512_mask_round ((B), (C),            \
> +                                         _mm512_setzero_ph (), \
> +                                         (A), (D)))
>
>  #endif  /* __OPTIMIZE__ */
>
> @@ -1555,28 +1555,28 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_scalef_sh (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
> -                                                  _mm_setzero_ph (),
> -                                                  (__mmask8) -1,
> -                                                  _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_scalefsh_mask_round (__A, __B,
> +                                            _mm_setzero_ph (),
> +                                            (__mmask8) -1,
> +                                            _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
> -                                                  _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
> +                                            _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
> -                                                  _mm_setzero_ph (),
> -                                                  __A,
> -                                                  _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_scalefsh_mask_round (__B, __C,
> +                                            _mm_setzero_ph (),
> +                                            __A,
> +                                            _MM_FROUND_CUR_DIRECTION);
>  }
>
>  #ifdef __OPTIMIZE__
> @@ -1584,9 +1584,9 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
>  {
> -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
> -                                                  _mm_setzero_ph (),
> -                                                  (__mmask8) -1, __C);
> +  return __builtin_ia32_scalefsh_mask_round (__A, __B,
> +                                            _mm_setzero_ph (),
> +                                            (__mmask8) -1, __C);
>  }
>
>  extern __inline __m128h
> @@ -1594,8 +1594,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                           __m128h __D, const int __E)
>  {
> -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
> -                                                  __E);
> +  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
> +                                            __E);
>  }
>
>  extern __inline __m128h
> @@ -1603,23 +1603,23 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                            const int __D)
>  {
> -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
> -                                                  _mm_setzero_ph (),
> -                                                  __A, __D);
> +  return __builtin_ia32_scalefsh_mask_round (__B, __C,
> +                                            _mm_setzero_ph (),
> +                                            __A, __D);
>  }
>
>  #else
> -#define _mm_scalef_round_sh(A, B, C)                                     \
> -  (__builtin_ia32_vscalefsh_v8hf_mask_round ((A), (B),                   \
> -                                            _mm_setzero_ph (),           \
> -                                            (__mmask8)-1, (C)))
> +#define _mm_scalef_round_sh(A, B, C)                           \
> +  (__builtin_ia32_scalefsh_mask_round ((A), (B),               \
> +                                      _mm_setzero_ph (),       \
> +                                      (__mmask8)-1, (C)))
>
> -#define _mm_mask_scalef_round_sh(A, B, C, D, E)                                  \
> -  (__builtin_ia32_vscalefsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> +#define _mm_mask_scalef_round_sh(A, B, C, D, E)                                \
> +  (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm_maskz_scalef_round_sh(A, B, C, D)                            \
> -  (__builtin_ia32_vscalefsh_v8hf_mask_round ((B), (C), _mm_setzero_ph (), \
> -                                            (A), (D)))
> +#define _mm_maskz_scalef_round_sh(A, B, C, D)                          \
> +  (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (),    \
> +                                      (A), (D)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -1629,37 +1629,37 @@ extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_reduce_ph (__m512h __A, int __B)
>  {
> -  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
> -                                                   _mm512_setzero_ph (),
> -                                                   (__mmask32) -1,
> -                                                   _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_reduceph512_mask_round (__A, __B,
> +                                               _mm512_setzero_ph (),
> +                                               (__mmask32) -1,
> +                                               _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
>  {
> -  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
> -                                                   _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
> +                                               _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
>  {
> -  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
> -                                                   _mm512_setzero_ph (),
> -                                                   __A,
> -                                                   _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_reduceph512_mask_round (__B, __C,
> +                                               _mm512_setzero_ph (),
> +                                               __A,
> +                                               _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
>  {
> -  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
> -                                                   _mm512_setzero_ph (),
> -                                                   (__mmask32) -1, __C);
> +  return __builtin_ia32_reduceph512_mask_round (__A, __B,
> +                                               _mm512_setzero_ph (),
> +                                               (__mmask32) -1, __C);
>  }
>
>  extern __inline __m512h
> @@ -1667,8 +1667,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
>                              int __D, const int __E)
>  {
> -  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
> -                                                   __E);
> +  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
> +                                               __E);
>  }
>
>  extern __inline __m512h
> @@ -1676,39 +1676,39 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
>                               const int __D)
>  {
> -  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
> -                                                   _mm512_setzero_ph (),
> -                                                   __A, __D);
> +  return __builtin_ia32_reduceph512_mask_round (__B, __C,
> +                                               _mm512_setzero_ph (),
> +                                               __A, __D);
>  }
>
>  #else
>  #define _mm512_reduce_ph(A, B)                                         \
> -  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B),                        \
> -                                             _mm512_setzero_ph (),     \
> -                                             (__mmask32)-1,            \
> -                                             _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_reduceph512_mask_round ((A), (B),                    \
> +                                         _mm512_setzero_ph (),         \
> +                                         (__mmask32)-1,                \
> +                                         _MM_FROUND_CUR_DIRECTION))
>
>  #define _mm512_mask_reduce_ph(A, B, C, D)                              \
> -  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B),      \
> -                                             _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B),          \
> +                                         _MM_FROUND_CUR_DIRECTION))
>
>  #define _mm512_maskz_reduce_ph(A, B, C)                                        \
> -  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C),                        \
> -                                             _mm512_setzero_ph (),     \
> -                                             (A), _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_reduceph512_mask_round ((B), (C),                    \
> +                                         _mm512_setzero_ph (),         \
> +                                         (A), _MM_FROUND_CUR_DIRECTION))
>
> -#define _mm512_reduce_round_ph(A, B, C)                                        \
> -  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B),                        \
> -                                             _mm512_setzero_ph (),     \
> -                                             (__mmask32)-1, (C)))
> +#define _mm512_reduce_round_ph(A, B, C)                                \
> +  (__builtin_ia32_reduceph512_mask_round ((A), (B),            \
> +                                         _mm512_setzero_ph (), \
> +                                         (__mmask32)-1, (C)))
>
>  #define _mm512_mask_reduce_round_ph(A, B, C, D, E)                     \
> -  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> +  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm512_maskz_reduce_round_ph(A, B, C, D)                       \
> -  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C),                        \
> -                                             _mm512_setzero_ph (),     \
> -                                             (A), (D)))
> +#define _mm512_maskz_reduce_round_ph(A, B, C, D)               \
> +  (__builtin_ia32_reduceph512_mask_round ((B), (C),            \
> +                                         _mm512_setzero_ph (), \
> +                                         (A), (D)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -1718,10 +1718,10 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
>  {
> -  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
> -                                                  _mm_setzero_ph (),
> -                                                  (__mmask8) -1,
> -                                                  _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
> +                                            _mm_setzero_ph (),
> +                                            (__mmask8) -1,
> +                                            _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
> @@ -1729,26 +1729,26 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                     __m128h __D, int __E)
>  {
> -  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A, __B,
> -                                                  _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
> +                                            _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
>  {
> -  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
> -                                                  _mm_setzero_ph (), __A,
> -                                                  _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
> +                                            _mm_setzero_ph (), __A,
> +                                            _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
>  {
> -  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
> -                                                  _mm_setzero_ph (),
> -                                                  (__mmask8) -1, __D);
> +  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
> +                                            _mm_setzero_ph (),
> +                                            (__mmask8) -1, __D);
>  }
>
>  extern __inline __m128h
> @@ -1756,8 +1756,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                           __m128h __D, int __E, const int __F)
>  {
> -  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A,
> -                                                  __B, __F);
> +  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
> +                                            __B, __F);
>  }
>
>  extern __inline __m128h
> @@ -1765,81 +1765,81 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                            int __D, const int __E)
>  {
> -  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
> -                                                  _mm_setzero_ph (),
> -                                                  __A, __E);
> +  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
> +                                            _mm_setzero_ph (),
> +                                            __A, __E);
>  }
>
>  #else
>  #define _mm_reduce_sh(A, B, C)                                         \
> -  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C),            \
> -                                            _mm_setzero_ph (), \
> -                                            (__mmask8)-1,              \
> -                                            _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),                  \
> +                                      _mm_setzero_ph (),               \
> +                                      (__mmask8)-1,                    \
> +                                      _MM_FROUND_CUR_DIRECTION))
>
>  #define _mm_mask_reduce_sh(A, B, C, D, E)                              \
> -  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B),  \
> -                                            _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B),                \
> +                                      _MM_FROUND_CUR_DIRECTION))
>
>  #define _mm_maskz_reduce_sh(A, B, C, D)                                        \
> -  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D),            \
> -                                            _mm_setzero_ph (), \
> -                                            (A), _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),                  \
> +                                      _mm_setzero_ph (),               \
> +                                      (A), _MM_FROUND_CUR_DIRECTION))
>
>  #define _mm_reduce_round_sh(A, B, C, D)                                \
> -  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C),    \
> -                                            _mm_setzero_ph (), \
> -                                            (__mmask8)-1, (D)))
> +  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),          \
> +                                      _mm_setzero_ph (),       \
> +                                      (__mmask8)-1, (D)))
>
>  #define _mm_mask_reduce_round_sh(A, B, C, D, E, F)                     \
> -  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
> +  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
>
>  #define _mm_maskz_reduce_round_sh(A, B, C, D, E)               \
> -  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D),    \
> -                                            _mm_setzero_ph (), \
> -                                            (A), (E)))
> +  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),          \
> +                                      _mm_setzero_ph (),       \
> +                                      (A), (E)))
>
>  #endif /* __OPTIMIZE__ */
>
>  /* Intrinsics vrndscaleph.  */
>  #ifdef __OPTIMIZE__
>  extern __inline __m512h
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_roundscale_ph (__m512h __A, int __B)
>  {
> -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
> -                                                     _mm512_setzero_ph (),
> -                                                     (__mmask32) -1,
> -                                                     _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
> +                                                 _mm512_setzero_ph (),
> +                                                 (__mmask32) -1,
> +                                                 _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
> -                                __m512h __C, int __D)
> +                          __m512h __C, int __D)
>  {
> -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A, __B,
> -                                                     _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
> +                                                 _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
>  {
> -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
> -                                                     _mm512_setzero_ph (),
> -                                                     __A,
> -                                                     _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
> +                                                 _mm512_setzero_ph (),
> +                                                 __A,
> +                                                 _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m512h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
>  {
> -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
> -                                                     _mm512_setzero_ph (),
> -                                                     (__mmask32) -1,
> -                                                     __C);
> +  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
> +                                                 _mm512_setzero_ph (),
> +                                                 (__mmask32) -1,
> +                                                 __C);
>  }
>
>  extern __inline __m512h
> @@ -1847,8 +1847,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
>                                  __m512h __C, int __D, const int __E)
>  {
> -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A,
> -                                                     __B, __E);
> +  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
> +                                                 __B, __E);
>  }
>
>  extern __inline __m512h
> @@ -1856,52 +1856,52 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
>                                   const int __D)
>  {
> -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
> -                                                     _mm512_setzero_ph (),
> -                                                     __A, __D);
> +  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
> +                                                 _mm512_setzero_ph (),
> +                                                 __A, __D);
>  }
>
>  #else
> -#define _mm512_roundscale_ph(A, B) \
> -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B),              \
> -                                               _mm512_setzero_ph (),   \
> -                                               (__mmask32)-1,          \
> -                                               _MM_FROUND_CUR_DIRECTION))
> -
> -#define _mm512_mask_roundscale_ph(A, B, C, D) \
> -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B),    \
> -                                               _MM_FROUND_CUR_DIRECTION))
> -
> -#define _mm512_maskz_roundscale_ph(A, B, C) \
> -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C),              \
> -                                               _mm512_setzero_ph (),   \
> -                                               (A),                    \
> -                                               _MM_FROUND_CUR_DIRECTION))
> -#define _mm512_roundscale_round_ph(A, B, C) \
> -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B),              \
> -                                               _mm512_setzero_ph (),   \
> -                                               (__mmask32)-1, (C)))
> +#define _mm512_roundscale_ph(A, B)                                     \
> +  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),                  \
> +                                           _mm512_setzero_ph (),       \
> +                                           (__mmask32)-1,              \
> +                                           _MM_FROUND_CUR_DIRECTION))
> +
> +#define _mm512_mask_roundscale_ph(A, B, C, D)                          \
> +  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B),                \
> +                                           _MM_FROUND_CUR_DIRECTION))
> +
> +#define _mm512_maskz_roundscale_ph(A, B, C)                            \
> +  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),                  \
> +                                           _mm512_setzero_ph (),       \
> +                                           (A),                        \
> +                                           _MM_FROUND_CUR_DIRECTION))
> +#define _mm512_roundscale_round_ph(A, B, C)                            \
> +  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),                  \
> +                                           _mm512_setzero_ph (),       \
> +                                           (__mmask32)-1, (C)))
>
>  #define _mm512_mask_roundscale_round_ph(A, B, C, D, E)                 \
> -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> +  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
>
> -#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
> -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C),              \
> -                                               _mm512_setzero_ph (),   \
> -                                               (A), (D)))
> +#define _mm512_maskz_roundscale_round_ph(A, B, C, D)                   \
> +  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),                  \
> +                                           _mm512_setzero_ph (),       \
> +                                           (A), (D)))
>
>  #endif /* __OPTIMIZE__ */
>
>  /* Intrinsics vrndscalesh.  */
>  #ifdef __OPTIMIZE__
>  extern __inline __m128h
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
>  {
> -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
> -                                                    _mm_setzero_ph (),
> -                                                    (__mmask8) -1,
> -                                                    _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
> +                                              _mm_setzero_ph (),
> +                                              (__mmask8) -1,
> +                                              _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
> @@ -1909,27 +1909,27 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                         __m128h __D, int __E)
>  {
> -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E, __A, __B,
> -                                                    _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
> +                                              _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
>  {
> -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
> -                                                    _mm_setzero_ph (), __A,
> -                                                    _MM_FROUND_CUR_DIRECTION);
> +  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
> +                                              _mm_setzero_ph (), __A,
> +                                              _MM_FROUND_CUR_DIRECTION);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
>  {
> -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
> -                                                    _mm_setzero_ph (),
> -                                                    (__mmask8) -1,
> -                                                    __D);
> +  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
> +                                              _mm_setzero_ph (),
> +                                              (__mmask8) -1,
> +                                              __D);
>  }
>
>  extern __inline __m128h
> @@ -1937,8 +1937,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
>                               __m128h __D, int __E, const int __F)
>  {
> -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E,
> -                                                    __A, __B, __F);
> +  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
> +                                              __A, __B, __F);
>  }
>
>  extern __inline __m128h
> @@ -1946,46 +1946,46 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
>                                int __D, const int __E)
>  {
> -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
> -                                                    _mm_setzero_ph (),
> -                                                    __A, __E);
> +  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
> +                                              _mm_setzero_ph (),
> +                                              __A, __E);
>  }
>
>  #else
>  #define _mm_roundscale_sh(A, B, C)                                     \
> -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C),          \
> -                                              _mm_setzero_ph (),       \
> -                                              (__mmask8)-1, \
> -                                              _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),                        \
> +                                        _mm_setzero_ph (),             \
> +                                        (__mmask8)-1,                  \
> +                                        _MM_FROUND_CUR_DIRECTION))
>
>  #define _mm_mask_roundscale_sh(A, B, C, D, E)                          \
> -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), \
> -                                              _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B),      \
> +                                        _MM_FROUND_CUR_DIRECTION))
>
>  #define _mm_maskz_roundscale_sh(A, B, C, D)                            \
> -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D),          \
> -                                              _mm_setzero_ph (),       \
> -                                              (A), _MM_FROUND_CUR_DIRECTION))
> +  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),                        \
> +                                        _mm_setzero_ph (),             \
> +                                        (A), _MM_FROUND_CUR_DIRECTION))
>
> -#define _mm_roundscale_round_sh(A, B, C, D)                            \
> -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C),          \
> -                                              _mm_setzero_ph (),       \
> -                                              (__mmask8)-1, (D)))
> +#define _mm_roundscale_round_sh(A, B, C, D)                    \
> +  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),                \
> +                                        _mm_setzero_ph (),     \
> +                                        (__mmask8)-1, (D)))
>
>  #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F)                 \
> -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
> +  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
>
> -#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)                   \
> -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D),          \
> -                                              _mm_setzero_ph (),       \
> -                                              (A), (E)))
> +#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)           \
> +  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),                \
> +                                        _mm_setzero_ph (),     \
> +                                        (A), (E)))
>
>  #endif /* __OPTIMIZE__ */
>
>  /* Intrinsics vfpclasssh.  */
>  #ifdef __OPTIMIZE__
>  extern __inline __mmask8
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_fpclass_sh_mask (__m128h __A, const int __imm)
>  {
>    return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
> @@ -2031,11 +2031,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
>
>  #else
>  #define _mm512_mask_fpclass_ph_mask(u, x, c)                           \
> -  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x),\
> +  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
>                                                  (int) (c),(__mmask8)(u)))
>
>  #define _mm512_fpclass_ph_mask(x, c)                                    \
> -  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x),\
> +  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
>                                                  (int) (c),(__mmask8)-1))
>  #endif /* __OPTIMIZE__ */
>
> @@ -2141,9 +2141,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_getexp_round_ph (__m512h __A, const int __R)
>  {
>    return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
> -                                                  (__v32hf)
> -                                                  _mm512_setzero_ph (),
> -                                                  (__mmask32) -1, __R);
> +                                                   (__v32hf)
> +                                                   _mm512_setzero_ph (),
> +                                                   (__mmask32) -1, __R);
>  }
>
>  extern __inline __m512h
> @@ -2152,8 +2152,8 @@ _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
>                              const int __R)
>  {
>    return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
> -                                                  (__v32hf) __W,
> -                                                  (__mmask32) __U, __R);
> +                                                   (__v32hf) __W,
> +                                                   (__mmask32) __U, __R);
>  }
>
>  extern __inline __m512h
> @@ -2161,37 +2161,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
>  {
>    return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
> -                                                  (__v32hf)
> -                                                  _mm512_setzero_ph (),
> -                                                  (__mmask32) __U, __R);
> +                                                   (__v32hf)
> +                                                   _mm512_setzero_ph (),
> +                                                   (__mmask32) __U, __R);
>  }
>
>  #else
> -#define _mm_getexp_round_sh(A, B, R)                                           \
> -  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),           \
> -                                              (__v8hf)(__m128h)(B),            \
> -                                              (__v8hf)_mm_setzero_ph(),        \
> +#define _mm_getexp_round_sh(A, B, R)                                   \
> +  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),   \
> +                                              (__v8hf)(__m128h)(B),    \
> +                                              (__v8hf)_mm_setzero_ph(), \
>                                                (__mmask8)-1, R))
>
> -#define _mm_mask_getexp_round_sh(W, U, A, B, C)                                        \
> +#define _mm_mask_getexp_round_sh(W, U, A, B, C)                        \
>    (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
>
> -#define _mm_maskz_getexp_round_sh(U, A, B, C)                                  \
> -  (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,                            \
> -                                             (__v8hf)_mm_setzero_ph(),         \
> +#define _mm_maskz_getexp_round_sh(U, A, B, C)                          \
> +  (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,                    \
> +                                             (__v8hf)_mm_setzero_ph(), \
>                                               U, C)
>
> -#define _mm512_getexp_round_ph(A, R)                                           \
> -  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
> -  (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
> +#define _mm512_getexp_round_ph(A, R)                                   \
> +  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),     \
> +                                           (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
>
> -#define _mm512_mask_getexp_round_ph(W, U, A, R)                                        \
> -  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
> -  (__v32hf)(__m512h)(W), (__mmask32)(U), R))
> +#define _mm512_mask_getexp_round_ph(W, U, A, R)                                \
> +  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),     \
> +                                           (__v32hf)(__m512h)(W), (__mmask32)(U), R))
>
> -#define _mm512_maskz_getexp_round_ph(U, A, R)                                  \
> -  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
> -  (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
> +#define _mm512_maskz_getexp_round_ph(U, A, R)                          \
> +  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),     \
> +                                           (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
>
>  #endif /* __OPTIMIZE__ */
>
> diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
> index 206d60407fc..e9478792a03 100644
> --- a/gcc/config/i386/avx512fp16vlintrin.h
> +++ b/gcc/config/i386/avx512fp16vlintrin.h
> @@ -53,30 +53,30 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vaddph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
>  {
> -  return __builtin_ia32_vaddph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vaddph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (),
> +                                      __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
>  {
> -  return __builtin_ia32_vaddph_v16hf_mask (__B, __C,
> -                                          _mm256_setzero_ph (), __A);
> +  return __builtin_ia32_addph256_mask (__B, __C,
> +                                      _mm256_setzero_ph (), __A);
>  }
>
>  extern __inline __m128h
> @@ -97,30 +97,30 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vsubph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
>  {
> -  return __builtin_ia32_vsubph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vsubph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (),
> +                                      __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
>  {
> -  return __builtin_ia32_vsubph_v16hf_mask (__B, __C,
> -                                          _mm256_setzero_ph (), __A);
> +  return __builtin_ia32_subph256_mask (__B, __C,
> +                                      _mm256_setzero_ph (), __A);
>  }
>
>  extern __inline __m128h
> @@ -141,30 +141,30 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vmulph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
>  {
> -  return __builtin_ia32_vmulph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vmulph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (),
> +                                      __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
>  {
> -  return __builtin_ia32_vmulph_v16hf_mask (__B, __C,
> -                                          _mm256_setzero_ph (), __A);
> +  return __builtin_ia32_mulph256_mask (__B, __C,
> +                                      _mm256_setzero_ph (), __A);
>  }
>
>  extern __inline __m128h
> @@ -185,30 +185,30 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vdivph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
>  {
> -  return __builtin_ia32_vdivph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vdivph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (),
> +                                      __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
>  {
> -  return __builtin_ia32_vdivph_v16hf_mask (__B, __C,
> -                                          _mm256_setzero_ph (), __A);
> +  return __builtin_ia32_divph256_mask (__B, __C,
> +                                      _mm256_setzero_ph (), __A);
>  }
>
>  /* Intrinsics v[max,min]ph.  */
> @@ -216,96 +216,96 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_max_ph (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vmaxph_v8hf_mask (__A, __B,
> -                                         _mm_setzero_ph (),
> -                                         (__mmask8) -1);
> +  return __builtin_ia32_maxph128_mask (__A, __B,
> +                                      _mm_setzero_ph (),
> +                                      (__mmask8) -1);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_max_ph (__m256h __A, __m256h __B)
>  {
> -  return __builtin_ia32_vmaxph_v16hf_mask (__A, __B,
> -                                         _mm256_setzero_ph (),
> -                                         (__mmask16) -1);
> +  return __builtin_ia32_maxph256_mask (__A, __B,
> +                                      _mm256_setzero_ph (),
> +                                      (__mmask16) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vmaxph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
>  {
> -  return __builtin_ia32_vmaxph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vmaxph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (),
> +                                      __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
>  {
> -  return __builtin_ia32_vmaxph_v16hf_mask (__B, __C,
> -                                          _mm256_setzero_ph (), __A);
> +  return __builtin_ia32_maxph256_mask (__B, __C,
> +                                      _mm256_setzero_ph (), __A);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_min_ph (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vminph_v8hf_mask (__A, __B,
> -                                         _mm_setzero_ph (),
> -                                         (__mmask8) -1);
> +  return __builtin_ia32_minph128_mask (__A, __B,
> +                                      _mm_setzero_ph (),
> +                                      (__mmask8) -1);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_min_ph (__m256h __A, __m256h __B)
>  {
> -  return __builtin_ia32_vminph_v16hf_mask (__A, __B,
> -                                         _mm256_setzero_ph (),
> -                                         (__mmask16) -1);
> +  return __builtin_ia32_minph256_mask (__A, __B,
> +                                      _mm256_setzero_ph (),
> +                                      (__mmask16) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vminph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
>  {
> -  return __builtin_ia32_vminph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vminph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> -                                         __A);
> +  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (),
> +                                      __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
>  {
> -  return __builtin_ia32_vminph_v16hf_mask (__B, __C,
> -                                          _mm256_setzero_ph (), __A);
> +  return __builtin_ia32_minph256_mask (__B, __C,
> +                                      _mm256_setzero_ph (), __A);
>  }
>
>  /* vcmpph */
> @@ -314,8 +314,8 @@ extern __inline __mmask8
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
>  {
> -  return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__A, __B, __C,
> -                                                    (__mmask8) -1);
> +  return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
> +                                                 (__mmask8) -1);
>  }
>
>  extern __inline __mmask8
> @@ -323,15 +323,15 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
>                       const int __D)
>  {
> -  return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__B, __C, __D, __A);
> +  return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
>  }
>
>  extern __inline __mmask16
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
>  {
> -  return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__A, __B, __C,
> -                                                      (__mmask16) -1);
> +  return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
> +                                                  (__mmask16) -1);
>  }
>
>  extern __inline __mmask16
> @@ -339,22 +339,22 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
>                          const int __D)
>  {
> -  return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__B, __C, __D,
> -                                                      __A);
> +  return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D,
> +                                                  __A);
>  }
>
>  #else
> -#define _mm_cmp_ph_mask(A, B, C)               \
> -  (__builtin_ia32_vcmpph_v8hf_mask ((A), (B), (C), (-1)))
> +#define _mm_cmp_ph_mask(A, B, C)                       \
> +  (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))
>
> -#define _mm_mask_cmp_ph_mask(A, B, C, D)       \
> -  (__builtin_ia32_vcmpph_v8hf_mask ((B), (C), (D), (A)))
> +#define _mm_mask_cmp_ph_mask(A, B, C, D)               \
> +  (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))
>
> -#define _mm256_cmp_ph_mask(A, B, C)            \
> -  (__builtin_ia32_vcmpph_v16hf_mask ((A), (B), (C), (-1)))
> +#define _mm256_cmp_ph_mask(A, B, C)                    \
> +  (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))
>
> -#define _mm256_mask_cmp_ph_mask(A, B, C, D)    \
> -  (__builtin_ia32_vcmpph_v16hf_mask ((B), (C), (D), (A)))
> +#define _mm256_mask_cmp_ph_mask(A, B, C, D)            \
> +  (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -363,46 +363,46 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_sqrt_ph (__m128h __A)
>  {
> -  return __builtin_ia32_vsqrtph_v8hf_mask (__A, _mm_setzero_ph (),
> -                                          (__mmask8) -1);
> +  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
> +                                       (__mmask8) -1);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_sqrt_ph (__m256h __A)
>  {
> -  return __builtin_ia32_vsqrtph_v16hf_mask (__A, _mm256_setzero_ph (),
> -                                           (__mmask16) -1);
> +  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
> +                                       (__mmask16) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
>  {
> -  return __builtin_ia32_vsqrtph_v8hf_mask (__C, __A, __B);
> +  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
>  {
> -  return __builtin_ia32_vsqrtph_v16hf_mask (__C, __A, __B);
> +  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
>  {
> -  return __builtin_ia32_vsqrtph_v8hf_mask (__B, _mm_setzero_ph (),
> -                                          __A);
> +  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (),
> +                                       __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
>  {
> -  return __builtin_ia32_vsqrtph_v16hf_mask (__B, _mm256_setzero_ph (),
> -                                           __A);
> +  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (),
> +                                       __A);
>  }
>
>  /* Intrinsics vrsqrtph.  */
> @@ -410,45 +410,45 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_rsqrt_ph (__m128h __A)
>  {
> -  return __builtin_ia32_vrsqrtph_v8hf_mask (__A, _mm_setzero_ph (),
> -                                           (__mmask8) -1);
> +  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
> +                                        (__mmask8) -1);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_rsqrt_ph (__m256h __A)
>  {
> -  return __builtin_ia32_vrsqrtph_v16hf_mask (__A, _mm256_setzero_ph (),
> -                                            (__mmask16) -1);
> +  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
> +                                        (__mmask16) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
>  {
> -  return __builtin_ia32_vrsqrtph_v8hf_mask (__C, __A, __B);
> +  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
>  {
> -  return __builtin_ia32_vrsqrtph_v16hf_mask (__C, __A, __B);
> +  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
>  {
> -  return __builtin_ia32_vrsqrtph_v8hf_mask (__B, _mm_setzero_ph (), __A);
> +  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
>  {
> -  return __builtin_ia32_vrsqrtph_v16hf_mask (__B, _mm256_setzero_ph (),
> -                                            __A);
> +  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (),
> +                                        __A);
>  }
>
>  /* Intrinsics vrcpph.  */
> @@ -456,45 +456,45 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_rcp_ph (__m128h __A)
>  {
> -  return __builtin_ia32_vrcpph_v8hf_mask (__A, _mm_setzero_ph (),
> -                                         (__mmask8) -1);
> +  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
> +                                      (__mmask8) -1);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_rcp_ph (__m256h __A)
>  {
> -  return __builtin_ia32_vrcpph_v16hf_mask (__A, _mm256_setzero_ph (),
> -                                          (__mmask16) -1);
> +  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
> +                                      (__mmask16) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
>  {
> -  return __builtin_ia32_vrcpph_v8hf_mask (__C, __A, __B);
> +  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
>  {
> -  return __builtin_ia32_vrcpph_v16hf_mask (__C, __A, __B);
> +  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
>  {
> -  return __builtin_ia32_vrcpph_v8hf_mask (__B, _mm_setzero_ph (), __A);
> +  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
>  {
> -  return __builtin_ia32_vrcpph_v16hf_mask (__B, _mm256_setzero_ph (),
> -                                          __A);
> +  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (),
> +                                      __A);
>  }
>
>  /* Intrinsics vscalefph.  */
> @@ -502,25 +502,25 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_scalef_ph (__m128h __A, __m128h __B)
>  {
> -  return __builtin_ia32_vscalefph_v8hf_mask (__A, __B,
> -                                            _mm_setzero_ph (),
> -                                            (__mmask8) -1);
> +  return __builtin_ia32_scalefph128_mask (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_scalef_ph (__m256h __A, __m256h __B)
>  {
> -  return __builtin_ia32_vscalefph_v16hf_mask (__A, __B,
> -                                             _mm256_setzero_ph (),
> -                                             (__mmask16) -1);
> +  return __builtin_ia32_scalefph256_mask (__A, __B,
> +                                         _mm256_setzero_ph (),
> +                                         (__mmask16) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
>  {
> -  return __builtin_ia32_vscalefph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
> @@ -528,24 +528,24 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
>                        __m256h __D)
>  {
> -  return __builtin_ia32_vscalefph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
>  {
> -  return __builtin_ia32_vscalefph_v8hf_mask (__B, __C,
> -                                            _mm_setzero_ph (), __A);
> +  return __builtin_ia32_scalefph128_mask (__B, __C,
> +                                         _mm_setzero_ph (), __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
>  {
> -  return __builtin_ia32_vscalefph_v16hf_mask (__B, __C,
> -                                             _mm256_setzero_ph (),
> -                                             __A);
> +  return __builtin_ia32_scalefph256_mask (__B, __C,
> +                                         _mm256_setzero_ph (),
> +                                         __A);
>  }
>
>  /* Intrinsics vreduceph.  */
> @@ -554,109 +554,109 @@ extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_reduce_ph (__m128h __A, int __B)
>  {
> -  return __builtin_ia32_vreduceph_v8hf_mask (__A, __B,
> -                                            _mm_setzero_ph (),
> -                                            (__mmask8) -1);
> +  return __builtin_ia32_reduceph128_mask (__A, __B,
> +                                         _mm_setzero_ph (),
> +                                         (__mmask8) -1);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
>  {
> -  return __builtin_ia32_vreduceph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
>  {
> -  return __builtin_ia32_vreduceph_v8hf_mask (__B, __C,
> -                                            _mm_setzero_ph (), __A);
> +  return __builtin_ia32_reduceph128_mask (__B, __C,
> +                                         _mm_setzero_ph (), __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_reduce_ph (__m256h __A, int __B)
>  {
> -  return __builtin_ia32_vreduceph_v16hf_mask (__A, __B,
> -                                             _mm256_setzero_ph (),
> -                                             (__mmask16) -1);
> +  return __builtin_ia32_reduceph256_mask (__A, __B,
> +                                         _mm256_setzero_ph (),
> +                                         (__mmask16) -1);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
>  {
> -  return __builtin_ia32_vreduceph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
>  {
> -  return __builtin_ia32_vreduceph_v16hf_mask (__B, __C,
> -                                             _mm256_setzero_ph (),
> -                                             __A);
> +  return __builtin_ia32_reduceph256_mask (__B, __C,
> +                                         _mm256_setzero_ph (),
> +                                         __A);
>  }
>
>  #else
> -#define _mm_reduce_ph(A, B)                                    \
> -  (__builtin_ia32_vreduceph_v8hf_mask ((A), (B),\
> -                                      _mm_setzero_ph (),       \
> -                                      ((__mmask8)-1)))
> +#define _mm_reduce_ph(A, B)                            \
> +  (__builtin_ia32_reduceph128_mask ((A), (B),          \
> +                                   _mm_setzero_ph (),  \
> +                                   ((__mmask8)-1)))
>
> -#define _mm_mask_reduce_ph(A,  B,  C, D)               \
> -  (__builtin_ia32_vreduceph_v8hf_mask ((C), (D), (A), (B)))
> +#define _mm_mask_reduce_ph(A,  B,  C, D)                       \
> +  (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))
>
> -#define _mm_maskz_reduce_ph(A,  B, C)                          \
> -  (__builtin_ia32_vreduceph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
> +#define _mm_maskz_reduce_ph(A,  B, C)                                  \
> +  (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))
>
>  #define _mm256_reduce_ph(A, B)                                 \
> -  (__builtin_ia32_vreduceph_v16hf_mask ((A), (B),\
> -                                       _mm256_setzero_ph (),   \
> -                                       ((__mmask16)-1)))
> +  (__builtin_ia32_reduceph256_mask ((A), (B),                  \
> +                                   _mm256_setzero_ph (),       \
> +                                   ((__mmask16)-1)))
>
> -#define _mm256_mask_reduce_ph(A, B, C, D)              \
> -  (__builtin_ia32_vreduceph_v16hf_mask ((C), (D), (A), (B)))
> +#define _mm256_mask_reduce_ph(A, B, C, D)                      \
> +  (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))
>
> -#define _mm256_maskz_reduce_ph(A, B, C)                                \
> -  (__builtin_ia32_vreduceph_v16hf_mask ((B), (C), _mm256_setzero_ph (), (A)))
> +#define _mm256_maskz_reduce_ph(A, B, C)                                        \
> +  (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))
>
>  #endif /* __OPTIMIZE__ */
>
>  /* Intrinsics vrndscaleph.  */
>  #ifdef __OPTIMIZE__
> -extern __inline __m128h
> -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_roundscale_ph (__m128h __A, int __B)
> -{
> -  return __builtin_ia32_vrndscaleph_v8hf_mask (__A, __B,
> -                                              _mm_setzero_ph (),
> -                                              (__mmask8) -1);
> -}
> +  extern __inline __m128h
> +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +  _mm_roundscale_ph (__m128h __A, int __B)
> +  {
> +    return __builtin_ia32_rndscaleph128_mask (__A, __B,
> +                                             _mm_setzero_ph (),
> +                                             (__mmask8) -1);
> +  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
>  {
> -  return __builtin_ia32_vrndscaleph_v8hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m128h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
>  {
> -  return __builtin_ia32_vrndscaleph_v8hf_mask (__B, __C,
> -                                              _mm_setzero_ph (), __A);
> +  return __builtin_ia32_rndscaleph128_mask (__B, __C,
> +                                           _mm_setzero_ph (), __A);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_roundscale_ph (__m256h __A, int __B)
>  {
> -  return __builtin_ia32_vrndscaleph_v16hf_mask (__A, __B,
> -                                               _mm256_setzero_ph (),
> -                                               (__mmask16) -1);
> +  return __builtin_ia32_rndscaleph256_mask (__A, __B,
> +                                           _mm256_setzero_ph (),
> +                                           (__mmask16) -1);
>  }
>
>  extern __inline __m256h
> @@ -664,40 +664,40 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
>                            int __D)
>  {
> -  return __builtin_ia32_vrndscaleph_v16hf_mask (__C, __D, __A, __B);
> +  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
>  }
>
>  extern __inline __m256h
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
>  {
> -  return __builtin_ia32_vrndscaleph_v16hf_mask (__B, __C,
> -                                               _mm256_setzero_ph (),
> -                                               __A);
> +  return __builtin_ia32_rndscaleph256_mask (__B, __C,
> +                                           _mm256_setzero_ph (),
> +                                           __A);
>  }
>
>  #else
> -#define _mm_roundscale_ph(A, B) \
> -  (__builtin_ia32_vrndscaleph_v8hf_mask ((A), (B), _mm_setzero_ph (),  \
> -                                        ((__mmask8)-1)))
> +#define _mm_roundscale_ph(A, B)                                                \
> +  (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (),     \
> +                                     ((__mmask8)-1)))
>
> -#define _mm_mask_roundscale_ph(A, B, C, D) \
> -  (__builtin_ia32_vrndscaleph_v8hf_mask ((C), (D), (A), (B)))
> +#define _mm_mask_roundscale_ph(A, B, C, D)                     \
> +  (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))
>
> -#define _mm_maskz_roundscale_ph(A, B, C) \
> -  (__builtin_ia32_vrndscaleph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
> +#define _mm_maskz_roundscale_ph(A, B, C)                               \
> +  (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))
>
> -#define _mm256_roundscale_ph(A, B) \
> -  (__builtin_ia32_vrndscaleph_v16hf_mask ((A), (B),          \
> -                                        _mm256_setzero_ph(), \
> -                                         ((__mmask16)-1)))
> +#define _mm256_roundscale_ph(A, B)                             \
> +  (__builtin_ia32_rndscaleph256_mask ((A), (B),                        \
> +                                     _mm256_setzero_ph(),      \
> +                                     ((__mmask16)-1)))
>
> -#define _mm256_mask_roundscale_ph(A, B, C, D) \
> -  (__builtin_ia32_vrndscaleph_v16hf_mask ((C), (D), (A), (B)))
> +#define _mm256_mask_roundscale_ph(A, B, C, D)                  \
> +  (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))
>
> -#define _mm256_maskz_roundscale_ph(A, B, C) \
> -  (__builtin_ia32_vrndscaleph_v16hf_mask ((B), (C),                    \
> -                                         _mm256_setzero_ph (), (A)))
> +#define _mm256_maskz_roundscale_ph(A, B, C)                            \
> +  (__builtin_ia32_rndscaleph256_mask ((B), (C),                                \
> +                                     _mm256_setzero_ph (), (A)))
>
>  #endif /* __OPTIMIZE__ */
>
> @@ -705,7 +705,7 @@ _mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
>  #ifdef __OPTIMIZE__
>  extern __inline __mmask8
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
> +  _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
>  {
>    return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
>                                                       __imm, __U);
> @@ -725,7 +725,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
>  {
>    return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
> -                                                     __imm, __U);
> +                                                      __imm, __U);
>  }
>
>  extern __inline __mmask16
> @@ -733,26 +733,26 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_fpclass_ph_mask (__m256h __A, const int __imm)
>  {
>    return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
> -                                                     __imm,
> -                                                     (__mmask16) -1);
> +                                                      __imm,
> +                                                      (__mmask16) -1);
>  }
>
>  #else
>  #define _mm_fpclass_ph_mask(X, C)                                       \
> -  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),  \
> +  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),        \
>                                                 (int) (C),(__mmask8)-1))
>
>  #define _mm_mask_fpclass_ph_mask(u, X, C)                               \
> -  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),  \
> +  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),        \
>                                                 (int) (C),(__mmask8)(u)))
>
>  #define _mm256_fpclass_ph_mask(X, C)                                    \
> -  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X),  \
> -                                               (int) (C),(__mmask16)-1))
> +  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
> +                                                (int) (C),(__mmask16)-1))
>
>  #define _mm256_mask_fpclass_ph_mask(u, X, C)                           \
> -  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X),  \
> -                                               (int) (C),(__mmask16)(u)))
> +  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
> +                                                (int) (C),(__mmask16)(u)))
>  #endif /* __OPTIMIZE__ */
>
>  /* Intrinsics vgetexpph, vgetexpsh.  */
> @@ -761,9 +761,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_getexp_ph (__m256h __A)
>  {
>    return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
> -                                                  (__v16hf)
> -                                                  _mm256_setzero_ph (),
> -                                                  (__mmask16) -1);
> +                                                   (__v16hf)
> +                                                   _mm256_setzero_ph (),
> +                                                   (__mmask16) -1);
>  }
>
>  extern __inline __m256h
> @@ -771,8 +771,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
>  {
>    return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
> -                                                  (__v16hf) __W,
> -                                                  (__mmask16) __U);
> +                                                   (__v16hf) __W,
> +                                                   (__mmask16) __U);
>  }
>
>  extern __inline __m256h
> @@ -780,9 +780,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
>  {
>    return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
> -                                                  (__v16hf)
> -                                                  _mm256_setzero_ph (),
> -                                                  (__mmask16) __U);
> +                                                   (__v16hf)
> +                                                   _mm256_setzero_ph (),
> +                                                   (__mmask16) __U);
>  }
>
>  extern __inline __m128h
> @@ -790,9 +790,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_getexp_ph (__m128h __A)
>  {
>    return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
> -                                                  (__v8hf)
> -                                                  _mm_setzero_ph (),
> -                                                  (__mmask8) -1);
> +                                                   (__v8hf)
> +                                                   _mm_setzero_ph (),
> +                                                   (__mmask8) -1);
>  }
>
>  extern __inline __m128h
> @@ -800,8 +800,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
>  {
>    return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
> -                                                  (__v8hf) __W,
> -                                                  (__mmask8) __U);
> +                                                   (__v8hf) __W,
> +                                                   (__mmask8) __U);
>  }
>
>  extern __inline __m128h
> @@ -809,9 +809,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
>  {
>    return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
> -                                                  (__v8hf)
> -                                                  _mm_setzero_ph (),
> -                                                  (__mmask8) __U);
> +                                                   (__v8hf)
> +                                                   _mm_setzero_ph (),
> +                                                   (__mmask8) __U);
>  }
>
>
> @@ -892,41 +892,41 @@ _mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
>  }
>
>  #else
> -#define _mm256_getmant_ph(X, B, C)                                              \
> -  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
> -                                        (int)(((C)<<2) | (B)),                 \
> -                                         (__v16hf)(__m256h)_mm256_setzero_ph (),\
> -                                         (__mmask16)-1))
> -
> -#define _mm256_mask_getmant_ph(W, U, X, B, C)                                   \
> -  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
> -                                        (int)(((C)<<2) | (B)),                 \
> -                                         (__v16hf)(__m256h)(W),                 \
> -                                         (__mmask16)(U)))
> -
> -#define _mm256_maskz_getmant_ph(U, X, B, C)                                     \
> -  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
> -                                        (int)(((C)<<2) | (B)),                 \
> -                                         (__v16hf)(__m256h)_mm256_setzero_ph (),\
> -                                         (__mmask16)(U)))
> -
> -#define _mm_getmant_ph(X, B, C)                                                 \
> -  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
> -                                        (int)(((C)<<2) | (B)),                 \
> -                                         (__v8hf)(__m128h)_mm_setzero_ph (),   \
> -                                         (__mmask8)-1))
> -
> -#define _mm_mask_getmant_ph(W, U, X, B, C)                                      \
> -  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
> -                                        (int)(((C)<<2) | (B)),                 \
> -                                         (__v8hf)(__m128h)(W),                 \
> -                                         (__mmask8)(U)))
> -
> -#define _mm_maskz_getmant_ph(U, X, B, C)                                        \
> -  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
> -                                        (int)(((C)<<2) | (B)),                 \
> -                                         (__v8hf)(__m128h)_mm_setzero_ph (),   \
> -                                         (__mmask8)(U)))
> +#define _mm256_getmant_ph(X, B, C)                                     \
> +  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
> +                                              (int)(((C)<<2) | (B)),   \
> +                                              (__v16hf)(__m256h)_mm256_setzero_ph (), \
> +                                              (__mmask16)-1))
> +
> +#define _mm256_mask_getmant_ph(W, U, X, B, C)                          \
> +  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
> +                                              (int)(((C)<<2) | (B)),   \
> +                                              (__v16hf)(__m256h)(W),   \
> +                                              (__mmask16)(U)))
> +
> +#define _mm256_maskz_getmant_ph(U, X, B, C)                            \
> +  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
> +                                              (int)(((C)<<2) | (B)),   \
> +                                              (__v16hf)(__m256h)_mm256_setzero_ph (), \
> +                                              (__mmask16)(U)))
> +
> +#define _mm_getmant_ph(X, B, C)                                                \
> +  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),  \
> +                                              (int)(((C)<<2) | (B)),   \
> +                                              (__v8hf)(__m128h)_mm_setzero_ph (), \
> +                                              (__mmask8)-1))
> +
> +#define _mm_mask_getmant_ph(W, U, X, B, C)                             \
> +  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),  \
> +                                              (int)(((C)<<2) | (B)),   \
> +                                              (__v8hf)(__m128h)(W),    \
> +                                              (__mmask8)(U)))
> +
> +#define _mm_maskz_getmant_ph(U, X, B, C)                               \
> +  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),  \
> +                                              (int)(((C)<<2) | (B)),   \
> +                                              (__v8hf)(__m128h)_mm_setzero_ph (), \
> +                                              (__mmask8)(U)))
>
>  #endif /* __OPTIMIZE__ */
>
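(Likewise not part of the patch -- a quick VL-side sketch, assuming
-mavx512fp16 -mavx512vl; the 128/256-bit intrinsics are untouched and now
expand to __builtin_ia32_cmpph128_mask / __builtin_ia32_addph128_mask
rather than the old _v8hf names:)

    #include <immintrin.h>

    /* Compare, then use the result as a writemask for the add.  */
    __m128h
    masked_add_if_lt (__m128h src, __m128h a, __m128h b)
    {
      __mmask8 m = _mm_cmp_ph_mask (a, b, _CMP_LT_OS);
      return _mm_mask_add_ph (src, m, a, b);
    }
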
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index bfa1d56a7b8..10f6fd87cbb 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -2775,49 +2775,49 @@ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__b
>  BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
>
>  /* AVX512FP16.  */
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_vaddph_v8hf_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_vaddph_v16hf_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_vaddph_v32hf_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_vsubph_v8hf_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_vsubph_v16hf_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_vsubph_v32hf_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_vmulph_v8hf_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_vmulph_v16hf_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_vmulph_v32hf_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_vdivph_v8hf_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_vdivph_v16hf_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_vdivph_v32hf_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_vaddsh_v8hf_mask", IX86_BUILTIN_VADDSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_vsubsh_v8hf_mask", IX86_BUILTIN_VSUBSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_vmulsh_v8hf_mask", IX86_BUILTIN_VMULSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_vdivsh_v8hf_mask", IX86_BUILTIN_VDIVSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_vmaxph_v8hf_mask", IX86_BUILTIN_VMAXPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_vmaxph_v16hf_mask", IX86_BUILTIN_VMAXPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_vmaxph_v32hf_mask", IX86_BUILTIN_VMAXPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_vminph_v8hf_mask", IX86_BUILTIN_VMINPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_vminph_v16hf_mask", IX86_BUILTIN_VMINPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_vminph_v32hf_mask", IX86_BUILTIN_VMINPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_vmaxsh_v8hf_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_vminsh_v8hf_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_vcmpph_v8hf_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_vcmpph_v16hf_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_vcmpph_v32hf_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_vsqrtph_v8hf_mask", IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_vsqrtph_v16hf_mask", IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_vrsqrtph_v8hf_mask", IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_vrsqrtph_v16hf_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_vrsqrtph_v32hf_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_vrsqrtsh_v8hf_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_vrcpph_v8hf_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_vrcpph_v16hf_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_vrcpph_v32hf_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_vrcpsh_v8hf_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_vscalefph_v8hf_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_vscalefph_v16hf_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_vreduceph_v8hf_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_vreduceph_v16hf_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_vrndscaleph_v8hf_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_vrndscaleph_v16hf_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_VADDSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_VSUBSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_VMULSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_VDIVSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_VMAXPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_VMAXPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_VMAXPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_VMINPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_VMINPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_VMINPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_reduceph128_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_reduceph256_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_rndscaleph128_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
>  BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI)
>  BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
>  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
> @@ -3027,28 +3027,28 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "_
>  BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
>
>  /* AVX512FP16.  */
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_vaddph_v32hf_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_vsubph_v32hf_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_vmulph_v32hf_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_vdivph_v32hf_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_vaddsh_v8hf_mask_round", IX86_BUILTIN_VADDSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_vsubsh_v8hf_mask_round", IX86_BUILTIN_VSUBSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_vmulsh_v8hf_mask_round", IX86_BUILTIN_VMULSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_vdivsh_v8hf_mask_round", IX86_BUILTIN_VDIVSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_vmaxph_v32hf_mask_round", IX86_BUILTIN_VMAXPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_vminph_v32hf_mask_round", IX86_BUILTIN_VMINPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_vmaxsh_v8hf_mask_round", IX86_BUILTIN_VMAXSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_vminsh_v8hf_mask_round", IX86_BUILTIN_VMINSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_vcmpph_v32hf_mask_round", IX86_BUILTIN_VCMPPH_V32HF_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_vcmpsh_v8hf_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_vsqrtph_v32hf_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_vsqrtsh_v8hf_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_vscalefph_v32hf_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_vscalefsh_v8hf_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_vreduceph_v32hf_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_vreducesh_v8hf_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_vrndscaleph_v32hf_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_vrndscalesh_v8hf_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_VADDSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_VSUBSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_VMULSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_VDIVSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_VMAXPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_VMINPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_VMAXSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_VMINSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_VCMPPH_V32HF_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
>  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
>  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
>  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
> index b3cffa0644f..3a96e586418 100644
> --- a/gcc/testsuite/gcc.target/i386/avx-1.c
> +++ b/gcc/testsuite/gcc.target/i386/avx-1.c
> @@ -686,33 +686,33 @@
>  #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
>
>  /* avx512fp16intrin.h */
> -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
> -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
> -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
> -#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
> -#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
> -#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
> -#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
> -#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
> -#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
> +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
> +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
> +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
> +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
> +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
> +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
> +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
> +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
> +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
> +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
> +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
> +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
>  #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
>  #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
>  #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
> @@ -721,8 +721,8 @@
>  #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
>
>  /* avx512fp16vlintrin.h */
> -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
>  #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
>  #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
>  #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
> index 67ef567e437..aafcd414530 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> @@ -703,33 +703,33 @@
>  #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
>
>  /* avx512fp16intrin.h */
> -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
> -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
> -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
> -#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
> -#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
> -#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
> -#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
> -#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
> -#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
> +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
> +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
> +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
> +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
> +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
> +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
> +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
> +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
> +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
> +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
> +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
> +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
>  #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
>  #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
>  #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
> @@ -738,8 +738,8 @@
>  #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
>
>  /* avx512fp16vlintrin.h */
> -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
>  #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
>  #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
>  #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
> index b3f07587acb..8b600282c67 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> @@ -704,33 +704,33 @@
>  #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
>
>  /* avx512fp16intrin.h */
> -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
> -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
> -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
> -#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
> -#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
> -#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
> -#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
> -#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
> -#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
> -#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
> -#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
> +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
> +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
> +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
> +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
> +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
> +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
> +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
> +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
> +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
> +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
> +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
> +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
> +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
>  #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
>  #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
>  #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
> @@ -739,8 +739,8 @@
>  #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
>
>  /* avx512fp16vlintrin.h */
> -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
> +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
>  #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
>  #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
>  #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
> --
> 2.18.1
>
Hongtao Liu Sept. 15, 2021, 10:01 a.m. UTC | #2
On Wed, Sep 15, 2021 at 5:52 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
>
> Sorry for missing that part.
>
> Updated patch.
Ok, thanks.
>
> Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote on Wed, Sep 15, 2021 at 5:16 PM:
> >
> > On Wed, Sep 15, 2021 at 5:08 PM Hongyu Wang via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Hi,
> > >
> > > For AVX512FP16 builtins, they all contain format like vaddph_v8hf,
> > > while AVX512F builtins use addps128 which succeeded SSE/AVX style.
> > > Adjust AVX512FP16 builtins to match such format.
> > >
> > > Bootstraped/regtested on x86-64-*-linux, ok for master?
> > >
> > > gcc/ChangeLog:
> > >
> > >         * config/i386/avx512fp16intrin.h: Adjust all builtin calls.
> > >         * config/i386/avx512fp16vlintrin.h: Likewise.
> > >         * config/i386/i386-builtin.def: Adjust builtin name to match
> > >         AVX512F style.
> > >
> >
> > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask",
> > IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int)
> > V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask",
> > IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int)
> > V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask",
> > IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int)
> > UQI_FTYPE_V8HF_V8HF_INT_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask",
> > IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int)
> > UHI_FTYPE_V16HF_V16HF_INT_UHI)
> > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask",
> > IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int)
> > USI_FTYPE_V32HF_V32HF_INT_USI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask",
> > IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int)
> > V8HF_FTYPE_V8HF_V8HF_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask",
> > IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int)
> > V16HF_FTYPE_V16HF_V16HF_UHI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask",
> > IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int)
> > V8HF_FTYPE_V8HF_V8HF_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_rsqrtv16hf2_mask,
> > "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK,
> > UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_rsqrtv32hf2_mask,
> > "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK,
> > UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask",
> > IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int)
> > V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask",
> > IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int)
> > V8HF_FTYPE_V8HF_V8HF_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask",
> > IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int)
> > V16HF_FTYPE_V16HF_V16HF_UHI)
> > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask",
> > IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int)
> > V32HF_FTYPE_V32HF_V32HF_USI)
> > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask",
> > IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int)
> > V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512fp16_scalefv8hf_mask,
> > "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK,
> > UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16,
> > CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask",
> > IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int)
> > V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > Could you also change IX86_BUILTIN_VSCALEFPH_V16HF_MASK to
> > IX86_BUILTIN_VSCALEFPH256_MASK, and similarly for the others.
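
For illustration, applying the extra rename requested here to the scalefph256 entry quoted above would presumably look like the following. This is only a sketch of the suggested direction: IX86_BUILTIN_VSCALEFPH256_MASK is taken from this comment rather than from the posted patch, so the committed spelling may differ.

-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_VSCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)

Any remaining references to the old enumerator elsewhere in the i386 backend would need the matching rename as well.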
> >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * gcc.target/i386/avx-1.c: Adjust builtin macros.
> > >         * gcc.target/i386/sse-13.c: Likewise.
> > >         * gcc.target/i386/sse-23.c: Likewise.
> > > ---
> > >  gcc/config/i386/avx512fp16intrin.h     | 1236 ++++++++++++------------
> > >  gcc/config/i386/avx512fp16vlintrin.h   |  454 ++++-----
> > >  gcc/config/i386/i386-builtin.def       |  130 +--
> > >  gcc/testsuite/gcc.target/i386/avx-1.c  |   58 +-
> > >  gcc/testsuite/gcc.target/i386/sse-13.c |   58 +-
> > >  gcc/testsuite/gcc.target/i386/sse-23.c |   58 +-
> > >  6 files changed, 997 insertions(+), 997 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
> > > index 2fbfc140c44..5d66ca5c820 100644
> > > --- a/gcc/config/i386/avx512fp16intrin.h
> > > +++ b/gcc/config/i386/avx512fp16intrin.h
> > > @@ -229,15 +229,15 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
> > >  {
> > > -  return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vaddph_v32hf_mask (__B, __C,
> > > -                                          _mm512_setzero_ph (), __A);
> > > +  return __builtin_ia32_addph512_mask (__B, __C,
> > > +                                      _mm512_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -251,15 +251,15 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
> > >  {
> > > -  return __builtin_ia32_vsubph_v32hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vsubph_v32hf_mask (__B, __C,
> > > -                                          _mm512_setzero_ph (), __A);
> > > +  return __builtin_ia32_subph512_mask (__B, __C,
> > > +                                      _mm512_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -273,15 +273,15 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
> > >  {
> > > -  return __builtin_ia32_vmulph_v32hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vmulph_v32hf_mask (__B, __C,
> > > -                                          _mm512_setzero_ph (), __A);
> > > +  return __builtin_ia32_mulph512_mask (__B, __C,
> > > +                                      _mm512_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -295,15 +295,15 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
> > >  {
> > > -  return __builtin_ia32_vdivph_v32hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vdivph_v32hf_mask (__B, __C,
> > > -                                          _mm512_setzero_ph (), __A);
> > > +  return __builtin_ia32_divph512_mask (__B, __C,
> > > +                                      _mm512_setzero_ph (), __A);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -311,9 +311,9 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vaddph_v32hf_mask_round (__A, __B,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                (__mmask32) -1, __C);
> > > +  return __builtin_ia32_addph512_mask_round (__A, __B,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -321,7 +321,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                           __m512h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vaddph_v32hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -329,18 +329,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vaddph_v32hf_mask_round (__B, __C,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                __A, __D);
> > > +  return __builtin_ia32_addph512_mask_round (__B, __C,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            __A, __D);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vsubph_v32hf_mask_round (__A, __B,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                (__mmask32) -1, __C);
> > > +  return __builtin_ia32_subph512_mask_round (__A, __B,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -348,7 +348,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                           __m512h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vsubph_v32hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -356,18 +356,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vsubph_v32hf_mask_round (__B, __C,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                __A, __D);
> > > +  return __builtin_ia32_subph512_mask_round (__B, __C,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            __A, __D);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vmulph_v32hf_mask_round (__A, __B,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                (__mmask32) -1, __C);
> > > +  return __builtin_ia32_mulph512_mask_round (__A, __B,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -375,7 +375,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                           __m512h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vmulph_v32hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -383,18 +383,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vmulph_v32hf_mask_round (__B, __C,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                __A, __D);
> > > +  return __builtin_ia32_mulph512_mask_round (__B, __C,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            __A, __D);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vdivph_v32hf_mask_round (__A, __B,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                (__mmask32) -1, __C);
> > > +  return __builtin_ia32_divph512_mask_round (__A, __B,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -402,7 +402,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                           __m512h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vdivph_v32hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -410,67 +410,67 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vdivph_v32hf_mask_round (__B, __C,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                __A, __D);
> > > +  return __builtin_ia32_divph512_mask_round (__B, __C,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            __A, __D);
> > >  }
> > >  #else
> > >  #define _mm512_add_round_ph(A, B, C)                                   \
> > > -  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((A), (B),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (__mmask32)-1, (C)))
> > > +  ((__m512h)__builtin_ia32_addph512_mask_round((A), (B),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (__mmask32)-1, (C)))
> > >
> > > -#define _mm512_mask_add_round_ph(A, B, C, D, E)                        \
> > > -  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((C), (D), (A), (B), (E)))
> > > +#define _mm512_mask_add_round_ph(A, B, C, D, E)                                \
> > > +  ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
> > >
> > >  #define _mm512_maskz_add_round_ph(A, B, C, D)                          \
> > > -  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((B), (C),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (A), (D)))
> > > +  ((__m512h)__builtin_ia32_addph512_mask_round((B), (C),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (A), (D)))
> > >
> > >  #define _mm512_sub_round_ph(A, B, C)                                   \
> > > -  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((A), (B),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (__mmask32)-1, (C)))
> > > +  ((__m512h)__builtin_ia32_subph512_mask_round((A), (B),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (__mmask32)-1, (C)))
> > >
> > > -#define _mm512_mask_sub_round_ph(A, B, C, D, E)                        \
> > > -  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((C), (D), (A), (B), (E)))
> > > +#define _mm512_mask_sub_round_ph(A, B, C, D, E)                                \
> > > +  ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
> > >
> > >  #define _mm512_maskz_sub_round_ph(A, B, C, D)                          \
> > > -  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((B), (C),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (A), (D)))
> > > +  ((__m512h)__builtin_ia32_subph512_mask_round((B), (C),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (A), (D)))
> > >
> > >  #define _mm512_mul_round_ph(A, B, C)                                   \
> > > -  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((A), (B),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (__mmask32)-1, (C)))
> > > +  ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (__mmask32)-1, (C)))
> > >
> > > -#define _mm512_mask_mul_round_ph(A, B, C, D, E)                        \
> > > -  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((C), (D), (A), (B), (E)))
> > > +#define _mm512_mask_mul_round_ph(A, B, C, D, E)                                \
> > > +  ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
> > >
> > >  #define _mm512_maskz_mul_round_ph(A, B, C, D)                          \
> > > -  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((B), (C),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (A), (D)))
> > > +  ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (A), (D)))
> > >
> > >  #define _mm512_div_round_ph(A, B, C)                                   \
> > > -  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((A), (B),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (__mmask32)-1, (C)))
> > > +  ((__m512h)__builtin_ia32_divph512_mask_round((A), (B),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (__mmask32)-1, (C)))
> > >
> > > -#define _mm512_mask_div_round_ph(A, B, C, D, E)                        \
> > > -  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((C), (D), (A), (B), (E)))
> > > +#define _mm512_mask_div_round_ph(A, B, C, D, E)                                \
> > > +  ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
> > >
> > >  #define _mm512_maskz_div_round_ph(A, B, C, D)                          \
> > > -  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((B), (C),           \
> > > -                                                  _mm512_setzero_ph (),\
> > > -                                                  (A), (D)))
> > > +  ((__m512h)__builtin_ia32_divph512_mask_round((B), (C),               \
> > > +                                              _mm512_setzero_ph (),    \
> > > +                                              (A), (D)))
> > >  #endif  /* __OPTIMIZE__  */
> > >
> > >  /* Intrinsics of v[add,sub,mul,div]sh.  */
> > >  extern __inline __m128h
> > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_add_sh (__m128h __A, __m128h __B)
> > >  {
> > >    __A[0] += __B[0];
> > > @@ -481,15 +481,15 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vaddsh_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vaddsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
> > > +                                   __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -504,15 +504,15 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vsubsh_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vsubsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
> > > +                                   __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -527,14 +527,14 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vmulsh_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vmulsh_v8hf_mask (__B, __C, _mm_setzero_ph (), __A);
> > > +  return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -549,15 +549,15 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vdivsh_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vdivsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
> > > +                                   __A);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -565,9 +565,9 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vaddsh_v8hf_mask_round (__A, __B,
> > > -                                               _mm_setzero_ph (),
> > > -                                               (__mmask8) -1, __C);
> > > +  return __builtin_ia32_addsh_mask_round (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -575,7 +575,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                        __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vaddsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -583,18 +583,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                         const int __D)
> > >  {
> > > -  return __builtin_ia32_vaddsh_v8hf_mask_round (__B, __C,
> > > -                                               _mm_setzero_ph (),
> > > -                                               __A, __D);
> > > +  return __builtin_ia32_addsh_mask_round (__B, __C,
> > > +                                         _mm_setzero_ph (),
> > > +                                         __A, __D);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vsubsh_v8hf_mask_round (__A, __B,
> > > -                                               _mm_setzero_ph (),
> > > -                                               (__mmask8) -1, __C);
> > > +  return __builtin_ia32_subsh_mask_round (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -602,7 +602,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                        __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vsubsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -610,18 +610,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                         const int __D)
> > >  {
> > > -  return __builtin_ia32_vsubsh_v8hf_mask_round (__B, __C,
> > > -                                               _mm_setzero_ph (),
> > > -                                               __A, __D);
> > > +  return __builtin_ia32_subsh_mask_round (__B, __C,
> > > +                                         _mm_setzero_ph (),
> > > +                                         __A, __D);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vmulsh_v8hf_mask_round (__A, __B,
> > > -                                               _mm_setzero_ph (),
> > > -                                               (__mmask8) -1, __C);
> > > +  return __builtin_ia32_mulsh_mask_round (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -629,7 +629,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                        __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vmulsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -637,18 +637,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                         const int __D)
> > >  {
> > > -  return __builtin_ia32_vmulsh_v8hf_mask_round (__B, __C,
> > > -                                               _mm_setzero_ph (),
> > > -                                               __A, __D);
> > > +  return __builtin_ia32_mulsh_mask_round (__B, __C,
> > > +                                         _mm_setzero_ph (),
> > > +                                         __A, __D);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vdivsh_v8hf_mask_round (__A, __B,
> > > -                                               _mm_setzero_ph (),
> > > -                                               (__mmask8) -1, __C);
> > > +  return __builtin_ia32_divsh_mask_round (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -656,7 +656,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                        __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vdivsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -664,62 +664,62 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                         const int __D)
> > >  {
> > > -  return __builtin_ia32_vdivsh_v8hf_mask_round (__B, __C,
> > > -                                               _mm_setzero_ph (),
> > > -                                               __A, __D);
> > > +  return __builtin_ia32_divsh_mask_round (__B, __C,
> > > +                                         _mm_setzero_ph (),
> > > +                                         __A, __D);
> > >  }
> > >  #else
> > >  #define _mm_add_round_sh(A, B, C)                                      \
> > > -  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((A), (B),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (__mmask8)-1, (C)))
> > > +  ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B),                 \
> > > +                                            _mm_setzero_ph (),         \
> > > +                                            (__mmask8)-1, (C)))
> > >
> > >  #define _mm_mask_add_round_sh(A, B, C, D, E)                           \
> > > -  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_add_round_sh(A, B, C, D)                             \
> > > -  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((B), (C),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (A), (D)))
> > > +#define _mm_maskz_add_round_sh(A, B, C, D)                     \
> > > +  ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C),         \
> > > +                                            _mm_setzero_ph (), \
> > > +                                            (A), (D)))
> > >
> > >  #define _mm_sub_round_sh(A, B, C)                                      \
> > > -  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((A), (B),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (__mmask8)-1, (C)))
> > > +  ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B),                 \
> > > +                                            _mm_setzero_ph (),         \
> > > +                                            (__mmask8)-1, (C)))
> > >
> > >  #define _mm_mask_sub_round_sh(A, B, C, D, E)                           \
> > > -  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_sub_round_sh(A, B, C, D)                             \
> > > -  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((B), (C),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (A), (D)))
> > > +#define _mm_maskz_sub_round_sh(A, B, C, D)                     \
> > > +  ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C),         \
> > > +                                            _mm_setzero_ph (), \
> > > +                                            (A), (D)))
> > >
> > >  #define _mm_mul_round_sh(A, B, C)                                      \
> > > -  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((A), (B),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (__mmask8)-1, (C)))
> > > +  ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B),                 \
> > > +                                            _mm_setzero_ph (),         \
> > > +                                            (__mmask8)-1, (C)))
> > >
> > >  #define _mm_mask_mul_round_sh(A, B, C, D, E)                           \
> > > -  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_mul_round_sh(A, B, C, D)                             \
> > > -  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((B), (C),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (A), (D)))
> > > +#define _mm_maskz_mul_round_sh(A, B, C, D)                     \
> > > +  ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C),         \
> > > +                                            _mm_setzero_ph (), \
> > > +                                            (A), (D)))
> > >
> > >  #define _mm_div_round_sh(A, B, C)                                      \
> > > -  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((A), (B),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (__mmask8)-1, (C)))
> > > +  ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B),                 \
> > > +                                            _mm_setzero_ph (),         \
> > > +                                            (__mmask8)-1, (C)))
> > >
> > >  #define _mm_mask_div_round_sh(A, B, C, D, E)                           \
> > > -  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_div_round_sh(A, B, C, D)                             \
> > > -  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((B), (C),           \
> > > -                                                  _mm_setzero_ph (),   \
> > > -                                                  (A), (D)))
> > > +#define _mm_maskz_div_round_sh(A, B, C, D)                     \
> > > +  ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C),         \
> > > +                                            _mm_setzero_ph (), \
> > > +                                            (A), (D)))
> > >  #endif /* __OPTIMIZE__ */
> > >
> > >  /* Intrinsic vmaxph vminph.  */
> > > @@ -727,48 +727,48 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_max_ph (__m512h __A, __m512h __B)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v32hf_mask (__A, __B,
> > > -                                          _mm512_setzero_ph (),
> > > -                                          (__mmask32) -1);
> > > +  return __builtin_ia32_maxph512_mask (__A, __B,
> > > +                                      _mm512_setzero_ph (),
> > > +                                      (__mmask32) -1);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v32hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v32hf_mask (__B, __C,
> > > -                                          _mm512_setzero_ph (), __A);
> > > +  return __builtin_ia32_maxph512_mask (__B, __C,
> > > +                                      _mm512_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_min_ph (__m512h __A, __m512h __B)
> > >  {
> > > -  return __builtin_ia32_vminph_v32hf_mask (__A, __B,
> > > -                                          _mm512_setzero_ph (),
> > > -                                          (__mmask32) -1);
> > > +  return __builtin_ia32_minph512_mask (__A, __B,
> > > +                                      _mm512_setzero_ph (),
> > > +                                      (__mmask32) -1);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
> > >  {
> > > -  return __builtin_ia32_vminph_v32hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vminph_v32hf_mask (__B, __C,
> > > -                                          _mm512_setzero_ph (), __A);
> > > +  return __builtin_ia32_minph512_mask (__B, __C,
> > > +                                      _mm512_setzero_ph (), __A);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -776,9 +776,9 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v32hf_mask_round (__A, __B,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                (__mmask32) -1, __C);
> > > +  return __builtin_ia32_maxph512_mask_round (__A, __B,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -786,7 +786,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                           __m512h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v32hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -794,18 +794,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v32hf_mask_round (__B, __C,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                __A, __D);
> > > +  return __builtin_ia32_maxph512_mask_round (__B, __C,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            __A, __D);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vminph_v32hf_mask_round (__A, __B,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                (__mmask32) -1, __C);
> > > +  return __builtin_ia32_minph512_mask_round (__A, __B,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -813,7 +813,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                           __m512h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vminph_v32hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -821,37 +821,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vminph_v32hf_mask_round (__B, __C,
> > > -                                                _mm512_setzero_ph (),
> > > -                                                __A, __D);
> > > +  return __builtin_ia32_minph512_mask_round (__B, __C,
> > > +                                            _mm512_setzero_ph (),
> > > +                                            __A, __D);
> > >  }
> > >
> > >  #else
> > > -#define _mm512_max_round_ph(A, B, C)                                   \
> > > -  (__builtin_ia32_vmaxph_v32hf_mask_round ((A), (B),                   \
> > > -                                          _mm512_setzero_ph (),        \
> > > -                                          (__mmask32)-1, (C)))
> > > +#define _mm512_max_round_ph(A, B, C)                           \
> > > +  (__builtin_ia32_maxph512_mask_round ((A), (B),               \
> > > +                                      _mm512_setzero_ph (),    \
> > > +                                      (__mmask32)-1, (C)))
> > >
> > >  #define _mm512_mask_max_round_ph(A, B, C, D, E)                                \
> > > -  (__builtin_ia32_vmaxph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm512_maskz_max_round_ph(A, B, C, D)                          \
> > > -  (__builtin_ia32_vmaxph_v32hf_mask_round ((B), (C),                   \
> > > -                                          _mm512_setzero_ph (),        \
> > > -                                          (A), (D)))
> > > +#define _mm512_maskz_max_round_ph(A, B, C, D)                  \
> > > +  (__builtin_ia32_maxph512_mask_round ((B), (C),               \
> > > +                                      _mm512_setzero_ph (),    \
> > > +                                      (A), (D)))
> > >
> > > -#define _mm512_min_round_ph(A, B, C)                                   \
> > > -  (__builtin_ia32_vminph_v32hf_mask_round ((A), (B),                   \
> > > -                                          _mm512_setzero_ph (),        \
> > > -                                          (__mmask32)-1, (C)))
> > > +#define _mm512_min_round_ph(A, B, C)                           \
> > > +  (__builtin_ia32_minph512_mask_round ((A), (B),               \
> > > +                                      _mm512_setzero_ph (),    \
> > > +                                      (__mmask32)-1, (C)))
> > >
> > >  #define _mm512_mask_min_round_ph(A, B, C, D, E)                                \
> > > -  (__builtin_ia32_vminph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm512_maskz_min_round_ph(A, B, C, D)                          \
> > > -  (__builtin_ia32_vminph_v32hf_mask_round ((B), (C),                   \
> > > -                                          _mm512_setzero_ph (),        \
> > > -                                          (A), (D)))
> > > +#define _mm512_maskz_min_round_ph(A, B, C, D)                  \
> > > +  (__builtin_ia32_minph512_mask_round ((B), (C),               \
> > > +                                      _mm512_setzero_ph (),    \
> > > +                                      (A), (D)))
> > >  #endif /* __OPTIMIZE__ */
> > >
> > >  /* Intrinsic vmaxsh vminsh.  */
> > > @@ -867,15 +867,15 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vmaxsh_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vmaxsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
> > > +                                   __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -890,15 +890,15 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vminsh_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vminsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
> > > +                                   __A);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -906,9 +906,9 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vmaxsh_v8hf_mask_round (__A, __B,
> > > -                                               _mm_setzero_ph (),
> > > -                                               (__mmask8) -1, __C);
> > > +  return __builtin_ia32_maxsh_mask_round (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -916,7 +916,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                        __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vmaxsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -924,18 +924,18 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                         const int __D)
> > >  {
> > > -  return __builtin_ia32_vmaxsh_v8hf_mask_round (__B, __C,
> > > -                                               _mm_setzero_ph (),
> > > -                                               __A, __D);
> > > +  return __builtin_ia32_maxsh_mask_round (__B, __C,
> > > +                                         _mm_setzero_ph (),
> > > +                                         __A, __D);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vminsh_v8hf_mask_round (__A, __B,
> > > -                                               _mm_setzero_ph (),
> > > -                                               (__mmask8) -1, __C);
> > > +  return __builtin_ia32_minsh_mask_round (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -943,7 +943,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                        __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vminsh_v8hf_mask_round (__C, __D, __A, __B, __E);
> > > +  return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -951,37 +951,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                         const int __D)
> > >  {
> > > -  return __builtin_ia32_vminsh_v8hf_mask_round (__B, __C,
> > > -                                               _mm_setzero_ph (),
> > > -                                               __A, __D);
> > > +  return __builtin_ia32_minsh_mask_round (__B, __C,
> > > +                                         _mm_setzero_ph (),
> > > +                                         __A, __D);
> > >  }
> > >
> > >  #else
> > > -#define _mm_max_round_sh(A, B, C)                                      \
> > > -  (__builtin_ia32_vmaxsh_v8hf_mask_round ((A), (B),                    \
> > > -                                         _mm_setzero_ph (),            \
> > > -                                         (__mmask8)-1, (C)))
> > > +#define _mm_max_round_sh(A, B, C)                      \
> > > +  (__builtin_ia32_maxsh_mask_round ((A), (B),          \
> > > +                                   _mm_setzero_ph (),  \
> > > +                                   (__mmask8)-1, (C)))
> > >
> > > -#define _mm_mask_max_round_sh(A, B, C, D, E)                           \
> > > -  (__builtin_ia32_vmaxsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> > > +#define _mm_mask_max_round_sh(A, B, C, D, E)                   \
> > > +  (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_max_round_sh(A, B, C, D)                             \
> > > -  (__builtin_ia32_vmaxsh_v8hf_mask_round ((B), (C),                    \
> > > -                                         _mm_setzero_ph (),            \
> > > -                                         (A), (D)))
> > > +#define _mm_maskz_max_round_sh(A, B, C, D)             \
> > > +  (__builtin_ia32_maxsh_mask_round ((B), (C),          \
> > > +                                   _mm_setzero_ph (),  \
> > > +                                   (A), (D)))
> > >
> > > -#define _mm_min_round_sh(A, B, C)                                      \
> > > -  (__builtin_ia32_vminsh_v8hf_mask_round ((A), (B),                    \
> > > -                                         _mm_setzero_ph (),            \
> > > -                                         (__mmask8)-1, (C)))
> > > +#define _mm_min_round_sh(A, B, C)                      \
> > > +  (__builtin_ia32_minsh_mask_round ((A), (B),          \
> > > +                                   _mm_setzero_ph (),  \
> > > +                                   (__mmask8)-1, (C)))
> > >
> > > -#define _mm_mask_min_round_sh(A, B, C, D, E)                           \
> > > -  (__builtin_ia32_vminsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> > > +#define _mm_mask_min_round_sh(A, B, C, D, E)                   \
> > > +  (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_min_round_sh(A, B, C, D)                             \
> > > -  (__builtin_ia32_vminsh_v8hf_mask_round ((B), (C),                    \
> > > -                                         _mm_setzero_ph (),            \
> > > -                                         (A), (D)))
> > > +#define _mm_maskz_min_round_sh(A, B, C, D)             \
> > > +  (__builtin_ia32_minsh_mask_round ((B), (C),          \
> > > +                                   _mm_setzero_ph (),  \
> > > +                                   (A), (D)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -991,8 +991,8 @@ extern __inline __mmask32
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__A, __B, __C,
> > > -                                                      (__mmask32) -1);
> > > +  return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
> > > +                                                  (__mmask32) -1);
> > >  }
> > >
> > >  extern __inline __mmask32
> > > @@ -1000,8 +1000,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
> > >                          const int __D)
> > >  {
> > > -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__B, __C, __D,
> > > -                                                      __A);
> > > +  return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
> > > +                                                  __A);
> > >  }
> > >
> > >  extern __inline __mmask32
> > > @@ -1009,9 +1009,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
> > >                           const int __D)
> > >  {
> > > -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__A, __B,
> > > -                                                            __C, (__mmask32) -1,
> > > -                                                            __D);
> > > +  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
> > > +                                                        __C, (__mmask32) -1,
> > > +                                                        __D);
> > >  }
> > >
> > >  extern __inline __mmask32
> > > @@ -1019,23 +1019,23 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
> > >                                const int __D, const int __E)
> > >  {
> > > -  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__B, __C,
> > > -                                                            __D, __A,
> > > -                                                            __E);
> > > +  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
> > > +                                                        __D, __A,
> > > +                                                        __E);
> > >  }
> > >
> > >  #else
> > >  #define _mm512_cmp_ph_mask(A, B, C)                    \
> > > -  (__builtin_ia32_vcmpph_v32hf_mask ((A), (B), (C), (-1)))
> > > +  (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
> > >
> > >  #define _mm512_mask_cmp_ph_mask(A, B, C, D)            \
> > > -  (__builtin_ia32_vcmpph_v32hf_mask ((B), (C), (D), (A)))
> > > +  (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
> > >
> > > -#define _mm512_cmp_round_ph_mask(A, B, C, D)           \
> > > -  (__builtin_ia32_vcmpph_v32hf_mask_round ((A), (B), (C), (-1), (D)))
> > > +#define _mm512_cmp_round_ph_mask(A, B, C, D)                           \
> > > +  (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
> > >
> > > -#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)   \
> > > -  (__builtin_ia32_vcmpph_v32hf_mask_round ((B), (C), (D), (A), (E)))
> > > +#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)                   \
> > > +  (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -1046,9 +1046,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
> > >  {
> > >    return (__mmask8)
> > > -    __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B,
> > > -                                          __C, (__mmask8) -1,
> > > -                                          _MM_FROUND_CUR_DIRECTION);
> > > +    __builtin_ia32_cmpsh_mask_round (__A, __B,
> > > +                                    __C, (__mmask8) -1,
> > > +                                    _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __mmask8
> > > @@ -1057,9 +1057,9 @@ _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
> > >                       const int __D)
> > >  {
> > >    return (__mmask8)
> > > -    __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C,
> > > -                                          __D, __A,
> > > -                                          _MM_FROUND_CUR_DIRECTION);
> > > +    __builtin_ia32_cmpsh_mask_round (__B, __C,
> > > +                                    __D, __A,
> > > +                                    _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __mmask8
> > > @@ -1067,9 +1067,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
> > >                        const int __D)
> > >  {
> > > -  return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B,
> > > -                                                          __C, (__mmask8) -1,
> > > -                                                          __D);
> > > +  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
> > > +                                                    __C, (__mmask8) -1,
> > > +                                                    __D);
> > >  }
> > >
> > >  extern __inline __mmask8
> > > @@ -1077,25 +1077,25 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
> > >                             const int __D, const int __E)
> > >  {
> > > -  return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C,
> > > -                                                          __D, __A,
> > > -                                                          __E);
> > > +  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
> > > +                                                    __D, __A,
> > > +                                                    __E);
> > >  }
> > >
> > >  #else
> > > -#define _mm_cmp_sh_mask(A, B, C)               \
> > > -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), \
> > > -                                         (_MM_FROUND_CUR_DIRECTION)))
> > > +#define _mm_cmp_sh_mask(A, B, C)                                       \
> > > +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1),               \
> > > +                                   (_MM_FROUND_CUR_DIRECTION)))
> > >
> > > -#define _mm_mask_cmp_sh_mask(A, B, C, D)       \
> > > -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A),          \
> > > -                                         (_MM_FROUND_CUR_DIRECTION)))
> > > +#define _mm_mask_cmp_sh_mask(A, B, C, D)                               \
> > > +  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A),                        \
> > > +                                   (_MM_FROUND_CUR_DIRECTION)))
> > >
> > > -#define _mm_cmp_round_sh_mask(A, B, C, D)                              \
> > > -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), (D)))
> > > +#define _mm_cmp_round_sh_mask(A, B, C, D)                      \
> > > +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
> > >
> > > -#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)      \
> > > -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A), (E)))
> > > +#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)              \
> > > +  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -1104,134 +1104,134 @@ extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_comieq_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OS,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_comilt_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OS,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_comile_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OS,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_comigt_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OS,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_comige_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OS,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_comineq_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_US,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_ucomieq_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OQ,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_ucomilt_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OQ,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_ucomile_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OQ,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_ucomigt_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OQ,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_ucomige_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OQ,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_ucomineq_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_UQ,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > -  _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
> > > +_mm_comi_sh (__m128h __A, __m128h __B, const int __P)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P,
> > > -                                               (__mmask8) -1,
> > > -                                               _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
> > > +                                         (__mmask8) -1,
> > > +                                         _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline int
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
> > >  {
> > > -  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P,
> > > -                                               (__mmask8) -1,__R);
> > > +  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
> > > +                                         (__mmask8) -1,__R);
> > >  }
> > >
> > >  #else
> > > -#define _mm_comi_round_sh(A, B, P, R)          \
> > > -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
> > > -#define _mm_comi_sh(A, B, P)           \
> > > -  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), \
> > > -                                         _MM_FROUND_CUR_DIRECTION))
> > > +#define _mm_comi_round_sh(A, B, P, R)                                  \
> > > +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
> > > +#define _mm_comi_sh(A, B, P)                                           \
> > > +  (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1),    \
> > > +                                   _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #endif /* __OPTIMIZE__  */
> > >
> > > @@ -1240,28 +1240,28 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_sqrt_ph (__m512h __A)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__A,
> > > -                                                 _mm512_setzero_ph(),
> > > -                                                 (__mmask32) -1,
> > > -                                                 _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_sqrtph512_mask_round (__A,
> > > +                                             _mm512_setzero_ph(),
> > > +                                             (__mmask32) -1,
> > > +                                             _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__C, __A, __B,
> > > -                                                 _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
> > > +                                             _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__B,
> > > -                                                 _mm512_setzero_ph (),
> > > -                                                 __A,
> > > -                                                 _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_sqrtph512_mask_round (__B,
> > > +                                             _mm512_setzero_ph (),
> > > +                                             __A,
> > > +                                             _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -1269,9 +1269,9 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_sqrt_round_ph (__m512h __A, const int __B)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__A,
> > > -                                                 _mm512_setzero_ph(),
> > > -                                                 (__mmask32) -1, __B);
> > > +  return __builtin_ia32_sqrtph512_mask_round (__A,
> > > +                                             _mm512_setzero_ph(),
> > > +                                             (__mmask32) -1, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -1279,31 +1279,31 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__C, __A, __B, __D);
> > > +  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v32hf_mask_round (__B,
> > > -                                                 _mm512_setzero_ph (),
> > > -                                                 __A, __C);
> > > +  return __builtin_ia32_sqrtph512_mask_round (__B,
> > > +                                             _mm512_setzero_ph (),
> > > +                                             __A, __C);
> > >  }
> > >
> > >  #else
> > > -#define _mm512_sqrt_round_ph(A, B)                                     \
> > > -  (__builtin_ia32_vsqrtph_v32hf_mask_round ((A),                       \
> > > -                                           _mm512_setzero_ph (),       \
> > > -                                           (__mmask32)-1, (B)))
> > > +#define _mm512_sqrt_round_ph(A, B)                             \
> > > +  (__builtin_ia32_sqrtph512_mask_round ((A),                   \
> > > +                                       _mm512_setzero_ph (),   \
> > > +                                       (__mmask32)-1, (B)))
> > >
> > > -#define _mm512_mask_sqrt_round_ph(A, B, C, D)                          \
> > > -  (__builtin_ia32_vsqrtph_v32hf_mask_round ((C), (A), (B), (D)))
> > > +#define _mm512_mask_sqrt_round_ph(A, B, C, D)                  \
> > > +  (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
> > >
> > > -#define _mm512_maskz_sqrt_round_ph(A, B, C)                            \
> > > -  (__builtin_ia32_vsqrtph_v32hf_mask_round ((B),                       \
> > > -                                           _mm512_setzero_ph (),       \
> > > -                                           (A), (C)))
> > > +#define _mm512_maskz_sqrt_round_ph(A, B, C)                    \
> > > +  (__builtin_ia32_sqrtph512_mask_round ((B),                   \
> > > +                                       _mm512_setzero_ph (),   \
> > > +                                       (A), (C)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -1312,23 +1312,23 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_rsqrt_ph (__m512h __A)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v32hf_mask (__A, _mm512_setzero_ph (),
> > > -                                            (__mmask32) -1);
> > > +  return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
> > > +                                        (__mmask32) -1);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v32hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v32hf_mask (__B, _mm512_setzero_ph (),
> > > -                                            __A);
> > > +  return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
> > > +                                        __A);
> > >  }
> > >
> > >  /* Intrinsics vrsqrtsh.  */
> > > @@ -1336,23 +1336,23 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_rsqrt_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vrsqrtsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
> > > -                                           (__mmask8) -1);
> > > +  return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
> > > +                                     (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vrsqrtsh_v8hf_mask (__D, __C, __A, __B);
> > > +  return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vrsqrtsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
> > > -                                           __A);
> > > +  return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
> > > +                                     __A);
> > >  }
> > >
> > >  /* Intrinsics vsqrtsh.  */
> > > @@ -1360,27 +1360,27 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_sqrt_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__B, __A,
> > > -                                                _mm_setzero_ph (),
> > > -                                                (__mmask8) -1,
> > > -                                                _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
> > > +                                          _mm_setzero_ph (),
> > > +                                          (__mmask8) -1,
> > > +                                          _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__D, __C, __A, __B,
> > > -                                                _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
> > > +                                          _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__C, __B,
> > > -                                                _mm_setzero_ph (),
> > > -                                                __A, _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
> > > +                                          _mm_setzero_ph (),
> > > +                                          __A, _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -1388,9 +1388,9 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__B, __A,
> > > -                                                _mm_setzero_ph (),
> > > -                                                (__mmask8) -1, __C);
> > > +  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
> > > +                                          _mm_setzero_ph (),
> > > +                                          (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1398,8 +1398,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                         __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__D, __C, __A, __B,
> > > -                                                __E);
> > > +  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
> > > +                                          __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1407,24 +1407,24 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                          const int __D)
> > >  {
> > > -  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__C, __B,
> > > -                                                _mm_setzero_ph (),
> > > -                                                __A, __D);
> > > +  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
> > > +                                          _mm_setzero_ph (),
> > > +                                          __A, __D);
> > >  }
> > >
> > >  #else
> > >  #define _mm_sqrt_round_sh(A, B, C)                             \
> > > -  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((B), (A),           \
> > > -                                          _mm_setzero_ph (),   \
> > > -                                          (__mmask8)-1, (C)))
> > > +  (__builtin_ia32_sqrtsh_mask_round ((B), (A),                 \
> > > +                                    _mm_setzero_ph (),         \
> > > +                                    (__mmask8)-1, (C)))
> > >
> > >  #define _mm_mask_sqrt_round_sh(A, B, C, D, E)                  \
> > > -  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((D), (C), (A), (B), (E)))
> > > +  (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_sqrt_round_sh(A, B, C, D)                    \
> > > -  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((C), (B),           \
> > > -                                          _mm_setzero_ph (),   \
> > > -                                          (A), (D)))
> > > +#define _mm_maskz_sqrt_round_sh(A, B, C, D)            \
> > > +  (__builtin_ia32_sqrtsh_mask_round ((C), (B),         \
> > > +                                    _mm_setzero_ph (), \
> > > +                                    (A), (D)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -1433,23 +1433,23 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_rcp_ph (__m512h __A)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v32hf_mask (__A, _mm512_setzero_ph (),
> > > -                                          (__mmask32) -1);
> > > +  return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
> > > +                                      (__mmask32) -1);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v32hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_rcpph512_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v32hf_mask (__B, _mm512_setzero_ph (),
> > > -                                          __A);
> > > +  return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  /* Intrinsics vrcpsh.  */
> > > @@ -1457,23 +1457,23 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_rcp_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vrcpsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
> > > -                                         (__mmask8) -1);
> > > +  return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
> > > +                                   (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vrcpsh_v8hf_mask (__D, __C, __A, __B);
> > > +  return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vrcpsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
> > > +                                   __A);
> > >  }
> > >
> > >  /* Intrinsics vscalefph.  */
> > > @@ -1481,28 +1481,28 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_scalef_ph (__m512h __A, __m512h __B)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   (__mmask32) -1,
> > > -                                                   _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_scalefph512_mask_round (__A, __B,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               (__mmask32) -1,
> > > +                                               _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
> > > -                                                   _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
> > > +                                               _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   __A,
> > > -                                                   _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_scalefph512_mask_round (__B, __C,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               __A,
> > > +                                               _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -1510,9 +1510,9 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   (__mmask32) -1, __C);
> > > +  return __builtin_ia32_scalefph512_mask_round (__A, __B,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -1520,8 +1520,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                              __m512h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
> > > -                                                   __E);
> > > +  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
> > > +                                               __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -1529,24 +1529,24 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
> > >                               const int __D)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   __A, __D);
> > > +  return __builtin_ia32_scalefph512_mask_round (__B, __C,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               __A, __D);
> > >  }
> > >
> > >  #else
> > > -#define _mm512_scalef_round_ph(A, B, C)                                        \
> > > -  (__builtin_ia32_vscalefph_v32hf_mask_round ((A), (B),                        \
> > > -                                             _mm512_setzero_ph (),     \
> > > -                                             (__mmask32)-1, (C)))
> > > +#define _mm512_scalef_round_ph(A, B, C)                                \
> > > +  (__builtin_ia32_scalefph512_mask_round ((A), (B),            \
> > > +                                         _mm512_setzero_ph (), \
> > > +                                         (__mmask32)-1, (C)))
> > >
> > >  #define _mm512_mask_scalef_round_ph(A, B, C, D, E)                     \
> > > -  (__builtin_ia32_vscalefph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm512_maskz_scalef_round_ph(A, B, C, D)                       \
> > > -  (__builtin_ia32_vscalefph_v32hf_mask_round ((B), (C),                        \
> > > -                                             _mm512_setzero_ph (),     \
> > > -                                             (A), (D)))
> > > +#define _mm512_maskz_scalef_round_ph(A, B, C, D)               \
> > > +  (__builtin_ia32_scalefph512_mask_round ((B), (C),            \
> > > +                                         _mm512_setzero_ph (), \
> > > +                                         (A), (D)))
> > >
> > >  #endif  /* __OPTIMIZE__ */
> > >
> > > @@ -1555,28 +1555,28 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_scalef_sh (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  (__mmask8) -1,
> > > -                                                  _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_scalefsh_mask_round (__A, __B,
> > > +                                            _mm_setzero_ph (),
> > > +                                            (__mmask8) -1,
> > > +                                            _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
> > > -                                                  _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
> > > +                                            _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  __A,
> > > -                                                  _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_scalefsh_mask_round (__B, __C,
> > > +                                            _mm_setzero_ph (),
> > > +                                            __A,
> > > +                                            _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  #ifdef __OPTIMIZE__
> > > @@ -1584,9 +1584,9 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  (__mmask8) -1, __C);
> > > +  return __builtin_ia32_scalefsh_mask_round (__A, __B,
> > > +                                            _mm_setzero_ph (),
> > > +                                            (__mmask8) -1, __C);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1594,8 +1594,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                           __m128h __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
> > > -                                                  __E);
> > > +  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
> > > +                                            __E);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1603,23 +1603,23 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                            const int __D)
> > >  {
> > > -  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  __A, __D);
> > > +  return __builtin_ia32_scalefsh_mask_round (__B, __C,
> > > +                                            _mm_setzero_ph (),
> > > +                                            __A, __D);
> > >  }
> > >
> > >  #else
> > > -#define _mm_scalef_round_sh(A, B, C)                                     \
> > > -  (__builtin_ia32_vscalefsh_v8hf_mask_round ((A), (B),                   \
> > > -                                            _mm_setzero_ph (),           \
> > > -                                            (__mmask8)-1, (C)))
> > > +#define _mm_scalef_round_sh(A, B, C)                           \
> > > +  (__builtin_ia32_scalefsh_mask_round ((A), (B),               \
> > > +                                      _mm_setzero_ph (),       \
> > > +                                      (__mmask8)-1, (C)))
> > >
> > > -#define _mm_mask_scalef_round_sh(A, B, C, D, E)                                  \
> > > -  (__builtin_ia32_vscalefsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
> > > +#define _mm_mask_scalef_round_sh(A, B, C, D, E)                                \
> > > +  (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm_maskz_scalef_round_sh(A, B, C, D)                            \
> > > -  (__builtin_ia32_vscalefsh_v8hf_mask_round ((B), (C), _mm_setzero_ph (), \
> > > -                                            (A), (D)))
> > > +#define _mm_maskz_scalef_round_sh(A, B, C, D)                          \
> > > +  (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (),    \
> > > +                                      (A), (D)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -1629,37 +1629,37 @@ extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_reduce_ph (__m512h __A, int __B)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   (__mmask32) -1,
> > > -                                                   _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_reduceph512_mask_round (__A, __B,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               (__mmask32) -1,
> > > +                                               _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
> > > -                                                   _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
> > > +                                               _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   __A,
> > > -                                                   _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_reduceph512_mask_round (__B, __C,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               __A,
> > > +                                               _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   (__mmask32) -1, __C);
> > > +  return __builtin_ia32_reduceph512_mask_round (__A, __B,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               (__mmask32) -1, __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -1667,8 +1667,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
> > >                              int __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
> > > -                                                   __E);
> > > +  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
> > > +                                               __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -1676,39 +1676,39 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
> > >                               const int __D)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
> > > -                                                   _mm512_setzero_ph (),
> > > -                                                   __A, __D);
> > > +  return __builtin_ia32_reduceph512_mask_round (__B, __C,
> > > +                                               _mm512_setzero_ph (),
> > > +                                               __A, __D);
> > >  }
> > >
> > >  #else
> > >  #define _mm512_reduce_ph(A, B)                                         \
> > > -  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B),                        \
> > > -                                             _mm512_setzero_ph (),     \
> > > -                                             (__mmask32)-1,            \
> > > -                                             _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_reduceph512_mask_round ((A), (B),                    \
> > > +                                         _mm512_setzero_ph (),         \
> > > +                                         (__mmask32)-1,                \
> > > +                                         _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #define _mm512_mask_reduce_ph(A, B, C, D)                              \
> > > -  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B),      \
> > > -                                             _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B),          \
> > > +                                         _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #define _mm512_maskz_reduce_ph(A, B, C)                                        \
> > > -  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C),                        \
> > > -                                             _mm512_setzero_ph (),     \
> > > -                                             (A), _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_reduceph512_mask_round ((B), (C),                    \
> > > +                                         _mm512_setzero_ph (),         \
> > > +                                         (A), _MM_FROUND_CUR_DIRECTION))
> > >
> > > -#define _mm512_reduce_round_ph(A, B, C)                                        \
> > > -  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B),                        \
> > > -                                             _mm512_setzero_ph (),     \
> > > -                                             (__mmask32)-1, (C)))
> > > +#define _mm512_reduce_round_ph(A, B, C)                                \
> > > +  (__builtin_ia32_reduceph512_mask_round ((A), (B),            \
> > > +                                         _mm512_setzero_ph (), \
> > > +                                         (__mmask32)-1, (C)))
> > >
> > >  #define _mm512_mask_reduce_round_ph(A, B, C, D, E)                     \
> > > -  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm512_maskz_reduce_round_ph(A, B, C, D)                       \
> > > -  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C),                        \
> > > -                                             _mm512_setzero_ph (),     \
> > > -                                             (A), (D)))
> > > +#define _mm512_maskz_reduce_round_ph(A, B, C, D)               \
> > > +  (__builtin_ia32_reduceph512_mask_round ((B), (C),            \
> > > +                                         _mm512_setzero_ph (), \
> > > +                                         (A), (D)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -1718,10 +1718,10 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  (__mmask8) -1,
> > > -                                                  _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
> > > +                                            _mm_setzero_ph (),
> > > +                                            (__mmask8) -1,
> > > +                                            _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1729,26 +1729,26 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                     __m128h __D, int __E)
> > >  {
> > > -  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A, __B,
> > > -                                                  _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
> > > +                                            _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
> > >  {
> > > -  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
> > > -                                                  _mm_setzero_ph (), __A,
> > > -                                                  _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
> > > +                                            _mm_setzero_ph (), __A,
> > > +                                            _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
> > >  {
> > > -  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  (__mmask8) -1, __D);
> > > +  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
> > > +                                            _mm_setzero_ph (),
> > > +                                            (__mmask8) -1, __D);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1756,8 +1756,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                           __m128h __D, int __E, const int __F)
> > >  {
> > > -  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A,
> > > -                                                  __B, __F);
> > > +  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
> > > +                                            __B, __F);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1765,81 +1765,81 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                            int __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  __A, __E);
> > > +  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
> > > +                                            _mm_setzero_ph (),
> > > +                                            __A, __E);
> > >  }
> > >
> > >  #else
> > >  #define _mm_reduce_sh(A, B, C)                                         \
> > > -  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C),            \
> > > -                                            _mm_setzero_ph (), \
> > > -                                            (__mmask8)-1,              \
> > > -                                            _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),                  \
> > > +                                      _mm_setzero_ph (),               \
> > > +                                      (__mmask8)-1,                    \
> > > +                                      _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #define _mm_mask_reduce_sh(A, B, C, D, E)                              \
> > > -  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B),  \
> > > -                                            _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B),                \
> > > +                                      _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #define _mm_maskz_reduce_sh(A, B, C, D)                                        \
> > > -  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D),            \
> > > -                                            _mm_setzero_ph (), \
> > > -                                            (A), _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),                  \
> > > +                                      _mm_setzero_ph (),               \
> > > +                                      (A), _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #define _mm_reduce_round_sh(A, B, C, D)                                \
> > > -  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C),    \
> > > -                                            _mm_setzero_ph (), \
> > > -                                            (__mmask8)-1, (D)))
> > > +  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),          \
> > > +                                      _mm_setzero_ph (),       \
> > > +                                      (__mmask8)-1, (D)))
> > >
> > >  #define _mm_mask_reduce_round_sh(A, B, C, D, E, F)                     \
> > > -  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
> > > +  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
> > >
> > >  #define _mm_maskz_reduce_round_sh(A, B, C, D, E)               \
> > > -  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D),    \
> > > -                                            _mm_setzero_ph (), \
> > > -                                            (A), (E)))
> > > +  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),          \
> > > +                                      _mm_setzero_ph (),       \
> > > +                                      (A), (E)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > >  /* Intrinsics vrndscaleph.  */
> > >  #ifdef __OPTIMIZE__
> > >  extern __inline __m512h
> > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_roundscale_ph (__m512h __A, int __B)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
> > > -                                                     _mm512_setzero_ph (),
> > > -                                                     (__mmask32) -1,
> > > -                                                     _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
> > > +                                                 _mm512_setzero_ph (),
> > > +                                                 (__mmask32) -1,
> > > +                                                 _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
> > > -                                __m512h __C, int __D)
> > > +                          __m512h __C, int __D)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A, __B,
> > > -                                                     _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
> > > +                                                 _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
> > > -                                                     _mm512_setzero_ph (),
> > > -                                                     __A,
> > > -                                                     _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
> > > +                                                 _mm512_setzero_ph (),
> > > +                                                 __A,
> > > +                                                 _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m512h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
> > > -                                                     _mm512_setzero_ph (),
> > > -                                                     (__mmask32) -1,
> > > -                                                     __C);
> > > +  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
> > > +                                                 _mm512_setzero_ph (),
> > > +                                                 (__mmask32) -1,
> > > +                                                 __C);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -1847,8 +1847,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
> > >                                  __m512h __C, int __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A,
> > > -                                                     __B, __E);
> > > +  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
> > > +                                                 __B, __E);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -1856,52 +1856,52 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
> > >                                   const int __D)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
> > > -                                                     _mm512_setzero_ph (),
> > > -                                                     __A, __D);
> > > +  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
> > > +                                                 _mm512_setzero_ph (),
> > > +                                                 __A, __D);
> > >  }
> > >
> > >  #else
> > > -#define _mm512_roundscale_ph(A, B) \
> > > -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B),              \
> > > -                                               _mm512_setzero_ph (),   \
> > > -                                               (__mmask32)-1,          \
> > > -                                               _MM_FROUND_CUR_DIRECTION))
> > > -
> > > -#define _mm512_mask_roundscale_ph(A, B, C, D) \
> > > -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B),    \
> > > -                                               _MM_FROUND_CUR_DIRECTION))
> > > -
> > > -#define _mm512_maskz_roundscale_ph(A, B, C) \
> > > -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C),              \
> > > -                                               _mm512_setzero_ph (),   \
> > > -                                               (A),                    \
> > > -                                               _MM_FROUND_CUR_DIRECTION))
> > > -#define _mm512_roundscale_round_ph(A, B, C) \
> > > -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B),              \
> > > -                                               _mm512_setzero_ph (),   \
> > > -                                               (__mmask32)-1, (C)))
> > > +#define _mm512_roundscale_ph(A, B)                                     \
> > > +  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),                  \
> > > +                                           _mm512_setzero_ph (),       \
> > > +                                           (__mmask32)-1,              \
> > > +                                           _MM_FROUND_CUR_DIRECTION))
> > > +
> > > +#define _mm512_mask_roundscale_ph(A, B, C, D)                          \
> > > +  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B),                \
> > > +                                           _MM_FROUND_CUR_DIRECTION))
> > > +
> > > +#define _mm512_maskz_roundscale_ph(A, B, C)                            \
> > > +  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),                  \
> > > +                                           _mm512_setzero_ph (),       \
> > > +                                           (A),                        \
> > > +                                           _MM_FROUND_CUR_DIRECTION))
> > > +#define _mm512_roundscale_round_ph(A, B, C)                            \
> > > +  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),                  \
> > > +                                           _mm512_setzero_ph (),       \
> > > +                                           (__mmask32)-1, (C)))
> > >
> > >  #define _mm512_mask_roundscale_round_ph(A, B, C, D, E)                 \
> > > -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B), (E)))
> > > +  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
> > >
> > > -#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
> > > -  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C),              \
> > > -                                               _mm512_setzero_ph (),   \
> > > -                                               (A), (D)))
> > > +#define _mm512_maskz_roundscale_round_ph(A, B, C, D)                   \
> > > +  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),                  \
> > > +                                           _mm512_setzero_ph (),       \
> > > +                                           (A), (D)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > >  /* Intrinsics vrndscalesh.  */
> > >  #ifdef __OPTIMIZE__
> > >  extern __inline __m128h
> > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
> > > -                                                    _mm_setzero_ph (),
> > > -                                                    (__mmask8) -1,
> > > -                                                    _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
> > > +                                              _mm_setzero_ph (),
> > > +                                              (__mmask8) -1,
> > > +                                              _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1909,27 +1909,27 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                         __m128h __D, int __E)
> > >  {
> > > -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E, __A, __B,
> > > -                                                    _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
> > > +                                              _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
> > >  {
> > > -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
> > > -                                                    _mm_setzero_ph (), __A,
> > > -                                                    _MM_FROUND_CUR_DIRECTION);
> > > +  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
> > > +                                              _mm_setzero_ph (), __A,
> > > +                                              _MM_FROUND_CUR_DIRECTION);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
> > >  {
> > > -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
> > > -                                                    _mm_setzero_ph (),
> > > -                                                    (__mmask8) -1,
> > > -                                                    __D);
> > > +  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
> > > +                                              _mm_setzero_ph (),
> > > +                                              (__mmask8) -1,
> > > +                                              __D);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1937,8 +1937,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
> > >                               __m128h __D, int __E, const int __F)
> > >  {
> > > -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E,
> > > -                                                    __A, __B, __F);
> > > +  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
> > > +                                              __A, __B, __F);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -1946,46 +1946,46 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
> > >                                int __D, const int __E)
> > >  {
> > > -  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
> > > -                                                    _mm_setzero_ph (),
> > > -                                                    __A, __E);
> > > +  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
> > > +                                              _mm_setzero_ph (),
> > > +                                              __A, __E);
> > >  }
> > >
> > >  #else
> > >  #define _mm_roundscale_sh(A, B, C)                                     \
> > > -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C),          \
> > > -                                              _mm_setzero_ph (),       \
> > > -                                              (__mmask8)-1, \
> > > -                                              _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),                        \
> > > +                                        _mm_setzero_ph (),             \
> > > +                                        (__mmask8)-1,                  \
> > > +                                        _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #define _mm_mask_roundscale_sh(A, B, C, D, E)                          \
> > > -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), \
> > > -                                              _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B),      \
> > > +                                        _MM_FROUND_CUR_DIRECTION))
> > >
> > >  #define _mm_maskz_roundscale_sh(A, B, C, D)                            \
> > > -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D),          \
> > > -                                              _mm_setzero_ph (),       \
> > > -                                              (A), _MM_FROUND_CUR_DIRECTION))
> > > +  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),                        \
> > > +                                        _mm_setzero_ph (),             \
> > > +                                        (A), _MM_FROUND_CUR_DIRECTION))
> > >
> > > -#define _mm_roundscale_round_sh(A, B, C, D)                            \
> > > -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C),          \
> > > -                                              _mm_setzero_ph (),       \
> > > -                                              (__mmask8)-1, (D)))
> > > +#define _mm_roundscale_round_sh(A, B, C, D)                    \
> > > +  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),                \
> > > +                                        _mm_setzero_ph (),     \
> > > +                                        (__mmask8)-1, (D)))
> > >
> > >  #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F)                 \
> > > -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
> > > +  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
> > >
> > > -#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)                   \
> > > -  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D),          \
> > > -                                              _mm_setzero_ph (),       \
> > > -                                              (A), (E)))
> > > +#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)           \
> > > +  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),                \
> > > +                                        _mm_setzero_ph (),     \
> > > +                                        (A), (E)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > >  /* Intrinsics vfpclasssh.  */
> > >  #ifdef __OPTIMIZE__
> > >  extern __inline __mmask8
> > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_fpclass_sh_mask (__m128h __A, const int __imm)
> > >  {
> > >    return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
> > > @@ -2031,11 +2031,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
> > >
> > >  #else
> > >  #define _mm512_mask_fpclass_ph_mask(u, x, c)                           \
> > > -  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x),\
> > > +  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
> > >                                                  (int) (c),(__mmask32)(u)))
> > >
> > >  #define _mm512_fpclass_ph_mask(x, c)                                    \
> > > -  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x),\
> > > +  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
> > >                                                  (int) (c),(__mmask32)-1))
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -2141,9 +2141,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_getexp_round_ph (__m512h __A, const int __R)
> > >  {
> > >    return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
> > > -                                                  (__v32hf)
> > > -                                                  _mm512_setzero_ph (),
> > > -                                                  (__mmask32) -1, __R);
> > > +                                                   (__v32hf)
> > > +                                                   _mm512_setzero_ph (),
> > > +                                                   (__mmask32) -1, __R);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -2152,8 +2152,8 @@ _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
> > >                              const int __R)
> > >  {
> > >    return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
> > > -                                                  (__v32hf) __W,
> > > -                                                  (__mmask32) __U, __R);
> > > +                                                   (__v32hf) __W,
> > > +                                                   (__mmask32) __U, __R);
> > >  }
> > >
> > >  extern __inline __m512h
> > > @@ -2161,37 +2161,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
> > >  {
> > >    return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
> > > -                                                  (__v32hf)
> > > -                                                  _mm512_setzero_ph (),
> > > -                                                  (__mmask32) __U, __R);
> > > +                                                   (__v32hf)
> > > +                                                   _mm512_setzero_ph (),
> > > +                                                   (__mmask32) __U, __R);
> > >  }
> > >
> > >  #else
> > > -#define _mm_getexp_round_sh(A, B, R)                                           \
> > > -  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),           \
> > > -                                              (__v8hf)(__m128h)(B),            \
> > > -                                              (__v8hf)_mm_setzero_ph(),        \
> > > +#define _mm_getexp_round_sh(A, B, R)                                   \
> > > +  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),   \
> > > +                                              (__v8hf)(__m128h)(B),    \
> > > +                                              (__v8hf)_mm_setzero_ph(), \
> > >                                                (__mmask8)-1, R))
> > >
> > > -#define _mm_mask_getexp_round_sh(W, U, A, B, C)                                        \
> > > +#define _mm_mask_getexp_round_sh(W, U, A, B, C)                        \
> > >    (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
> > >
> > > -#define _mm_maskz_getexp_round_sh(U, A, B, C)                                  \
> > > -  (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,                            \
> > > -                                             (__v8hf)_mm_setzero_ph(),         \
> > > +#define _mm_maskz_getexp_round_sh(U, A, B, C)                          \
> > > +  (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,                    \
> > > +                                             (__v8hf)_mm_setzero_ph(), \
> > >                                               U, C)
> > >
> > > -#define _mm512_getexp_round_ph(A, R)                                           \
> > > -  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
> > > -  (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
> > > +#define _mm512_getexp_round_ph(A, R)                                   \
> > > +  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),     \
> > > +                                           (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
> > >
> > > -#define _mm512_mask_getexp_round_ph(W, U, A, R)                                        \
> > > -  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
> > > -  (__v32hf)(__m512h)(W), (__mmask32)(U), R))
> > > +#define _mm512_mask_getexp_round_ph(W, U, A, R)                                \
> > > +  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),     \
> > > +                                           (__v32hf)(__m512h)(W), (__mmask32)(U), R))
> > >
> > > -#define _mm512_maskz_getexp_round_ph(U, A, R)                                  \
> > > -  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
> > > -  (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
> > > +#define _mm512_maskz_getexp_round_ph(U, A, R)                          \
> > > +  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),     \
> > > +                                           (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
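
Not part of the patch, just a quick sanity-check sketch: since only the internal
__builtin_ia32_* spellings change, code written against the public intrinsics should
be unaffected.  This assumes a compiler with this series applied and something like
-mavx512fp16; the file and function names below are made up for illustration.

  /* roundscale_sketch.c: gcc -O2 -mavx512fp16 -c roundscale_sketch.c  */
  #include <immintrin.h>

  __m512h
  round_to_integral (__m512h x)
  {
    /* imm 0x00 rounds every half-float lane to the nearest integral value;
       with this patch it expands to __builtin_ia32_rndscaleph512_mask_round.  */
    return _mm512_roundscale_ph (x, 0x00);
  }

  __m512h
  round_to_integral_masked (__m512h src, __mmask32 m, __m512h x)
  {
    /* Lanes whose mask bit is clear keep the corresponding value from src.  */
    return _mm512_mask_roundscale_ph (src, m, x, 0x00);
  }

The intrinsic prototypes stay exactly as before; only the builtin name underneath
is renamed.
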
> > > diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
> > > index 206d60407fc..e9478792a03 100644
> > > --- a/gcc/config/i386/avx512fp16vlintrin.h
> > > +++ b/gcc/config/i386/avx512fp16vlintrin.h
> > > @@ -53,30 +53,30 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vaddph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
> > >  {
> > > -  return __builtin_ia32_vaddph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vaddph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vaddph_v16hf_mask (__B, __C,
> > > -                                          _mm256_setzero_ph (), __A);
> > > +  return __builtin_ia32_addph256_mask (__B, __C,
> > > +                                      _mm256_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -97,30 +97,30 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vsubph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
> > >  {
> > > -  return __builtin_ia32_vsubph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vsubph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vsubph_v16hf_mask (__B, __C,
> > > -                                          _mm256_setzero_ph (), __A);
> > > +  return __builtin_ia32_subph256_mask (__B, __C,
> > > +                                      _mm256_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -141,30 +141,30 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vmulph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
> > >  {
> > > -  return __builtin_ia32_vmulph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vmulph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vmulph_v16hf_mask (__B, __C,
> > > -                                          _mm256_setzero_ph (), __A);
> > > +  return __builtin_ia32_mulph256_mask (__B, __C,
> > > +                                      _mm256_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -185,30 +185,30 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vdivph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
> > >  {
> > > -  return __builtin_ia32_vdivph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vdivph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vdivph_v16hf_mask (__B, __C,
> > > -                                          _mm256_setzero_ph (), __A);
> > > +  return __builtin_ia32_divph256_mask (__B, __C,
> > > +                                      _mm256_setzero_ph (), __A);
> > >  }
> > >
> > >  /* Intrinsics v[max,min]ph.  */
> > > @@ -216,96 +216,96 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_max_ph (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v8hf_mask (__A, __B,
> > > -                                         _mm_setzero_ph (),
> > > -                                         (__mmask8) -1);
> > > +  return __builtin_ia32_maxph128_mask (__A, __B,
> > > +                                      _mm_setzero_ph (),
> > > +                                      (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_max_ph (__m256h __A, __m256h __B)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v16hf_mask (__A, __B,
> > > -                                         _mm256_setzero_ph (),
> > > -                                         (__mmask16) -1);
> > > +  return __builtin_ia32_maxph256_mask (__A, __B,
> > > +                                      _mm256_setzero_ph (),
> > > +                                      (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vmaxph_v16hf_mask (__B, __C,
> > > -                                          _mm256_setzero_ph (), __A);
> > > +  return __builtin_ia32_maxph256_mask (__B, __C,
> > > +                                      _mm256_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_min_ph (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vminph_v8hf_mask (__A, __B,
> > > -                                         _mm_setzero_ph (),
> > > -                                         (__mmask8) -1);
> > > +  return __builtin_ia32_minph128_mask (__A, __B,
> > > +                                      _mm_setzero_ph (),
> > > +                                      (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_min_ph (__m256h __A, __m256h __B)
> > >  {
> > > -  return __builtin_ia32_vminph_v16hf_mask (__A, __B,
> > > -                                         _mm256_setzero_ph (),
> > > -                                         (__mmask16) -1);
> > > +  return __builtin_ia32_minph256_mask (__A, __B,
> > > +                                      _mm256_setzero_ph (),
> > > +                                      (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vminph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
> > >  {
> > > -  return __builtin_ia32_vminph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vminph_v8hf_mask (__B, __C, _mm_setzero_ph (),
> > > -                                         __A);
> > > +  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vminph_v16hf_mask (__B, __C,
> > > -                                          _mm256_setzero_ph (), __A);
> > > +  return __builtin_ia32_minph256_mask (__B, __C,
> > > +                                      _mm256_setzero_ph (), __A);
> > >  }
> > >
> > >  /* vcmpph */
> > > @@ -314,8 +314,8 @@ extern __inline __mmask8
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
> > >  {
> > > -  return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__A, __B, __C,
> > > -                                                    (__mmask8) -1);
> > > +  return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
> > > +                                                 (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __mmask8
> > > @@ -323,15 +323,15 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
> > >                       const int __D)
> > >  {
> > > -  return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__B, __C, __D, __A);
> > > +  return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
> > >  }
> > >
> > >  extern __inline __mmask16
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
> > >  {
> > > -  return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__A, __B, __C,
> > > -                                                      (__mmask16) -1);
> > > +  return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
> > > +                                                  (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __mmask16
> > > @@ -339,22 +339,22 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
> > >                       const int __D)
> > >  {
> > > -  return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__B, __C, __D,
> > > -                                                      __A);
> > > +  return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D,
> > > +                                                  __A);
> > >  }
> > >
> > >  #else
> > > -#define _mm_cmp_ph_mask(A, B, C)               \
> > > -  (__builtin_ia32_vcmpph_v8hf_mask ((A), (B), (C), (-1)))
> > > +#define _mm_cmp_ph_mask(A, B, C)                       \
> > > +  (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))
> > >
> > > -#define _mm_mask_cmp_ph_mask(A, B, C, D)       \
> > > -  (__builtin_ia32_vcmpph_v8hf_mask ((B), (C), (D), (A)))
> > > +#define _mm_mask_cmp_ph_mask(A, B, C, D)               \
> > > +  (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))
> > >
> > > -#define _mm256_cmp_ph_mask(A, B, C)            \
> > > -  (__builtin_ia32_vcmpph_v16hf_mask ((A), (B), (C), (-1)))
> > > +#define _mm256_cmp_ph_mask(A, B, C)                    \
> > > +  (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))
> > >
> > > -#define _mm256_mask_cmp_ph_mask(A, B, C, D)    \
> > > -  (__builtin_ia32_vcmpph_v16hf_mask ((B), (C), (D), (A)))
> > > +#define _mm256_mask_cmp_ph_mask(A, B, C, D)            \
> > > +  (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
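
Similarly for the VL compare intrinsics, a minimal usage sketch (not from the patch;
assumes -mavx512fp16 -mavx512vl) of the public entry points that now sit on top of
__builtin_ia32_cmpph128_mask / __builtin_ia32_cmpph256_mask:

  #include <immintrin.h>

  __mmask8
  lanes_less_than (__m128h a, __m128h b)
  {
    /* One result bit per 16-bit lane: a[i] < b[i], ordered, signaling.  */
    return _mm_cmp_ph_mask (a, b, _CMP_LT_OS);
  }

  __mmask16
  lanes_equal_where_allowed (__mmask16 allowed, __m256h a, __m256h b)
  {
    /* Lanes with a clear bit in 'allowed' come back as 0 in the result.  */
    return _mm256_mask_cmp_ph_mask (allowed, a, b, _CMP_EQ_OQ);
  }
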
> > > @@ -363,46 +363,46 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_sqrt_ph (__m128h __A)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v8hf_mask (__A, _mm_setzero_ph (),
> > > -                                          (__mmask8) -1);
> > > +  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
> > > +                                       (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_sqrt_ph (__m256h __A)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v16hf_mask (__A, _mm256_setzero_ph (),
> > > -                                           (__mmask16) -1);
> > > +  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
> > > +                                       (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v8hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v16hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v8hf_mask (__B, _mm_setzero_ph (),
> > > -                                          __A);
> > > +  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (),
> > > +                                       __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
> > >  {
> > > -  return __builtin_ia32_vsqrtph_v16hf_mask (__B, _mm256_setzero_ph (),
> > > -                                           __A);
> > > +  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (),
> > > +                                       __A);
> > >  }
> > >
> > >  /* Intrinsics vrsqrtph.  */
> > > @@ -410,45 +410,45 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_rsqrt_ph (__m128h __A)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v8hf_mask (__A, _mm_setzero_ph (),
> > > -                                           (__mmask8) -1);
> > > +  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
> > > +                                        (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_rsqrt_ph (__m256h __A)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v16hf_mask (__A, _mm256_setzero_ph (),
> > > -                                            (__mmask16) -1);
> > > +  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
> > > +                                        (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v8hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v16hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v8hf_mask (__B, _mm_setzero_ph (), __A);
> > > +  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
> > >  {
> > > -  return __builtin_ia32_vrsqrtph_v16hf_mask (__B, _mm256_setzero_ph (),
> > > -                                            __A);
> > > +  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (),
> > > +                                        __A);
> > >  }
> > >
> > >  /* Intrinsics vrcpph.  */
> > > @@ -456,45 +456,45 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_rcp_ph (__m128h __A)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v8hf_mask (__A, _mm_setzero_ph (),
> > > -                                         (__mmask8) -1);
> > > +  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
> > > +                                      (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_rcp_ph (__m256h __A)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v16hf_mask (__A, _mm256_setzero_ph (),
> > > -                                          (__mmask16) -1);
> > > +  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
> > > +                                      (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v8hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v16hf_mask (__C, __A, __B);
> > > +  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v8hf_mask (__B, _mm_setzero_ph (), __A);
> > > +  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
> > >  {
> > > -  return __builtin_ia32_vrcpph_v16hf_mask (__B, _mm256_setzero_ph (),
> > > -                                          __A);
> > > +  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (),
> > > +                                      __A);
> > >  }
> > >
> > >  /* Intrinsics vscalefph.  */
> > > @@ -502,25 +502,25 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_scalef_ph (__m128h __A, __m128h __B)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v8hf_mask (__A, __B,
> > > -                                            _mm_setzero_ph (),
> > > -                                            (__mmask8) -1);
> > > +  return __builtin_ia32_scalefph128_mask (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_scalef_ph (__m256h __A, __m256h __B)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v16hf_mask (__A, __B,
> > > -                                             _mm256_setzero_ph (),
> > > -                                             (__mmask16) -1);
> > > +  return __builtin_ia32_scalefph256_mask (__A, __B,
> > > +                                         _mm256_setzero_ph (),
> > > +                                         (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > > @@ -528,24 +528,24 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
> > >                        __m256h __D)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v8hf_mask (__B, __C,
> > > -                                            _mm_setzero_ph (), __A);
> > > +  return __builtin_ia32_scalefph128_mask (__B, __C,
> > > +                                         _mm_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
> > >  {
> > > -  return __builtin_ia32_vscalefph_v16hf_mask (__B, __C,
> > > -                                             _mm256_setzero_ph (),
> > > -                                             __A);
> > > +  return __builtin_ia32_scalefph256_mask (__B, __C,
> > > +                                         _mm256_setzero_ph (),
> > > +                                         __A);
> > >  }
> > >
> > >  /* Intrinsics vreduceph.  */
> > > @@ -554,109 +554,109 @@ extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_reduce_ph (__m128h __A, int __B)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v8hf_mask (__A, __B,
> > > -                                            _mm_setzero_ph (),
> > > -                                            (__mmask8) -1);
> > > +  return __builtin_ia32_reduceph128_mask (__A, __B,
> > > +                                         _mm_setzero_ph (),
> > > +                                         (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v8hf_mask (__B, __C,
> > > -                                            _mm_setzero_ph (), __A);
> > > +  return __builtin_ia32_reduceph128_mask (__B, __C,
> > > +                                         _mm_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_reduce_ph (__m256h __A, int __B)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v16hf_mask (__A, __B,
> > > -                                             _mm256_setzero_ph (),
> > > -                                             (__mmask16) -1);
> > > +  return __builtin_ia32_reduceph256_mask (__A, __B,
> > > +                                         _mm256_setzero_ph (),
> > > +                                         (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vreduceph_v16hf_mask (__B, __C,
> > > -                                             _mm256_setzero_ph (),
> > > -                                             __A);
> > > +  return __builtin_ia32_reduceph256_mask (__B, __C,
> > > +                                         _mm256_setzero_ph (),
> > > +                                         __A);
> > >  }
> > >
> > >  #else
> > > -#define _mm_reduce_ph(A, B)                                    \
> > > -  (__builtin_ia32_vreduceph_v8hf_mask ((A), (B),\
> > > -                                      _mm_setzero_ph (),       \
> > > -                                      ((__mmask8)-1)))
> > > +#define _mm_reduce_ph(A, B)                            \
> > > +  (__builtin_ia32_reduceph128_mask ((A), (B),          \
> > > +                                   _mm_setzero_ph (),  \
> > > +                                   ((__mmask8)-1)))
> > >
> > > -#define _mm_mask_reduce_ph(A,  B,  C, D)               \
> > > -  (__builtin_ia32_vreduceph_v8hf_mask ((C), (D), (A), (B)))
> > > +#define _mm_mask_reduce_ph(A,  B,  C, D)                       \
> > > +  (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))
> > >
> > > -#define _mm_maskz_reduce_ph(A,  B, C)                          \
> > > -  (__builtin_ia32_vreduceph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
> > > +#define _mm_maskz_reduce_ph(A,  B, C)                                  \
> > > +  (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))
> > >
> > >  #define _mm256_reduce_ph(A, B)                                 \
> > > -  (__builtin_ia32_vreduceph_v16hf_mask ((A), (B),\
> > > -                                       _mm256_setzero_ph (),   \
> > > -                                       ((__mmask16)-1)))
> > > +  (__builtin_ia32_reduceph256_mask ((A), (B),                  \
> > > +                                   _mm256_setzero_ph (),       \
> > > +                                   ((__mmask16)-1)))
> > >
> > > -#define _mm256_mask_reduce_ph(A, B, C, D)              \
> > > -  (__builtin_ia32_vreduceph_v16hf_mask ((C), (D), (A), (B)))
> > > +#define _mm256_mask_reduce_ph(A, B, C, D)                      \
> > > +  (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))
> > >
> > > -#define _mm256_maskz_reduce_ph(A, B, C)                                \
> > > -  (__builtin_ia32_vreduceph_v16hf_mask ((B), (C), _mm256_setzero_ph (), (A)))
> > > +#define _mm256_maskz_reduce_ph(A, B, C)                                        \
> > > +  (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > >  /* Intrinsics vrndscaleph.  */
> > >  #ifdef __OPTIMIZE__
> > > -extern __inline __m128h
> > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > -_mm_roundscale_ph (__m128h __A, int __B)
> > > -{
> > > -  return __builtin_ia32_vrndscaleph_v8hf_mask (__A, __B,
> > > -                                              _mm_setzero_ph (),
> > > -                                              (__mmask8) -1);
> > > -}
> > > +  extern __inline __m128h
> > > +  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > +  _mm_roundscale_ph (__m128h __A, int __B)
> > > +  {
> > > +    return __builtin_ia32_rndscaleph128_mask (__A, __B,
> > > +                                             _mm_setzero_ph (),
> > > +                                             (__mmask8) -1);
> > > +  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v8hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m128h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v8hf_mask (__B, __C,
> > > -                                              _mm_setzero_ph (), __A);
> > > +  return __builtin_ia32_rndscaleph128_mask (__B, __C,
> > > +                                           _mm_setzero_ph (), __A);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_roundscale_ph (__m256h __A, int __B)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v16hf_mask (__A, __B,
> > > -                                               _mm256_setzero_ph (),
> > > -                                               (__mmask16) -1);
> > > +  return __builtin_ia32_rndscaleph256_mask (__A, __B,
> > > +                                           _mm256_setzero_ph (),
> > > +                                           (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > > @@ -664,40 +664,40 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
> > >                            int __D)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v16hf_mask (__C, __D, __A, __B);
> > > +  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
> > >  }
> > >
> > >  extern __inline __m256h
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
> > >  {
> > > -  return __builtin_ia32_vrndscaleph_v16hf_mask (__B, __C,
> > > -                                               _mm256_setzero_ph (),
> > > -                                               __A);
> > > +  return __builtin_ia32_rndscaleph256_mask (__B, __C,
> > > +                                           _mm256_setzero_ph (),
> > > +                                           __A);
> > >  }
> > >
> > >  #else
> > > -#define _mm_roundscale_ph(A, B) \
> > > -  (__builtin_ia32_vrndscaleph_v8hf_mask ((A), (B), _mm_setzero_ph (),  \
> > > -                                        ((__mmask8)-1)))
> > > +#define _mm_roundscale_ph(A, B)                                                \
> > > +  (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (),     \
> > > +                                     ((__mmask8)-1)))
> > >
> > > -#define _mm_mask_roundscale_ph(A, B, C, D) \
> > > -  (__builtin_ia32_vrndscaleph_v8hf_mask ((C), (D), (A), (B)))
> > > +#define _mm_mask_roundscale_ph(A, B, C, D)                     \
> > > +  (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))
> > >
> > > -#define _mm_maskz_roundscale_ph(A, B, C) \
> > > -  (__builtin_ia32_vrndscaleph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
> > > +#define _mm_maskz_roundscale_ph(A, B, C)                               \
> > > +  (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))
> > >
> > > -#define _mm256_roundscale_ph(A, B) \
> > > -  (__builtin_ia32_vrndscaleph_v16hf_mask ((A), (B),          \
> > > -                                        _mm256_setzero_ph(), \
> > > -                                         ((__mmask16)-1)))
> > > +#define _mm256_roundscale_ph(A, B)                             \
> > > +  (__builtin_ia32_rndscaleph256_mask ((A), (B),                        \
> > > +                                     _mm256_setzero_ph(),      \
> > > +                                     ((__mmask16)-1)))
> > >
> > > -#define _mm256_mask_roundscale_ph(A, B, C, D) \
> > > -  (__builtin_ia32_vrndscaleph_v16hf_mask ((C), (D), (A), (B)))
> > > +#define _mm256_mask_roundscale_ph(A, B, C, D)                  \
> > > +  (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))
> > >
> > > -#define _mm256_maskz_roundscale_ph(A, B, C) \
> > > -  (__builtin_ia32_vrndscaleph_v16hf_mask ((B), (C),                    \
> > > -                                         _mm256_setzero_ph (), (A)))
> > > +#define _mm256_maskz_roundscale_ph(A, B, C)                            \
> > > +  (__builtin_ia32_rndscaleph256_mask ((B), (C),                                \
> > > +                                     _mm256_setzero_ph (), (A)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > @@ -705,7 +705,7 @@ _mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
> > >  #ifdef __OPTIMIZE__
> > >  extern __inline __mmask8
> > >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > > -_mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
> > > +  _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
> > >  {
> > >    return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
> > >                                                       __imm, __U);
> > > @@ -725,7 +725,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
> > >  {
> > >    return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
> > > -                                                     __imm, __U);
> > > +                                                      __imm, __U);
> > >  }
> > >
> > >  extern __inline __mmask16
> > > @@ -733,26 +733,26 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_fpclass_ph_mask (__m256h __A, const int __imm)
> > >  {
> > >    return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
> > > -                                                     __imm,
> > > -                                                     (__mmask16) -1);
> > > +                                                      __imm,
> > > +                                                      (__mmask16) -1);
> > >  }
> > >
> > >  #else
> > >  #define _mm_fpclass_ph_mask(X, C)                                       \
> > > -  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),  \
> > > +  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),        \
> > >                                                 (int) (C),(__mmask8)-1))
> > >
> > >  #define _mm_mask_fpclass_ph_mask(u, X, C)                               \
> > > -  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),  \
> > > +  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),        \
> > >                                                 (int) (C),(__mmask8)(u)))
> > >
> > >  #define _mm256_fpclass_ph_mask(X, C)                                    \
> > > -  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X),  \
> > > -                                               (int) (C),(__mmask16)-1))
> > > +  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
> > > +                                                (int) (C),(__mmask16)-1))
> > >
> > >  #define _mm256_mask_fpclass_ph_mask(u, X, C)                           \
> > > -  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X),  \
> > > -                                               (int) (C),(__mmask16)(u)))
> > > +  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
> > > +                                                (int) (C),(__mmask16)(u)))
> > >  #endif /* __OPTIMIZE__ */
> > >
> > >  /* Intrinsics vgetexpph, vgetexpsh.  */
> > > @@ -761,9 +761,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_getexp_ph (__m256h __A)
> > >  {
> > >    return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
> > > -                                                  (__v16hf)
> > > -                                                  _mm256_setzero_ph (),
> > > -                                                  (__mmask16) -1);
> > > +                                                   (__v16hf)
> > > +                                                   _mm256_setzero_ph (),
> > > +                                                   (__mmask16) -1);
> > >  }
> > >
> > >  extern __inline __m256h
> > > @@ -771,8 +771,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
> > >  {
> > >    return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
> > > -                                                  (__v16hf) __W,
> > > -                                                  (__mmask16) __U);
> > > +                                                   (__v16hf) __W,
> > > +                                                   (__mmask16) __U);
> > >  }
> > >
> > >  extern __inline __m256h
> > > @@ -780,9 +780,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
> > >  {
> > >    return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
> > > -                                                  (__v16hf)
> > > -                                                  _mm256_setzero_ph (),
> > > -                                                  (__mmask16) __U);
> > > +                                                   (__v16hf)
> > > +                                                   _mm256_setzero_ph (),
> > > +                                                   (__mmask16) __U);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -790,9 +790,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_getexp_ph (__m128h __A)
> > >  {
> > >    return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
> > > -                                                  (__v8hf)
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  (__mmask8) -1);
> > > +                                                   (__v8hf)
> > > +                                                   _mm_setzero_ph (),
> > > +                                                   (__mmask8) -1);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -800,8 +800,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
> > >  {
> > >    return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
> > > -                                                  (__v8hf) __W,
> > > -                                                  (__mmask8) __U);
> > > +                                                   (__v8hf) __W,
> > > +                                                   (__mmask8) __U);
> > >  }
> > >
> > >  extern __inline __m128h
> > > @@ -809,9 +809,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > >  _mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
> > >  {
> > >    return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
> > > -                                                  (__v8hf)
> > > -                                                  _mm_setzero_ph (),
> > > -                                                  (__mmask8) __U);
> > > +                                                   (__v8hf)
> > > +                                                   _mm_setzero_ph (),
> > > +                                                   (__mmask8) __U);
> > >  }
> > >
> > >
> > > @@ -892,41 +892,41 @@ _mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
> > >  }
> > >
> > >  #else
> > > -#define _mm256_getmant_ph(X, B, C)                                              \
> > > -  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
> > > -                                        (int)(((C)<<2) | (B)),                 \
> > > -                                         (__v16hf)(__m256h)_mm256_setzero_ph (),\
> > > -                                         (__mmask16)-1))
> > > -
> > > -#define _mm256_mask_getmant_ph(W, U, X, B, C)                                   \
> > > -  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
> > > -                                        (int)(((C)<<2) | (B)),                 \
> > > -                                         (__v16hf)(__m256h)(W),                 \
> > > -                                         (__mmask16)(U)))
> > > -
> > > -#define _mm256_maskz_getmant_ph(U, X, B, C)                                     \
> > > -  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
> > > -                                        (int)(((C)<<2) | (B)),                 \
> > > -                                         (__v16hf)(__m256h)_mm256_setzero_ph (),\
> > > -                                         (__mmask16)(U)))
> > > -
> > > -#define _mm_getmant_ph(X, B, C)                                                 \
> > > -  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
> > > -                                        (int)(((C)<<2) | (B)),                 \
> > > -                                         (__v8hf)(__m128h)_mm_setzero_ph (),   \
> > > -                                         (__mmask8)-1))
> > > -
> > > -#define _mm_mask_getmant_ph(W, U, X, B, C)                                      \
> > > -  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
> > > -                                        (int)(((C)<<2) | (B)),                 \
> > > -                                         (__v8hf)(__m128h)(W),                 \
> > > -                                         (__mmask8)(U)))
> > > -
> > > -#define _mm_maskz_getmant_ph(U, X, B, C)                                        \
> > > -  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
> > > -                                        (int)(((C)<<2) | (B)),                 \
> > > -                                         (__v8hf)(__m128h)_mm_setzero_ph (),   \
> > > -                                         (__mmask8)(U)))
> > > +#define _mm256_getmant_ph(X, B, C)                                     \
> > > +  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
> > > +                                              (int)(((C)<<2) | (B)),   \
> > > +                                              (__v16hf)(__m256h)_mm256_setzero_ph (), \
> > > +                                              (__mmask16)-1))
> > > +
> > > +#define _mm256_mask_getmant_ph(W, U, X, B, C)                          \
> > > +  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
> > > +                                              (int)(((C)<<2) | (B)),   \
> > > +                                              (__v16hf)(__m256h)(W),   \
> > > +                                              (__mmask16)(U)))
> > > +
> > > +#define _mm256_maskz_getmant_ph(U, X, B, C)                            \
> > > +  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \
> > > +                                              (int)(((C)<<2) | (B)),   \
> > > +                                              (__v16hf)(__m256h)_mm256_setzero_ph (), \
> > > +                                              (__mmask16)(U)))
> > > +
> > > +#define _mm_getmant_ph(X, B, C)                                                \
> > > +  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),  \
> > > +                                              (int)(((C)<<2) | (B)),   \
> > > +                                              (__v8hf)(__m128h)_mm_setzero_ph (), \
> > > +                                              (__mmask8)-1))
> > > +
> > > +#define _mm_mask_getmant_ph(W, U, X, B, C)                             \
> > > +  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),  \
> > > +                                              (int)(((C)<<2) | (B)),   \
> > > +                                              (__v8hf)(__m128h)(W),    \
> > > +                                              (__mmask8)(U)))
> > > +
> > > +#define _mm_maskz_getmant_ph(U, X, B, C)                               \
> > > +  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),  \
> > > +                                              (int)(((C)<<2) | (B)),   \
> > > +                                              (__v8hf)(__m128h)_mm_setzero_ph (), \
> > > +                                              (__mmask8)(U)))
> > >
> > >  #endif /* __OPTIMIZE__ */
> > >
> > > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> > > index bfa1d56a7b8..10f6fd87cbb 100644
> > > --- a/gcc/config/i386/i386-builtin.def
> > > +++ b/gcc/config/i386/i386-builtin.def
> > > @@ -2775,49 +2775,49 @@ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__b
> > >  BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
> > >
> > >  /* AVX512FP16.  */
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_vaddph_v8hf_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_vaddph_v16hf_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_vaddph_v32hf_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_vsubph_v8hf_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_vsubph_v16hf_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_vsubph_v32hf_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_vmulph_v8hf_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_vmulph_v16hf_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_vmulph_v32hf_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_vdivph_v8hf_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_vdivph_v16hf_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_vdivph_v32hf_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_vaddsh_v8hf_mask", IX86_BUILTIN_VADDSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_vsubsh_v8hf_mask", IX86_BUILTIN_VSUBSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_vmulsh_v8hf_mask", IX86_BUILTIN_VMULSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_vdivsh_v8hf_mask", IX86_BUILTIN_VDIVSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_vmaxph_v8hf_mask", IX86_BUILTIN_VMAXPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_vmaxph_v16hf_mask", IX86_BUILTIN_VMAXPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_vmaxph_v32hf_mask", IX86_BUILTIN_VMAXPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_vminph_v8hf_mask", IX86_BUILTIN_VMINPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_vminph_v16hf_mask", IX86_BUILTIN_VMINPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_vminph_v32hf_mask", IX86_BUILTIN_VMINPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_vmaxsh_v8hf_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_vminsh_v8hf_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_vcmpph_v8hf_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_vcmpph_v16hf_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_vcmpph_v32hf_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_vsqrtph_v8hf_mask", IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_vsqrtph_v16hf_mask", IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_vrsqrtph_v8hf_mask", IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_vrsqrtph_v16hf_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_vrsqrtph_v32hf_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_vrsqrtsh_v8hf_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_vrcpph_v8hf_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_vrcpph_v16hf_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_vrcpph_v32hf_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_vrcpsh_v8hf_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_vscalefph_v8hf_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_vscalefph_v16hf_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_vreduceph_v8hf_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_vreduceph_v16hf_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_vrndscaleph_v8hf_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> > > -BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_vrndscaleph_v16hf_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_VADDSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_VSUBSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_VMULSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_VDIVSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_VMAXPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_VMAXPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_VMAXPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_VMINPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_VMINPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_VMINPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_reduceph128_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_reduceph256_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_rndscaleph128_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
> > > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
> > >  BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI)
> > >  BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
> > >  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
> > > @@ -3027,28 +3027,28 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "_
> > >  BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
> > >
> > >  /* AVX512FP16.  */
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_vaddph_v32hf_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_vsubph_v32hf_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_vmulph_v32hf_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_vdivph_v32hf_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_vaddsh_v8hf_mask_round", IX86_BUILTIN_VADDSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_vsubsh_v8hf_mask_round", IX86_BUILTIN_VSUBSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_vmulsh_v8hf_mask_round", IX86_BUILTIN_VMULSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_vdivsh_v8hf_mask_round", IX86_BUILTIN_VDIVSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_vmaxph_v32hf_mask_round", IX86_BUILTIN_VMAXPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_vminph_v32hf_mask_round", IX86_BUILTIN_VMINPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_vmaxsh_v8hf_mask_round", IX86_BUILTIN_VMAXSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_vminsh_v8hf_mask_round", IX86_BUILTIN_VMINSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_vcmpph_v32hf_mask_round", IX86_BUILTIN_VCMPPH_V32HF_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_vcmpsh_v8hf_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_vsqrtph_v32hf_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_vsqrtsh_v8hf_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_vscalefph_v32hf_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_vscalefsh_v8hf_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_vreduceph_v32hf_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_vreducesh_v8hf_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_vrndscaleph_v32hf_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> > > -BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_vrndscalesh_v8hf_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_VADDSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_VSUBSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_VMULSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_VDIVSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_VMAXPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_VMINPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_VMAXSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_VMINSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_VCMPPH_V32HF_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> > > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
> > >  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
> > >  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
> > >  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
> > > diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
> > > index b3cffa0644f..3a96e586418 100644
> > > --- a/gcc/testsuite/gcc.target/i386/avx-1.c
> > > +++ b/gcc/testsuite/gcc.target/i386/avx-1.c
> > > @@ -686,33 +686,33 @@
> > >  #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
> > >
> > >  /* avx512fp16intrin.h */
> > > -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
> > > -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
> > > -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
> > > -#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
> > > -#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
> > > -#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
> > > -#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
> > > -#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
> > > -#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
> > > +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
> > > +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
> > > +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
> > > +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
> > > +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
> > > +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
> > > +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
> > > +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
> > > +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
> > > +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
> > > +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
> > > +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
> > >  #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
> > >  #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
> > >  #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
> > > @@ -721,8 +721,8 @@
> > >  #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
> > >
> > >  /* avx512fp16vlintrin.h */
> > > -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> > > -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
> > >  #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
> > >  #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
> > >  #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
> > > index 67ef567e437..aafcd414530 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> > > @@ -703,33 +703,33 @@
> > >  #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
> > >
> > >  /* avx512fp16intrin.h */
> > > -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
> > > -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
> > > -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
> > > -#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
> > > -#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
> > > -#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
> > > -#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
> > > -#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
> > > -#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
> > > +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
> > > +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
> > > +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
> > > +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
> > > +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
> > > +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
> > > +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
> > > +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
> > > +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
> > > +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
> > > +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
> > > +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
> > >  #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
> > >  #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
> > >  #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
> > > @@ -738,8 +738,8 @@
> > >  #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
> > >
> > >  /* avx512fp16vlintrin.h */
> > > -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> > > -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
> > >  #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
> > >  #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
> > >  #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
> > > diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
> > > index b3f07587acb..8b600282c67 100644
> > > --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> > > +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> > > @@ -704,33 +704,33 @@
> > >  #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
> > >
> > >  /* avx512fp16intrin.h */
> > > -#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
> > > -#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
> > > -#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
> > > -#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
> > > -#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
> > > -#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
> > > -#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
> > > -#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
> > > -#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
> > > -#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
> > > -#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
> > > +#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
> > > +#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
> > > +#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
> > > +#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
> > > +#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
> > > +#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
> > > +#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
> > > +#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
> > > +#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
> > > +#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
> > > +#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
> > > +#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
> > > +#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
> > >  #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
> > >  #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
> > >  #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
> > > @@ -739,8 +739,8 @@
> > >  #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
> > >
> > >  /* avx512fp16vlintrin.h */
> > > -#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> > > -#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
> > > +#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
> > >  #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
> > >  #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
> > >  #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
> > > --
> > > 2.18.1
> > >
> >
> >
> > --
> > BR,
> > Hongtao
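
For context on the macro block quoted above: avx-1.c, sse-13.c and sse-23.c redefine every builtin that takes an immediate operand, so the intrinsic headers still compile when that operand is not a literal constant; each redefinition simply calls the (renamed) builtin with the rounding or predicate argument replaced by a literal. A minimal sketch of that pattern for one of the renamed builtins follows -- the scaffolding below (function name, variables, comments) is illustrative only and not taken from the patch or the testsuite:

/* Hypothetical reduced example, assuming -mavx512fp16 and a compiler
   that already provides the renamed builtin.  */
#include <immintrin.h>

/* Same trick as in the quoted test macros: redirect the builtin to
   itself with the non-constant rounding argument E replaced by the
   literal 8.  */
#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) \
  __builtin_ia32_addph512_mask_round (A, B, C, D, 8)

volatile __m512h x, y, z;

void
test_addph512_round (int rounding)
{
  /* Without the #define above this call would require 'rounding' to be
     a compile-time constant; with it, the builtin always receives the
     immediate 8.  */
  z = __builtin_ia32_addph512_mask_round (x, y, z, (__mmask32) -1,
                                          rounding);
}
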
Patch

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 2fbfc140c44..5d66ca5c820 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -229,15 +229,15 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
 {
-  return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_addph512_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
 {
-  return __builtin_ia32_vaddph_v32hf_mask (__B, __C,
-					   _mm512_setzero_ph (), __A);
+  return __builtin_ia32_addph512_mask (__B, __C,
+				       _mm512_setzero_ph (), __A);
 }
 
 extern __inline __m512h
@@ -251,15 +251,15 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
 {
-  return __builtin_ia32_vsubph_v32hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_subph512_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
 {
-  return __builtin_ia32_vsubph_v32hf_mask (__B, __C,
-					   _mm512_setzero_ph (), __A);
+  return __builtin_ia32_subph512_mask (__B, __C,
+				       _mm512_setzero_ph (), __A);
 }
 
 extern __inline __m512h
@@ -273,15 +273,15 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
 {
-  return __builtin_ia32_vmulph_v32hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_mulph512_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
 {
-  return __builtin_ia32_vmulph_v32hf_mask (__B, __C,
-					   _mm512_setzero_ph (), __A);
+  return __builtin_ia32_mulph512_mask (__B, __C,
+				       _mm512_setzero_ph (), __A);
 }
 
 extern __inline __m512h
@@ -295,15 +295,15 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
 {
-  return __builtin_ia32_vdivph_v32hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_divph512_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
 {
-  return __builtin_ia32_vdivph_v32hf_mask (__B, __C,
-					   _mm512_setzero_ph (), __A);
+  return __builtin_ia32_divph512_mask (__B, __C,
+				       _mm512_setzero_ph (), __A);
 }
 
 #ifdef __OPTIMIZE__
@@ -311,9 +311,9 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vaddph_v32hf_mask_round (__A, __B,
-						 _mm512_setzero_ph (),
-						 (__mmask32) -1, __C);
+  return __builtin_ia32_addph512_mask_round (__A, __B,
+					     _mm512_setzero_ph (),
+					     (__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -321,7 +321,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			  __m512h __D, const int __E)
 {
-  return __builtin_ia32_vaddph_v32hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_addph512_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m512h
@@ -329,18 +329,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vaddph_v32hf_mask_round (__B, __C,
-						 _mm512_setzero_ph (),
-						 __A, __D);
+  return __builtin_ia32_addph512_mask_round (__B, __C,
+					     _mm512_setzero_ph (),
+					     __A, __D);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vsubph_v32hf_mask_round (__A, __B,
-						 _mm512_setzero_ph (),
-						 (__mmask32) -1, __C);
+  return __builtin_ia32_subph512_mask_round (__A, __B,
+					     _mm512_setzero_ph (),
+					     (__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -348,7 +348,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			  __m512h __D, const int __E)
 {
-  return __builtin_ia32_vsubph_v32hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_subph512_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m512h
@@ -356,18 +356,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vsubph_v32hf_mask_round (__B, __C,
-						 _mm512_setzero_ph (),
-						 __A, __D);
+  return __builtin_ia32_subph512_mask_round (__B, __C,
+					     _mm512_setzero_ph (),
+					     __A, __D);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vmulph_v32hf_mask_round (__A, __B,
-						 _mm512_setzero_ph (),
-						 (__mmask32) -1, __C);
+  return __builtin_ia32_mulph512_mask_round (__A, __B,
+					     _mm512_setzero_ph (),
+					     (__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -375,7 +375,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			  __m512h __D, const int __E)
 {
-  return __builtin_ia32_vmulph_v32hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_mulph512_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m512h
@@ -383,18 +383,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vmulph_v32hf_mask_round (__B, __C,
-						 _mm512_setzero_ph (),
-						 __A, __D);
+  return __builtin_ia32_mulph512_mask_round (__B, __C,
+					     _mm512_setzero_ph (),
+					     __A, __D);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vdivph_v32hf_mask_round (__A, __B,
-						 _mm512_setzero_ph (),
-						 (__mmask32) -1, __C);
+  return __builtin_ia32_divph512_mask_round (__A, __B,
+					     _mm512_setzero_ph (),
+					     (__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -402,7 +402,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			  __m512h __D, const int __E)
 {
-  return __builtin_ia32_vdivph_v32hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_divph512_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m512h
@@ -410,67 +410,67 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vdivph_v32hf_mask_round (__B, __C,
-						 _mm512_setzero_ph (),
-						 __A, __D);
+  return __builtin_ia32_divph512_mask_round (__B, __C,
+					     _mm512_setzero_ph (),
+					     __A, __D);
 }
 #else
 #define _mm512_add_round_ph(A, B, C)					\
-  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((A), (B),		\
-						   _mm512_setzero_ph (),\
-						   (__mmask32)-1, (C)))
+  ((__m512h)__builtin_ia32_addph512_mask_round((A), (B),		\
+					       _mm512_setzero_ph (),	\
+					       (__mmask32)-1, (C)))
 
-#define _mm512_mask_add_round_ph(A, B, C, D, E)			\
-  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_mask_add_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_addph512_mask_round((C), (D), (A), (B), (E)))
 
 #define _mm512_maskz_add_round_ph(A, B, C, D)				\
-  ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((B), (C),		\
-						   _mm512_setzero_ph (),\
-						   (A), (D)))
+  ((__m512h)__builtin_ia32_addph512_mask_round((B), (C),		\
+					       _mm512_setzero_ph (),	\
+					       (A), (D)))
 
 #define _mm512_sub_round_ph(A, B, C)					\
-  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((A), (B),		\
-						   _mm512_setzero_ph (),\
-						   (__mmask32)-1, (C)))
+  ((__m512h)__builtin_ia32_subph512_mask_round((A), (B),		\
+					       _mm512_setzero_ph (),	\
+					       (__mmask32)-1, (C)))
 
-#define _mm512_mask_sub_round_ph(A, B, C, D, E)			\
-  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_mask_sub_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_subph512_mask_round((C), (D), (A), (B), (E)))
 
 #define _mm512_maskz_sub_round_ph(A, B, C, D)				\
-  ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((B), (C),		\
-						   _mm512_setzero_ph (),\
-						   (A), (D)))
+  ((__m512h)__builtin_ia32_subph512_mask_round((B), (C),		\
+					       _mm512_setzero_ph (),	\
+					       (A), (D)))
 
 #define _mm512_mul_round_ph(A, B, C)					\
-  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((A), (B),		\
-						   _mm512_setzero_ph (),\
-						   (__mmask32)-1, (C)))
+  ((__m512h)__builtin_ia32_mulph512_mask_round((A), (B),		\
+					       _mm512_setzero_ph (),	\
+					       (__mmask32)-1, (C)))
 
-#define _mm512_mask_mul_round_ph(A, B, C, D, E)			\
-  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_mask_mul_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_mulph512_mask_round((C), (D), (A), (B), (E)))
 
 #define _mm512_maskz_mul_round_ph(A, B, C, D)				\
-  ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((B), (C),		\
-						   _mm512_setzero_ph (),\
-						   (A), (D)))
+  ((__m512h)__builtin_ia32_mulph512_mask_round((B), (C),		\
+					       _mm512_setzero_ph (),	\
+					       (A), (D)))
 
 #define _mm512_div_round_ph(A, B, C)					\
-  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((A), (B),		\
-						   _mm512_setzero_ph (),\
-						   (__mmask32)-1, (C)))
+  ((__m512h)__builtin_ia32_divph512_mask_round((A), (B),		\
+					       _mm512_setzero_ph (),	\
+					       (__mmask32)-1, (C)))
 
-#define _mm512_mask_div_round_ph(A, B, C, D, E)			\
-  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((C), (D), (A), (B), (E)))
+#define _mm512_mask_div_round_ph(A, B, C, D, E)				\
+  ((__m512h)__builtin_ia32_divph512_mask_round((C), (D), (A), (B), (E)))
 
 #define _mm512_maskz_div_round_ph(A, B, C, D)				\
-  ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((B), (C),		\
-						   _mm512_setzero_ph (),\
-						   (A), (D)))
+  ((__m512h)__builtin_ia32_divph512_mask_round((B), (C),		\
+					       _mm512_setzero_ph (),	\
+					       (A), (D)))
 #endif  /* __OPTIMIZE__  */
 
 /* Intrinsics of v[add,sub,mul,div]sh.  */
 extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_add_sh (__m128h __A, __m128h __B)
 {
   __A[0] += __B[0];
@@ -481,15 +481,15 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_add_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vaddsh_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_addsh_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_add_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vaddsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_addsh_mask (__B, __C, _mm_setzero_ph (),
+				    __A);
 }
 
 extern __inline __m128h
@@ -504,15 +504,15 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_sub_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vsubsh_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_subsh_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_sub_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vsubsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_subsh_mask (__B, __C, _mm_setzero_ph (),
+				    __A);
 }
 
 extern __inline __m128h
@@ -527,14 +527,14 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_mul_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vmulsh_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_mulsh_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_mul_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vmulsh_v8hf_mask (__B, __C, _mm_setzero_ph (), __A);
+  return __builtin_ia32_mulsh_mask (__B, __C, _mm_setzero_ph (), __A);
 }
 
 extern __inline __m128h
@@ -549,15 +549,15 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_div_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vdivsh_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_divsh_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_div_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vdivsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_divsh_mask (__B, __C, _mm_setzero_ph (),
+				    __A);
 }
 
 #ifdef __OPTIMIZE__
@@ -565,9 +565,9 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_add_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vaddsh_v8hf_mask_round (__A, __B,
-						_mm_setzero_ph (),
-						(__mmask8) -1, __C);
+  return __builtin_ia32_addsh_mask_round (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -575,7 +575,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_add_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 		       __m128h __D, const int __E)
 {
-  return __builtin_ia32_vaddsh_v8hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_addsh_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m128h
@@ -583,18 +583,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_add_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			const int __D)
 {
-  return __builtin_ia32_vaddsh_v8hf_mask_round (__B, __C,
-						_mm_setzero_ph (),
-						__A, __D);
+  return __builtin_ia32_addsh_mask_round (__B, __C,
+					  _mm_setzero_ph (),
+					  __A, __D);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sub_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vsubsh_v8hf_mask_round (__A, __B,
-						_mm_setzero_ph (),
-						(__mmask8) -1, __C);
+  return __builtin_ia32_subsh_mask_round (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -602,7 +602,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_sub_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 		       __m128h __D, const int __E)
 {
-  return __builtin_ia32_vsubsh_v8hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_subsh_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m128h
@@ -610,18 +610,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_sub_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			const int __D)
 {
-  return __builtin_ia32_vsubsh_v8hf_mask_round (__B, __C,
-						_mm_setzero_ph (),
-						__A, __D);
+  return __builtin_ia32_subsh_mask_round (__B, __C,
+					  _mm_setzero_ph (),
+					  __A, __D);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mul_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vmulsh_v8hf_mask_round (__A, __B,
-						_mm_setzero_ph (),
-						(__mmask8) -1, __C);
+  return __builtin_ia32_mulsh_mask_round (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -629,7 +629,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_mul_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 		       __m128h __D, const int __E)
 {
-  return __builtin_ia32_vmulsh_v8hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_mulsh_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m128h
@@ -637,18 +637,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_mul_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			const int __D)
 {
-  return __builtin_ia32_vmulsh_v8hf_mask_round (__B, __C,
-						_mm_setzero_ph (),
-						__A, __D);
+  return __builtin_ia32_mulsh_mask_round (__B, __C,
+					  _mm_setzero_ph (),
+					  __A, __D);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_div_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vdivsh_v8hf_mask_round (__A, __B,
-						_mm_setzero_ph (),
-						(__mmask8) -1, __C);
+  return __builtin_ia32_divsh_mask_round (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -656,7 +656,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_div_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 		       __m128h __D, const int __E)
 {
-  return __builtin_ia32_vdivsh_v8hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_divsh_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m128h
@@ -664,62 +664,62 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_div_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			const int __D)
 {
-  return __builtin_ia32_vdivsh_v8hf_mask_round (__B, __C,
-						_mm_setzero_ph (),
-						__A, __D);
+  return __builtin_ia32_divsh_mask_round (__B, __C,
+					  _mm_setzero_ph (),
+					  __A, __D);
 }
 #else
 #define _mm_add_round_sh(A, B, C)					\
-  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((A), (B),		\
-						   _mm_setzero_ph (),	\
-						   (__mmask8)-1, (C)))
+  ((__m128h)__builtin_ia32_addsh_mask_round ((A), (B),			\
+					     _mm_setzero_ph (),		\
+					     (__mmask8)-1, (C)))
 
 #define _mm_mask_add_round_sh(A, B, C, D, E)				\
-  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+  ((__m128h)__builtin_ia32_addsh_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm_maskz_add_round_sh(A, B, C, D)				\
-  ((__m128h)__builtin_ia32_vaddsh_v8hf_mask_round ((B), (C),		\
-						   _mm_setzero_ph (),	\
-						   (A), (D)))
+#define _mm_maskz_add_round_sh(A, B, C, D)			\
+  ((__m128h)__builtin_ia32_addsh_mask_round ((B), (C),		\
+					     _mm_setzero_ph (),	\
+					     (A), (D)))
 
 #define _mm_sub_round_sh(A, B, C)					\
-  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((A), (B),		\
-						   _mm_setzero_ph (),	\
-						   (__mmask8)-1, (C)))
+  ((__m128h)__builtin_ia32_subsh_mask_round ((A), (B),			\
+					     _mm_setzero_ph (),		\
+					     (__mmask8)-1, (C)))
 
 #define _mm_mask_sub_round_sh(A, B, C, D, E)				\
-  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+  ((__m128h)__builtin_ia32_subsh_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm_maskz_sub_round_sh(A, B, C, D)				\
-  ((__m128h)__builtin_ia32_vsubsh_v8hf_mask_round ((B), (C),		\
-						   _mm_setzero_ph (),	\
-						   (A), (D)))
+#define _mm_maskz_sub_round_sh(A, B, C, D)			\
+  ((__m128h)__builtin_ia32_subsh_mask_round ((B), (C),		\
+					     _mm_setzero_ph (),	\
+					     (A), (D)))
 
 #define _mm_mul_round_sh(A, B, C)					\
-  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((A), (B),		\
-						   _mm_setzero_ph (),	\
-						   (__mmask8)-1, (C)))
+  ((__m128h)__builtin_ia32_mulsh_mask_round ((A), (B),			\
+					     _mm_setzero_ph (),		\
+					     (__mmask8)-1, (C)))
 
 #define _mm_mask_mul_round_sh(A, B, C, D, E)				\
-  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+  ((__m128h)__builtin_ia32_mulsh_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm_maskz_mul_round_sh(A, B, C, D)				\
-  ((__m128h)__builtin_ia32_vmulsh_v8hf_mask_round ((B), (C),		\
-						   _mm_setzero_ph (),	\
-						   (A), (D)))
+#define _mm_maskz_mul_round_sh(A, B, C, D)			\
+  ((__m128h)__builtin_ia32_mulsh_mask_round ((B), (C),		\
+					     _mm_setzero_ph (),	\
+					     (A), (D)))
 
 #define _mm_div_round_sh(A, B, C)					\
-  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((A), (B),		\
-						   _mm_setzero_ph (),	\
-						   (__mmask8)-1, (C)))
+  ((__m128h)__builtin_ia32_divsh_mask_round ((A), (B),			\
+					     _mm_setzero_ph (),		\
+					     (__mmask8)-1, (C)))
 
 #define _mm_mask_div_round_sh(A, B, C, D, E)				\
-  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+  ((__m128h)__builtin_ia32_divsh_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm_maskz_div_round_sh(A, B, C, D)				\
-  ((__m128h)__builtin_ia32_vdivsh_v8hf_mask_round ((B), (C),		\
-						   _mm_setzero_ph (),	\
-						   (A), (D)))
+#define _mm_maskz_div_round_sh(A, B, C, D)			\
+  ((__m128h)__builtin_ia32_divsh_mask_round ((B), (C),		\
+					     _mm_setzero_ph (),	\
+					     (A), (D)))
 #endif /* __OPTIMIZE__ */
 
 /* Intrinsic vmaxph vminph.  */
@@ -727,48 +727,48 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_max_ph (__m512h __A, __m512h __B)
 {
-  return __builtin_ia32_vmaxph_v32hf_mask (__A, __B,
-					   _mm512_setzero_ph (),
-					   (__mmask32) -1);
+  return __builtin_ia32_maxph512_mask (__A, __B,
+				       _mm512_setzero_ph (),
+				       (__mmask32) -1);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_max_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
 {
-  return __builtin_ia32_vmaxph_v32hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_maxph512_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_max_ph (__mmask32 __A, __m512h __B, __m512h __C)
 {
-  return __builtin_ia32_vmaxph_v32hf_mask (__B, __C,
-					   _mm512_setzero_ph (), __A);
+  return __builtin_ia32_maxph512_mask (__B, __C,
+				       _mm512_setzero_ph (), __A);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_min_ph (__m512h __A, __m512h __B)
 {
-  return __builtin_ia32_vminph_v32hf_mask (__A, __B,
-					   _mm512_setzero_ph (),
-					   (__mmask32) -1);
+  return __builtin_ia32_minph512_mask (__A, __B,
+				       _mm512_setzero_ph (),
+				       (__mmask32) -1);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_min_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
 {
-  return __builtin_ia32_vminph_v32hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_minph512_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_min_ph (__mmask32 __A, __m512h __B, __m512h __C)
 {
-  return __builtin_ia32_vminph_v32hf_mask (__B, __C,
-					   _mm512_setzero_ph (), __A);
+  return __builtin_ia32_minph512_mask (__B, __C,
+				       _mm512_setzero_ph (), __A);
 }
 
 #ifdef __OPTIMIZE__
@@ -776,9 +776,9 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_max_round_ph (__m512h __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vmaxph_v32hf_mask_round (__A, __B,
-						 _mm512_setzero_ph (),
-						 (__mmask32) -1, __C);
+  return __builtin_ia32_maxph512_mask_round (__A, __B,
+					     _mm512_setzero_ph (),
+					     (__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -786,7 +786,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_max_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			  __m512h __D, const int __E)
 {
-  return __builtin_ia32_vmaxph_v32hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_maxph512_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m512h
@@ -794,18 +794,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_max_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vmaxph_v32hf_mask_round (__B, __C,
-						 _mm512_setzero_ph (),
-						 __A, __D);
+  return __builtin_ia32_maxph512_mask_round (__B, __C,
+					     _mm512_setzero_ph (),
+					     __A, __D);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_min_round_ph (__m512h __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vminph_v32hf_mask_round (__A, __B,
-						 _mm512_setzero_ph (),
-						 (__mmask32) -1, __C);
+  return __builtin_ia32_minph512_mask_round (__A, __B,
+					     _mm512_setzero_ph (),
+					     (__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -813,7 +813,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_min_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			  __m512h __D, const int __E)
 {
-  return __builtin_ia32_vminph_v32hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_minph512_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m512h
@@ -821,37 +821,37 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_min_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vminph_v32hf_mask_round (__B, __C,
-						 _mm512_setzero_ph (),
-						 __A, __D);
+  return __builtin_ia32_minph512_mask_round (__B, __C,
+					     _mm512_setzero_ph (),
+					     __A, __D);
 }
 
 #else
-#define _mm512_max_round_ph(A, B, C)					\
-  (__builtin_ia32_vmaxph_v32hf_mask_round ((A), (B),			\
-					   _mm512_setzero_ph (),	\
-					   (__mmask32)-1, (C)))
+#define _mm512_max_round_ph(A, B, C)				\
+  (__builtin_ia32_maxph512_mask_round ((A), (B),		\
+				       _mm512_setzero_ph (),	\
+				       (__mmask32)-1, (C)))
 
 #define _mm512_mask_max_round_ph(A, B, C, D, E)				\
-  (__builtin_ia32_vmaxph_v32hf_mask_round ((C), (D), (A), (B), (E)))
+  (__builtin_ia32_maxph512_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm512_maskz_max_round_ph(A, B, C, D)				\
-  (__builtin_ia32_vmaxph_v32hf_mask_round ((B), (C),			\
-					   _mm512_setzero_ph (),	\
-					   (A), (D)))
+#define _mm512_maskz_max_round_ph(A, B, C, D)			\
+  (__builtin_ia32_maxph512_mask_round ((B), (C),		\
+				       _mm512_setzero_ph (),	\
+				       (A), (D)))
 
-#define _mm512_min_round_ph(A, B, C)					\
-  (__builtin_ia32_vminph_v32hf_mask_round ((A), (B),			\
-					   _mm512_setzero_ph (),	\
-					   (__mmask32)-1, (C)))
+#define _mm512_min_round_ph(A, B, C)				\
+  (__builtin_ia32_minph512_mask_round ((A), (B),		\
+				       _mm512_setzero_ph (),	\
+				       (__mmask32)-1, (C)))
 
 #define _mm512_mask_min_round_ph(A, B, C, D, E)				\
-  (__builtin_ia32_vminph_v32hf_mask_round ((C), (D), (A), (B), (E)))
+  (__builtin_ia32_minph512_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm512_maskz_min_round_ph(A, B, C, D)				\
-  (__builtin_ia32_vminph_v32hf_mask_round ((B), (C),			\
-					   _mm512_setzero_ph (),	\
-					   (A), (D)))
+#define _mm512_maskz_min_round_ph(A, B, C, D)			\
+  (__builtin_ia32_minph512_mask_round ((B), (C),		\
+				       _mm512_setzero_ph (),	\
+				       (A), (D)))
 #endif /* __OPTIMIZE__ */
 
 /* Intrinsic vmaxsh vminsh.  */
@@ -867,15 +867,15 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_max_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vmaxsh_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_maxsh_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_max_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vmaxsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_maxsh_mask (__B, __C, _mm_setzero_ph (),
+				    __A);
 }
 
 extern __inline __m128h
@@ -890,15 +890,15 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_min_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vminsh_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_minsh_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_min_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vminsh_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_minsh_mask (__B, __C, _mm_setzero_ph (),
+				    __A);
 }
 
 #ifdef __OPTIMIZE__
@@ -906,9 +906,9 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vmaxsh_v8hf_mask_round (__A, __B,
-						_mm_setzero_ph (),
-						(__mmask8) -1, __C);
+  return __builtin_ia32_maxsh_mask_round (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -916,7 +916,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_max_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 		       __m128h __D, const int __E)
 {
-  return __builtin_ia32_vmaxsh_v8hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_maxsh_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m128h
@@ -924,18 +924,18 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_max_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			const int __D)
 {
-  return __builtin_ia32_vmaxsh_v8hf_mask_round (__B, __C,
-						_mm_setzero_ph (),
-						__A, __D);
+  return __builtin_ia32_maxsh_mask_round (__B, __C,
+					  _mm_setzero_ph (),
+					  __A, __D);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vminsh_v8hf_mask_round (__A, __B,
-						_mm_setzero_ph (),
-						(__mmask8) -1, __C);
+  return __builtin_ia32_minsh_mask_round (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -943,7 +943,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_min_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 		       __m128h __D, const int __E)
 {
-  return __builtin_ia32_vminsh_v8hf_mask_round (__C, __D, __A, __B, __E);
+  return __builtin_ia32_minsh_mask_round (__C, __D, __A, __B, __E);
 }
 
 extern __inline __m128h
@@ -951,37 +951,37 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_min_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			const int __D)
 {
-  return __builtin_ia32_vminsh_v8hf_mask_round (__B, __C,
-						_mm_setzero_ph (),
-						__A, __D);
+  return __builtin_ia32_minsh_mask_round (__B, __C,
+					  _mm_setzero_ph (),
+					  __A, __D);
 }
 
 #else
-#define _mm_max_round_sh(A, B, C)					\
-  (__builtin_ia32_vmaxsh_v8hf_mask_round ((A), (B),			\
-					  _mm_setzero_ph (),		\
-					  (__mmask8)-1, (C)))
+#define _mm_max_round_sh(A, B, C)			\
+  (__builtin_ia32_maxsh_mask_round ((A), (B),		\
+				    _mm_setzero_ph (),	\
+				    (__mmask8)-1, (C)))
 
-#define _mm_mask_max_round_sh(A, B, C, D, E)				\
-  (__builtin_ia32_vmaxsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_mask_max_round_sh(A, B, C, D, E)			\
+  (__builtin_ia32_maxsh_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm_maskz_max_round_sh(A, B, C, D)				\
-  (__builtin_ia32_vmaxsh_v8hf_mask_round ((B), (C),			\
-					  _mm_setzero_ph (),		\
-					  (A), (D)))
+#define _mm_maskz_max_round_sh(A, B, C, D)		\
+  (__builtin_ia32_maxsh_mask_round ((B), (C),		\
+				    _mm_setzero_ph (),	\
+				    (A), (D)))
 
-#define _mm_min_round_sh(A, B, C)					\
-  (__builtin_ia32_vminsh_v8hf_mask_round ((A), (B),			\
-					  _mm_setzero_ph (),		\
-					  (__mmask8)-1, (C)))
+#define _mm_min_round_sh(A, B, C)			\
+  (__builtin_ia32_minsh_mask_round ((A), (B),		\
+				    _mm_setzero_ph (),	\
+				    (__mmask8)-1, (C)))
 
-#define _mm_mask_min_round_sh(A, B, C, D, E)				\
-  (__builtin_ia32_vminsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_mask_min_round_sh(A, B, C, D, E)			\
+  (__builtin_ia32_minsh_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm_maskz_min_round_sh(A, B, C, D)				\
-  (__builtin_ia32_vminsh_v8hf_mask_round ((B), (C),			\
-					  _mm_setzero_ph (),		\
-					  (A), (D)))
+#define _mm_maskz_min_round_sh(A, B, C, D)		\
+  (__builtin_ia32_minsh_mask_round ((B), (C),		\
+				    _mm_setzero_ph (),	\
+				    (A), (D)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -991,8 +991,8 @@  extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cmp_ph_mask (__m512h __A, __m512h __B, const int __C)
 {
-  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__A, __B, __C,
-						       (__mmask32) -1);
+  return (__mmask32) __builtin_ia32_cmpph512_mask (__A, __B, __C,
+						   (__mmask32) -1);
 }
 
 extern __inline __mmask32
@@ -1000,8 +1000,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_cmp_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
 			 const int __D)
 {
-  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask (__B, __C, __D,
-						       __A);
+  return (__mmask32) __builtin_ia32_cmpph512_mask (__B, __C, __D,
+						   __A);
 }
 
 extern __inline __mmask32
@@ -1009,9 +1009,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cmp_round_ph_mask (__m512h __A, __m512h __B, const int __C,
 			  const int __D)
 {
-  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__A, __B,
-							     __C, (__mmask32) -1,
-							     __D);
+  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__A, __B,
+							 __C, (__mmask32) -1,
+							 __D);
 }
 
 extern __inline __mmask32
@@ -1019,23 +1019,23 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_cmp_round_ph_mask (__mmask32 __A, __m512h __B, __m512h __C,
 			       const int __D, const int __E)
 {
-  return (__mmask32) __builtin_ia32_vcmpph_v32hf_mask_round (__B, __C,
-							     __D, __A,
-							     __E);
+  return (__mmask32) __builtin_ia32_cmpph512_mask_round (__B, __C,
+							 __D, __A,
+							 __E);
 }
 
 #else
 #define _mm512_cmp_ph_mask(A, B, C)			\
-  (__builtin_ia32_vcmpph_v32hf_mask ((A), (B), (C), (-1)))
+  (__builtin_ia32_cmpph512_mask ((A), (B), (C), (-1)))
 
 #define _mm512_mask_cmp_ph_mask(A, B, C, D)		\
-  (__builtin_ia32_vcmpph_v32hf_mask ((B), (C), (D), (A)))
+  (__builtin_ia32_cmpph512_mask ((B), (C), (D), (A)))
 
-#define _mm512_cmp_round_ph_mask(A, B, C, D)		\
-  (__builtin_ia32_vcmpph_v32hf_mask_round ((A), (B), (C), (-1), (D)))
+#define _mm512_cmp_round_ph_mask(A, B, C, D)				\
+  (__builtin_ia32_cmpph512_mask_round ((A), (B), (C), (-1), (D)))
 
-#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)	\
-  (__builtin_ia32_vcmpph_v32hf_mask_round ((B), (C), (D), (A), (E)))
+#define _mm512_mask_cmp_round_ph_mask(A, B, C, D, E)			\
+  (__builtin_ia32_cmpph512_mask_round ((B), (C), (D), (A), (E)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -1046,9 +1046,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmp_sh_mask (__m128h __A, __m128h __B, const int __C)
 {
   return (__mmask8)
-    __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B,
-					   __C, (__mmask8) -1,
-					   _MM_FROUND_CUR_DIRECTION);
+    __builtin_ia32_cmpsh_mask_round (__A, __B,
+				     __C, (__mmask8) -1,
+				     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __mmask8
@@ -1057,9 +1057,9 @@  _mm_mask_cmp_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
 		      const int __D)
 {
   return (__mmask8)
-    __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C,
-					   __D, __A,
-					   _MM_FROUND_CUR_DIRECTION);
+    __builtin_ia32_cmpsh_mask_round (__B, __C,
+				     __D, __A,
+				     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __mmask8
@@ -1067,9 +1067,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmp_round_sh_mask (__m128h __A, __m128h __B, const int __C,
 		       const int __D)
 {
-  return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B,
-							   __C, (__mmask8) -1,
-							   __D);
+  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__A, __B,
+						     __C, (__mmask8) -1,
+						     __D);
 }
 
 extern __inline __mmask8
@@ -1077,25 +1077,25 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_cmp_round_sh_mask (__mmask8 __A, __m128h __B, __m128h __C,
 			    const int __D, const int __E)
 {
-  return (__mmask8) __builtin_ia32_vcmpsh_v8hf_mask_round (__B, __C,
-							   __D, __A,
-							   __E);
+  return (__mmask8) __builtin_ia32_cmpsh_mask_round (__B, __C,
+						     __D, __A,
+						     __E);
 }
 
 #else
-#define _mm_cmp_sh_mask(A, B, C)		\
-  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), \
-					  (_MM_FROUND_CUR_DIRECTION)))
+#define _mm_cmp_sh_mask(A, B, C)					\
+  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1),		\
+				    (_MM_FROUND_CUR_DIRECTION)))
 
-#define _mm_mask_cmp_sh_mask(A, B, C, D)	\
-  (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A),		\
-					  (_MM_FROUND_CUR_DIRECTION)))
+#define _mm_mask_cmp_sh_mask(A, B, C, D)				\
+  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A),			\
+				    (_MM_FROUND_CUR_DIRECTION)))
 
-#define _mm_cmp_round_sh_mask(A, B, C, D)				\
-  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (C), (-1), (D)))
+#define _mm_cmp_round_sh_mask(A, B, C, D)			\
+  (__builtin_ia32_cmpsh_mask_round ((A), (B), (C), (-1), (D)))
 
-#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)	\
-  (__builtin_ia32_vcmpsh_v8hf_mask_round ((B), (C), (D), (A), (E)))
+#define _mm_mask_cmp_round_sh_mask(A, B, C, D, E)		\
+  (__builtin_ia32_cmpsh_mask_round ((B), (C), (D), (A), (E)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -1104,134 +1104,134 @@  extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_comieq_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OS,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OS,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_comilt_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OS,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OS,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_comile_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OS,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OS,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_comigt_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OS,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OS,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_comige_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OS,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OS,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_comineq_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_US,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_US,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ucomieq_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_EQ_OQ,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_EQ_OQ,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ucomilt_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LT_OQ,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LT_OQ,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ucomile_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_LE_OQ,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_LE_OQ,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ucomigt_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GT_OQ,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GT_OQ,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ucomige_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_GE_OQ,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_GE_OQ,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ucomineq_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, _CMP_NEQ_UQ,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, _CMP_NEQ_UQ,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 #ifdef __OPTIMIZE__
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-  _mm_comi_sh (__m128h __A, __m128h __B, const int __P)
+_mm_comi_sh (__m128h __A, __m128h __B, const int __P)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P,
-						(__mmask8) -1,
-						_MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
+					  (__mmask8) -1,
+					  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_comi_round_sh (__m128h __A, __m128h __B, const int __P, const int __R)
 {
-  return __builtin_ia32_vcmpsh_v8hf_mask_round (__A, __B, __P,
-						(__mmask8) -1,__R);
+  return __builtin_ia32_cmpsh_mask_round (__A, __B, __P,
+					  (__mmask8) -1,__R);
 }
 
 #else
-#define _mm_comi_round_sh(A, B, P, R)		\
-  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
-#define _mm_comi_sh(A, B, P)		\
-  (__builtin_ia32_vcmpsh_v8hf_mask_round ((A), (B), (P), (__mmask8) (-1), \
-					  _MM_FROUND_CUR_DIRECTION))
+#define _mm_comi_round_sh(A, B, P, R)					\
+  (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1), (R)))
+#define _mm_comi_sh(A, B, P)						\
+  (__builtin_ia32_cmpsh_mask_round ((A), (B), (P), (__mmask8) (-1),	\
+				    _MM_FROUND_CUR_DIRECTION))
 
 #endif /* __OPTIMIZE__  */
 
@@ -1240,28 +1240,28 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_sqrt_ph (__m512h __A)
 {
-  return __builtin_ia32_vsqrtph_v32hf_mask_round (__A,
-						  _mm512_setzero_ph(),
-						  (__mmask32) -1,
-						  _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtph512_mask_round (__A,
+					      _mm512_setzero_ph(),
+					      (__mmask32) -1,
+					      _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_sqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
 {
-  return __builtin_ia32_vsqrtph_v32hf_mask_round (__C, __A, __B,
-						  _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B,
+					      _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_sqrt_ph (__mmask32 __A, __m512h __B)
 {
-  return __builtin_ia32_vsqrtph_v32hf_mask_round (__B,
-						  _mm512_setzero_ph (),
-						  __A,
-						  _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtph512_mask_round (__B,
+					      _mm512_setzero_ph (),
+					      __A,
+					      _MM_FROUND_CUR_DIRECTION);
 }
 
 #ifdef __OPTIMIZE__
@@ -1269,9 +1269,9 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_sqrt_round_ph (__m512h __A, const int __B)
 {
-  return __builtin_ia32_vsqrtph_v32hf_mask_round (__A,
-						  _mm512_setzero_ph(),
-						  (__mmask32) -1, __B);
+  return __builtin_ia32_sqrtph512_mask_round (__A,
+					      _mm512_setzero_ph(),
+					      (__mmask32) -1, __B);
 }
 
 extern __inline __m512h
@@ -1279,31 +1279,31 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_sqrt_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vsqrtph_v32hf_mask_round (__C, __A, __B, __D);
+  return __builtin_ia32_sqrtph512_mask_round (__C, __A, __B, __D);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_sqrt_round_ph (__mmask32 __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vsqrtph_v32hf_mask_round (__B,
-						  _mm512_setzero_ph (),
-						  __A, __C);
+  return __builtin_ia32_sqrtph512_mask_round (__B,
+					      _mm512_setzero_ph (),
+					      __A, __C);
 }
 
 #else
-#define _mm512_sqrt_round_ph(A, B)					\
-  (__builtin_ia32_vsqrtph_v32hf_mask_round ((A),			\
-					    _mm512_setzero_ph (),	\
-					    (__mmask32)-1, (B)))
+#define _mm512_sqrt_round_ph(A, B)				\
+  (__builtin_ia32_sqrtph512_mask_round ((A),			\
+					_mm512_setzero_ph (),	\
+					(__mmask32)-1, (B)))
 
-#define _mm512_mask_sqrt_round_ph(A, B, C, D)				\
-  (__builtin_ia32_vsqrtph_v32hf_mask_round ((C), (A), (B), (D)))
+#define _mm512_mask_sqrt_round_ph(A, B, C, D)			\
+  (__builtin_ia32_sqrtph512_mask_round ((C), (A), (B), (D)))
 
-#define _mm512_maskz_sqrt_round_ph(A, B, C)				\
-  (__builtin_ia32_vsqrtph_v32hf_mask_round ((B),			\
-					    _mm512_setzero_ph (),	\
-					    (A), (C)))
+#define _mm512_maskz_sqrt_round_ph(A, B, C)			\
+  (__builtin_ia32_sqrtph512_mask_round ((B),			\
+					_mm512_setzero_ph (),	\
+					(A), (C)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -1312,23 +1312,23 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_rsqrt_ph (__m512h __A)
 {
-  return __builtin_ia32_vrsqrtph_v32hf_mask (__A, _mm512_setzero_ph (),
-					     (__mmask32) -1);
+  return __builtin_ia32_rsqrtph512_mask (__A, _mm512_setzero_ph (),
+					 (__mmask32) -1);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_rsqrt_ph (__m512h __A, __mmask32 __B, __m512h __C)
 {
-  return __builtin_ia32_vrsqrtph_v32hf_mask (__C, __A, __B);
+  return __builtin_ia32_rsqrtph512_mask (__C, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_rsqrt_ph (__mmask32 __A, __m512h __B)
 {
-  return __builtin_ia32_vrsqrtph_v32hf_mask (__B, _mm512_setzero_ph (),
-					     __A);
+  return __builtin_ia32_rsqrtph512_mask (__B, _mm512_setzero_ph (),
+					 __A);
 }
 
 /* Intrinsics vrsqrtsh.  */
@@ -1336,23 +1336,23 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vrsqrtsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
-					    (__mmask8) -1);
+  return __builtin_ia32_rsqrtsh_mask (__B, __A, _mm_setzero_ph (),
+				      (__mmask8) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_rsqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vrsqrtsh_v8hf_mask (__D, __C, __A, __B);
+  return __builtin_ia32_rsqrtsh_mask (__D, __C, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_rsqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vrsqrtsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
-					    __A);
+  return __builtin_ia32_rsqrtsh_mask (__C, __B, _mm_setzero_ph (),
+				      __A);
 }
 
 /* Intrinsics vsqrtsh.  */
@@ -1360,27 +1360,27 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sqrt_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__B, __A,
-						 _mm_setzero_ph (),
-						 (__mmask8) -1,
-						 _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
+					   _mm_setzero_ph (),
+					   (__mmask8) -1,
+					   _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_sqrt_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__D, __C, __A, __B,
-						 _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
+					   _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_sqrt_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__C, __B,
-						 _mm_setzero_ph (),
-						 __A, _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
+					   _mm_setzero_ph (),
+					   __A, _MM_FROUND_CUR_DIRECTION);
 }
 
 #ifdef __OPTIMIZE__
@@ -1388,9 +1388,9 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sqrt_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__B, __A,
-						 _mm_setzero_ph (),
-						 (__mmask8) -1, __C);
+  return __builtin_ia32_sqrtsh_mask_round (__B, __A,
+					   _mm_setzero_ph (),
+					   (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -1398,8 +1398,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_sqrt_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 			__m128h __D, const int __E)
 {
-  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__D, __C, __A, __B,
-						 __E);
+  return __builtin_ia32_sqrtsh_mask_round (__D, __C, __A, __B,
+					   __E);
 }
 
 extern __inline __m128h
@@ -1407,24 +1407,24 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			 const int __D)
 {
-  return __builtin_ia32_vsqrtsh_v8hf_mask_round (__C, __B,
-						 _mm_setzero_ph (),
-						 __A, __D);
+  return __builtin_ia32_sqrtsh_mask_round (__C, __B,
+					   _mm_setzero_ph (),
+					   __A, __D);
 }
 
 #else
 #define _mm_sqrt_round_sh(A, B, C)				\
-  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((B), (A),		\
-					   _mm_setzero_ph (),	\
-					   (__mmask8)-1, (C)))
+  (__builtin_ia32_sqrtsh_mask_round ((B), (A),			\
+				     _mm_setzero_ph (),		\
+				     (__mmask8)-1, (C)))
 
 #define _mm_mask_sqrt_round_sh(A, B, C, D, E)			\
-  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((D), (C), (A), (B), (E)))
+  (__builtin_ia32_sqrtsh_mask_round ((D), (C), (A), (B), (E)))
 
-#define _mm_maskz_sqrt_round_sh(A, B, C, D)			\
-  (__builtin_ia32_vsqrtsh_v8hf_mask_round ((C), (B),		\
-					   _mm_setzero_ph (),	\
-					   (A), (D)))
+#define _mm_maskz_sqrt_round_sh(A, B, C, D)		\
+  (__builtin_ia32_sqrtsh_mask_round ((C), (B),		\
+				     _mm_setzero_ph (),	\
+				     (A), (D)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -1433,23 +1433,23 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_rcp_ph (__m512h __A)
 {
-  return __builtin_ia32_vrcpph_v32hf_mask (__A, _mm512_setzero_ph (),
-					   (__mmask32) -1);
+  return __builtin_ia32_rcpph512_mask (__A, _mm512_setzero_ph (),
+				       (__mmask32) -1);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
 {
-  return __builtin_ia32_vrcpph_v32hf_mask (__C, __A, __B);
+  return __builtin_ia32_rcpph512_mask (__C, __A, __B);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
 {
-  return __builtin_ia32_vrcpph_v32hf_mask (__B, _mm512_setzero_ph (),
-					   __A);
+  return __builtin_ia32_rcpph512_mask (__B, _mm512_setzero_ph (),
+				       __A);
 }
 
 /* Intrinsics vrcpsh.  */
@@ -1457,23 +1457,23 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vrcpsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
-					  (__mmask8) -1);
+  return __builtin_ia32_rcpsh_mask (__B, __A, _mm_setzero_ph (),
+				    (__mmask8) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_rcp_sh (__m128h __A, __mmask32 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vrcpsh_v8hf_mask (__D, __C, __A, __B);
+  return __builtin_ia32_rcpsh_mask (__D, __C, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_rcp_sh (__mmask32 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vrcpsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_rcpsh_mask (__C, __B, _mm_setzero_ph (),
+				    __A);
 }
 
 /* Intrinsics vscalefph.  */
@@ -1481,28 +1481,28 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_scalef_ph (__m512h __A, __m512h __B)
 {
-  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
-						    _mm512_setzero_ph (),
-						    (__mmask32) -1,
-						    _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_scalefph512_mask_round (__A, __B,
+						_mm512_setzero_ph (),
+						(__mmask32) -1,
+						_MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
 {
-  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
-						    _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
+						_MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
 {
-  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
-						    _mm512_setzero_ph (),
-						    __A,
-						    _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_scalefph512_mask_round (__B, __C,
+						_mm512_setzero_ph (),
+						__A,
+						_MM_FROUND_CUR_DIRECTION);
 }
 
 #ifdef __OPTIMIZE__
@@ -1510,9 +1510,9 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
 {
-  return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
-						    _mm512_setzero_ph (),
-						    (__mmask32) -1, __C);
+  return __builtin_ia32_scalefph512_mask_round (__A, __B,
+						_mm512_setzero_ph (),
+						(__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -1520,8 +1520,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			     __m512h __D, const int __E)
 {
-  return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
-						    __E);
+  return __builtin_ia32_scalefph512_mask_round (__C, __D, __A, __B,
+						__E);
 }
 
 extern __inline __m512h
@@ -1529,24 +1529,24 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
 			      const int __D)
 {
-  return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
-						    _mm512_setzero_ph (),
-						    __A, __D);
+  return __builtin_ia32_scalefph512_mask_round (__B, __C,
+						_mm512_setzero_ph (),
+						__A, __D);
 }
 
 #else
-#define _mm512_scalef_round_ph(A, B, C)					\
-  (__builtin_ia32_vscalefph_v32hf_mask_round ((A), (B),			\
-					      _mm512_setzero_ph (),	\
-					      (__mmask32)-1, (C)))
+#define _mm512_scalef_round_ph(A, B, C)				\
+  (__builtin_ia32_scalefph512_mask_round ((A), (B),		\
+					  _mm512_setzero_ph (),	\
+					  (__mmask32)-1, (C)))
 
 #define _mm512_mask_scalef_round_ph(A, B, C, D, E)			\
-  (__builtin_ia32_vscalefph_v32hf_mask_round ((C), (D), (A), (B), (E)))
+  (__builtin_ia32_scalefph512_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm512_maskz_scalef_round_ph(A, B, C, D)			\
-  (__builtin_ia32_vscalefph_v32hf_mask_round ((B), (C),			\
-					      _mm512_setzero_ph (),	\
-					      (A), (D)))
+#define _mm512_maskz_scalef_round_ph(A, B, C, D)		\
+  (__builtin_ia32_scalefph512_mask_round ((B), (C),		\
+					  _mm512_setzero_ph (),	\
+					  (A), (D)))
 
 #endif  /* __OPTIMIZE__ */
 
@@ -1555,28 +1555,28 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_scalef_sh (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
-						   _mm_setzero_ph (),
-						   (__mmask8) -1,
-						   _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_scalefsh_mask_round (__A, __B,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1,
+					     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
-						   _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
+					     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
-						   _mm_setzero_ph (),
-						   __A,
-						   _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_scalefsh_mask_round (__B, __C,
+					     _mm_setzero_ph (),
+					     __A,
+					     _MM_FROUND_CUR_DIRECTION);
 }
 
 #ifdef __OPTIMIZE__
@@ -1584,9 +1584,9 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
 {
-  return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
-						   _mm_setzero_ph (),
-						   (__mmask8) -1, __C);
+  return __builtin_ia32_scalefsh_mask_round (__A, __B,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1, __C);
 }
 
 extern __inline __m128h
@@ -1594,8 +1594,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 			  __m128h __D, const int __E)
 {
-  return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
-						   __E);
+  return __builtin_ia32_scalefsh_mask_round (__C, __D, __A, __B,
+					     __E);
 }
 
 extern __inline __m128h
@@ -1603,23 +1603,23 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			   const int __D)
 {
-  return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
-						   _mm_setzero_ph (),
-						   __A, __D);
+  return __builtin_ia32_scalefsh_mask_round (__B, __C,
+					     _mm_setzero_ph (),
+					     __A, __D);
 }
 
 #else
-#define _mm_scalef_round_sh(A, B, C)					  \
-  (__builtin_ia32_vscalefsh_v8hf_mask_round ((A), (B),			  \
-					     _mm_setzero_ph (),		  \
-					     (__mmask8)-1, (C)))
+#define _mm_scalef_round_sh(A, B, C)				\
+  (__builtin_ia32_scalefsh_mask_round ((A), (B),		\
+				       _mm_setzero_ph (),	\
+				       (__mmask8)-1, (C)))
 
-#define _mm_mask_scalef_round_sh(A, B, C, D, E)				  \
-  (__builtin_ia32_vscalefsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+#define _mm_mask_scalef_round_sh(A, B, C, D, E)				\
+  (__builtin_ia32_scalefsh_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm_maskz_scalef_round_sh(A, B, C, D)				  \
-  (__builtin_ia32_vscalefsh_v8hf_mask_round ((B), (C), _mm_setzero_ph (), \
-					     (A), (D)))
+#define _mm_maskz_scalef_round_sh(A, B, C, D)				\
+  (__builtin_ia32_scalefsh_mask_round ((B), (C), _mm_setzero_ph (),	\
+				       (A), (D)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -1629,37 +1629,37 @@  extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_reduce_ph (__m512h __A, int __B)
 {
-  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
-						    _mm512_setzero_ph (),
-						    (__mmask32) -1,
-						    _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_reduceph512_mask_round (__A, __B,
+						_mm512_setzero_ph (),
+						(__mmask32) -1,
+						_MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_reduce_ph (__m512h __A, __mmask32 __B, __m512h __C, int __D)
 {
-  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
-						    _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
+						_MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_reduce_ph (__mmask32 __A, __m512h __B, int __C)
 {
-  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
-						    _mm512_setzero_ph (),
-						    __A,
-						    _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_reduceph512_mask_round (__B, __C,
+						_mm512_setzero_ph (),
+						__A,
+						_MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_reduce_round_ph (__m512h __A, int __B, const int __C)
 {
-  return __builtin_ia32_vreduceph_v32hf_mask_round (__A, __B,
-						    _mm512_setzero_ph (),
-						    (__mmask32) -1, __C);
+  return __builtin_ia32_reduceph512_mask_round (__A, __B,
+						_mm512_setzero_ph (),
+						(__mmask32) -1, __C);
 }
 
 extern __inline __m512h
@@ -1667,8 +1667,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_reduce_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
 			     int __D, const int __E)
 {
-  return __builtin_ia32_vreduceph_v32hf_mask_round (__C, __D, __A, __B,
-						    __E);
+  return __builtin_ia32_reduceph512_mask_round (__C, __D, __A, __B,
+						__E);
 }
 
 extern __inline __m512h
@@ -1676,39 +1676,39 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_reduce_round_ph (__mmask32 __A, __m512h __B, int __C,
 			      const int __D)
 {
-  return __builtin_ia32_vreduceph_v32hf_mask_round (__B, __C,
-						    _mm512_setzero_ph (),
-						    __A, __D);
+  return __builtin_ia32_reduceph512_mask_round (__B, __C,
+						_mm512_setzero_ph (),
+						__A, __D);
 }
 
 #else
 #define _mm512_reduce_ph(A, B)						\
-  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B),			\
-					      _mm512_setzero_ph (),	\
-					      (__mmask32)-1,		\
-					      _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_reduceph512_mask_round ((A), (B),			\
+					  _mm512_setzero_ph (),		\
+					  (__mmask32)-1,		\
+					  _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_reduce_ph(A, B, C, D)				\
-  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B),	\
-					      _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B),		\
+					  _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_maskz_reduce_ph(A, B, C)					\
-  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C),			\
-					      _mm512_setzero_ph (),	\
-					      (A), _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_reduceph512_mask_round ((B), (C),			\
+					  _mm512_setzero_ph (),		\
+					  (A), _MM_FROUND_CUR_DIRECTION))
 
-#define _mm512_reduce_round_ph(A, B, C)					\
-  (__builtin_ia32_vreduceph_v32hf_mask_round ((A), (B),			\
-					      _mm512_setzero_ph (),	\
-					      (__mmask32)-1, (C)))
+#define _mm512_reduce_round_ph(A, B, C)				\
+  (__builtin_ia32_reduceph512_mask_round ((A), (B),		\
+					  _mm512_setzero_ph (),	\
+					  (__mmask32)-1, (C)))
 
 #define _mm512_mask_reduce_round_ph(A, B, C, D, E)			\
-  (__builtin_ia32_vreduceph_v32hf_mask_round ((C), (D), (A), (B), (E)))
+  (__builtin_ia32_reduceph512_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm512_maskz_reduce_round_ph(A, B, C, D)			\
-  (__builtin_ia32_vreduceph_v32hf_mask_round ((B), (C),			\
-					      _mm512_setzero_ph (),	\
-					      (A), (D)))
+#define _mm512_maskz_reduce_round_ph(A, B, C, D)		\
+  (__builtin_ia32_reduceph512_mask_round ((B), (C),		\
+					  _mm512_setzero_ph (),	\
+					  (A), (D)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -1718,10 +1718,10 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_reduce_sh (__m128h __A, __m128h __B, int __C)
 {
-  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
-						   _mm_setzero_ph (),
-						   (__mmask8) -1,
-						   _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1,
+					     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
@@ -1729,26 +1729,26 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_reduce_sh (__m128h __A, __mmask8 __B, __m128h __C,
 		    __m128h __D, int __E)
 {
-  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A, __B,
-						   _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A, __B,
+					     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_reduce_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
 {
-  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
-						   _mm_setzero_ph (), __A,
-						   _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
+					     _mm_setzero_ph (), __A,
+					     _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_reduce_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
 {
-  return __builtin_ia32_vreducesh_v8hf_mask_round (__A, __B, __C,
-						   _mm_setzero_ph (),
-						   (__mmask8) -1, __D);
+  return __builtin_ia32_reducesh_mask_round (__A, __B, __C,
+					     _mm_setzero_ph (),
+					     (__mmask8) -1, __D);
 }
 
 extern __inline __m128h
@@ -1756,8 +1756,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_reduce_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 			  __m128h __D, int __E, const int __F)
 {
-  return __builtin_ia32_vreducesh_v8hf_mask_round (__C, __D, __E, __A,
-						   __B, __F);
+  return __builtin_ia32_reducesh_mask_round (__C, __D, __E, __A,
+					     __B, __F);
 }
 
 extern __inline __m128h
@@ -1765,81 +1765,81 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_reduce_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			   int __D, const int __E)
 {
-  return __builtin_ia32_vreducesh_v8hf_mask_round (__B, __C, __D,
-						   _mm_setzero_ph (),
-						   __A, __E);
+  return __builtin_ia32_reducesh_mask_round (__B, __C, __D,
+					     _mm_setzero_ph (),
+					     __A, __E);
 }
 
 #else
 #define _mm_reduce_sh(A, B, C)						\
-  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C),		\
-					     _mm_setzero_ph (),	\
-					     (__mmask8)-1,		\
-					     _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),			\
+				       _mm_setzero_ph (),		\
+				       (__mmask8)-1,			\
+				       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_mask_reduce_sh(A, B, C, D, E)				\
-  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B),	\
-					     _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B),		\
+				       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_maskz_reduce_sh(A, B, C, D)					\
-  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D),		\
-					     _mm_setzero_ph (),	\
-					     (A), _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),			\
+				       _mm_setzero_ph (),		\
+				       (A), _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_reduce_round_sh(A, B, C, D)				\
-  (__builtin_ia32_vreducesh_v8hf_mask_round ((A), (B), (C),	\
-					     _mm_setzero_ph (),	\
-					     (__mmask8)-1, (D)))
+  (__builtin_ia32_reducesh_mask_round ((A), (B), (C),		\
+				       _mm_setzero_ph (),	\
+				       (__mmask8)-1, (D)))
 
 #define _mm_mask_reduce_round_sh(A, B, C, D, E, F)			\
-  (__builtin_ia32_vreducesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
+  (__builtin_ia32_reducesh_mask_round ((C), (D), (E), (A), (B), (F)))
 
 #define _mm_maskz_reduce_round_sh(A, B, C, D, E)		\
-  (__builtin_ia32_vreducesh_v8hf_mask_round ((B), (C), (D),	\
-					     _mm_setzero_ph (),	\
-					     (A), (E)))
+  (__builtin_ia32_reducesh_mask_round ((B), (C), (D),		\
+				       _mm_setzero_ph (),	\
+				       (A), (E)))
 
 #endif /* __OPTIMIZE__ */
 
 /* Intrinsics vrndscaleph.  */
 #ifdef __OPTIMIZE__
 extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_roundscale_ph (__m512h __A, int __B)
 {
-  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
-						      _mm512_setzero_ph (),
-						      (__mmask32) -1,
-						      _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
+						  _mm512_setzero_ph (),
+						  (__mmask32) -1,
+						  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_roundscale_ph (__m512h __A, __mmask32 __B,
-				 __m512h __C, int __D)
+			   __m512h __C, int __D)
 {
-  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A, __B,
-						      _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A, __B,
+						  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_roundscale_ph (__mmask32 __A, __m512h __B, int __C)
 {
-  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
-						      _mm512_setzero_ph (),
-						      __A,
-						      _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
+						  _mm512_setzero_ph (),
+						  __A,
+						  _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m512h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_roundscale_round_ph (__m512h __A, int __B, const int __C)
 {
-  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__A, __B,
-						      _mm512_setzero_ph (),
-						      (__mmask32) -1,
-						      __C);
+  return __builtin_ia32_rndscaleph512_mask_round (__A, __B,
+						  _mm512_setzero_ph (),
+						  (__mmask32) -1,
+						  __C);
 }
 
 extern __inline __m512h
@@ -1847,8 +1847,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_roundscale_round_ph (__m512h __A, __mmask32 __B,
 				 __m512h __C, int __D, const int __E)
 {
-  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__C, __D, __A,
-						      __B, __E);
+  return __builtin_ia32_rndscaleph512_mask_round (__C, __D, __A,
+						  __B, __E);
 }
 
 extern __inline __m512h
@@ -1856,52 +1856,52 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_roundscale_round_ph (__mmask32 __A, __m512h __B, int __C,
 				  const int __D)
 {
-  return __builtin_ia32_vrndscaleph_v32hf_mask_round (__B, __C,
-						      _mm512_setzero_ph (),
-						      __A, __D);
+  return __builtin_ia32_rndscaleph512_mask_round (__B, __C,
+						  _mm512_setzero_ph (),
+						  __A, __D);
 }
 
 #else
-#define _mm512_roundscale_ph(A, B) \
-  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B),		\
-						_mm512_setzero_ph (),	\
-						(__mmask32)-1,		\
-						_MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_roundscale_ph(A, B, C, D) \
-  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B),	\
-						_MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_roundscale_ph(A, B, C) \
-  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C),		\
-						_mm512_setzero_ph (),	\
-						(A),			\
-						_MM_FROUND_CUR_DIRECTION))
-#define _mm512_roundscale_round_ph(A, B, C) \
-  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((A), (B),		\
-						_mm512_setzero_ph (),	\
-						(__mmask32)-1, (C)))
+#define _mm512_roundscale_ph(A, B)					\
+  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),			\
+					    _mm512_setzero_ph (),	\
+					    (__mmask32)-1,		\
+					    _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_roundscale_ph(A, B, C, D)				\
+  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B),		\
+					    _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_roundscale_ph(A, B, C)				\
+  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),			\
+					    _mm512_setzero_ph (),	\
+					    (A),			\
+					    _MM_FROUND_CUR_DIRECTION))
+#define _mm512_roundscale_round_ph(A, B, C)				\
+  (__builtin_ia32_rndscaleph512_mask_round ((A), (B),			\
+					    _mm512_setzero_ph (),	\
+					    (__mmask32)-1, (C)))
 
 #define _mm512_mask_roundscale_round_ph(A, B, C, D, E)			\
-  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((C), (D), (A), (B), (E)))
+  (__builtin_ia32_rndscaleph512_mask_round ((C), (D), (A), (B), (E)))
 
-#define _mm512_maskz_roundscale_round_ph(A, B, C, D) \
-  (__builtin_ia32_vrndscaleph_v32hf_mask_round ((B), (C),		\
-						_mm512_setzero_ph (),	\
-						(A), (D)))
+#define _mm512_maskz_roundscale_round_ph(A, B, C, D)			\
+  (__builtin_ia32_rndscaleph512_mask_round ((B), (C),			\
+					    _mm512_setzero_ph (),	\
+					    (A), (D)))
 
 #endif /* __OPTIMIZE__ */
 
 /* Intrinsics vrndscalesh.  */
 #ifdef __OPTIMIZE__
 extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_roundscale_sh (__m128h __A, __m128h __B, int __C)
 {
-  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
-						     _mm_setzero_ph (),
-						     (__mmask8) -1,
-						     _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
+					       _mm_setzero_ph (),
+					       (__mmask8) -1,
+					       _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
@@ -1909,27 +1909,27 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_roundscale_sh (__m128h __A, __mmask8 __B, __m128h __C,
 			__m128h __D, int __E)
 {
-  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E, __A, __B,
-						     _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E, __A, __B,
+					       _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_roundscale_sh (__mmask8 __A, __m128h __B, __m128h __C, int __D)
 {
-  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
-						     _mm_setzero_ph (), __A,
-						     _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
+					       _mm_setzero_ph (), __A,
+					       _MM_FROUND_CUR_DIRECTION);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_roundscale_round_sh (__m128h __A, __m128h __B, int __C, const int __D)
 {
-  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__A, __B, __C,
-						     _mm_setzero_ph (),
-						     (__mmask8) -1,
-						     __D);
+  return __builtin_ia32_rndscalesh_mask_round (__A, __B, __C,
+					       _mm_setzero_ph (),
+					       (__mmask8) -1,
+					       __D);
 }
 
 extern __inline __m128h
@@ -1937,8 +1937,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_roundscale_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
 			      __m128h __D, int __E, const int __F)
 {
-  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__C, __D, __E,
-						     __A, __B, __F);
+  return __builtin_ia32_rndscalesh_mask_round (__C, __D, __E,
+					       __A, __B, __F);
 }
 
 extern __inline __m128h
@@ -1946,46 +1946,46 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_roundscale_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
 			       int __D, const int __E)
 {
-  return __builtin_ia32_vrndscalesh_v8hf_mask_round (__B, __C, __D,
-						     _mm_setzero_ph (),
-						     __A, __E);
+  return __builtin_ia32_rndscalesh_mask_round (__B, __C, __D,
+					       _mm_setzero_ph (),
+					       __A, __E);
 }
 
 #else
 #define _mm_roundscale_sh(A, B, C)					\
-  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C),		\
-					       _mm_setzero_ph (),	\
-					       (__mmask8)-1, \
-					       _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),			\
+					 _mm_setzero_ph (),		\
+					 (__mmask8)-1,			\
+					 _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_mask_roundscale_sh(A, B, C, D, E)				\
-  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), \
-					       _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B),	\
+					 _MM_FROUND_CUR_DIRECTION))
 
 #define _mm_maskz_roundscale_sh(A, B, C, D)				\
-  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D),		\
-					       _mm_setzero_ph (),	\
-					       (A), _MM_FROUND_CUR_DIRECTION))
+  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),			\
+					 _mm_setzero_ph (),		\
+					 (A), _MM_FROUND_CUR_DIRECTION))
 
-#define _mm_roundscale_round_sh(A, B, C, D)				\
-  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((A), (B), (C),		\
-					       _mm_setzero_ph (),	\
-					       (__mmask8)-1, (D)))
+#define _mm_roundscale_round_sh(A, B, C, D)			\
+  (__builtin_ia32_rndscalesh_mask_round ((A), (B), (C),		\
+					 _mm_setzero_ph (),	\
+					 (__mmask8)-1, (D)))
 
 #define _mm_mask_roundscale_round_sh(A, B, C, D, E, F)			\
-  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((C), (D), (E), (A), (B), (F)))
+  (__builtin_ia32_rndscalesh_mask_round ((C), (D), (E), (A), (B), (F)))
 
-#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)			\
-  (__builtin_ia32_vrndscalesh_v8hf_mask_round ((B), (C), (D),		\
-					       _mm_setzero_ph (),	\
-					       (A), (E)))
+#define _mm_maskz_roundscale_round_sh(A, B, C, D, E)		\
+  (__builtin_ia32_rndscalesh_mask_round ((B), (C), (D),		\
+					 _mm_setzero_ph (),	\
+					 (A), (E)))
 
 #endif /* __OPTIMIZE__ */
 
 /* Intrinsics vfpclasssh.  */
 #ifdef __OPTIMIZE__
 extern __inline __mmask8
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_fpclass_sh_mask (__m128h __A, const int __imm)
 {
   return (__mmask8) __builtin_ia32_fpclasssh_mask ((__v8hf) __A, __imm,
@@ -2031,11 +2031,11 @@  _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
 
 #else
 #define _mm512_mask_fpclass_ph_mask(u, x, c)				\
-  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x),\
+  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
 						 (int) (c),(__mmask8)(u)))
 
 #define _mm512_fpclass_ph_mask(x, c)                                    \
-  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x),\
+  ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
 						 (int) (c),(__mmask8)-1))
 #endif /* __OPTIMIZE__ */
 
@@ -2141,9 +2141,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_getexp_round_ph (__m512h __A, const int __R)
 {
   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
-						   (__v32hf)
-						   _mm512_setzero_ph (),
-						   (__mmask32) -1, __R);
+						    (__v32hf)
+						    _mm512_setzero_ph (),
+						    (__mmask32) -1, __R);
 }
 
 extern __inline __m512h
@@ -2152,8 +2152,8 @@  _mm512_mask_getexp_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
 			     const int __R)
 {
   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
-						   (__v32hf) __W,
-						   (__mmask32) __U, __R);
+						    (__v32hf) __W,
+						    (__mmask32) __U, __R);
 }
 
 extern __inline __m512h
@@ -2161,37 +2161,37 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_getexp_round_ph (__mmask32 __U, __m512h __A, const int __R)
 {
   return (__m512h) __builtin_ia32_getexpph512_mask ((__v32hf) __A,
-						   (__v32hf)
-						   _mm512_setzero_ph (),
-						   (__mmask32) __U, __R);
+						    (__v32hf)
+						    _mm512_setzero_ph (),
+						    (__mmask32) __U, __R);
 }
 
 #else
-#define _mm_getexp_round_sh(A, B, R)						\
-  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),		\
-					       (__v8hf)(__m128h)(B),		\
-					       (__v8hf)_mm_setzero_ph(),	\
+#define _mm_getexp_round_sh(A, B, R)					\
+  ((__m128h)__builtin_ia32_getexpsh_mask_round((__v8hf)(__m128h)(A),	\
+					       (__v8hf)(__m128h)(B),	\
+					       (__v8hf)_mm_setzero_ph(), \
 					       (__mmask8)-1, R))
 
-#define _mm_mask_getexp_round_sh(W, U, A, B, C)					\
+#define _mm_mask_getexp_round_sh(W, U, A, B, C)			\
   (__m128h)__builtin_ia32_getexpsh_mask_round(A, B, W, U, C)
 
-#define _mm_maskz_getexp_round_sh(U, A, B, C)					\
-  (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,				\
-					      (__v8hf)_mm_setzero_ph(),		\
+#define _mm_maskz_getexp_round_sh(U, A, B, C)				\
+  (__m128h)__builtin_ia32_getexpsh_mask_round(A, B,			\
+					      (__v8hf)_mm_setzero_ph(),	\
 					      U, C)
 
-#define _mm512_getexp_round_ph(A, R)						\
-  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),		\
-  (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
+#define _mm512_getexp_round_ph(A, R)					\
+  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
+					    (__v32hf)_mm512_setzero_ph(), (__mmask32)-1, R))
 
-#define _mm512_mask_getexp_round_ph(W, U, A, R)					\
-  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),		\
-  (__v32hf)(__m512h)(W), (__mmask32)(U), R))
+#define _mm512_mask_getexp_round_ph(W, U, A, R)				\
+  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
+					    (__v32hf)(__m512h)(W), (__mmask32)(U), R))
 
-#define _mm512_maskz_getexp_round_ph(U, A, R)					\
-  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),		\
-  (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
+#define _mm512_maskz_getexp_round_ph(U, A, R)				\
+  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),	\
+					    (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), R))
 
 #endif /* __OPTIMIZE__ */
 
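[Editor's note, not part of the patch: since only the internal builtin names
change, the user-visible intrinsic API is untouched.  A minimal sketch,
assuming a compiler with this patch applied and -mavx512fp16 (the helper
functions below are hypothetical, for illustration only):

#include <immintrin.h>

__m512h
use_scalef (__m512h a, __m512h b)
{
  /* Under the new naming this expands to
     __builtin_ia32_scalefph512_mask_round.  */
  return _mm512_scalef_ph (a, b);
}

__m128h
use_sqrt_sh (__m128h a, __m128h b)
{
  /* Under the new naming this expands to
     __builtin_ia32_sqrtsh_mask_round.  */
  return _mm_sqrt_sh (a, b);
}
]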
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 206d60407fc..e9478792a03 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -53,30 +53,30 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vaddph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
 {
-  return __builtin_ia32_vaddph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vaddph_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (),
+				       __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
 {
-  return __builtin_ia32_vaddph_v16hf_mask (__B, __C,
-					   _mm256_setzero_ph (), __A);
+  return __builtin_ia32_addph256_mask (__B, __C,
+				       _mm256_setzero_ph (), __A);
 }
 
 extern __inline __m128h
@@ -97,30 +97,30 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vsubph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
 {
-  return __builtin_ia32_vsubph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vsubph_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (),
+				       __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
 {
-  return __builtin_ia32_vsubph_v16hf_mask (__B, __C,
-					   _mm256_setzero_ph (), __A);
+  return __builtin_ia32_subph256_mask (__B, __C,
+				       _mm256_setzero_ph (), __A);
 }
 
 extern __inline __m128h
@@ -141,30 +141,30 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vmulph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
 {
-  return __builtin_ia32_vmulph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vmulph_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (),
+				       __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
 {
-  return __builtin_ia32_vmulph_v16hf_mask (__B, __C,
-					   _mm256_setzero_ph (), __A);
+  return __builtin_ia32_mulph256_mask (__B, __C,
+				       _mm256_setzero_ph (), __A);
 }
 
 extern __inline __m128h
@@ -185,30 +185,30 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vdivph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
 {
-  return __builtin_ia32_vdivph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vdivph_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (),
+				       __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
 {
-  return __builtin_ia32_vdivph_v16hf_mask (__B, __C,
-					   _mm256_setzero_ph (), __A);
+  return __builtin_ia32_divph256_mask (__B, __C,
+				       _mm256_setzero_ph (), __A);
 }
 
 /* Intrinsics v[max,min]ph.  */
@@ -216,96 +216,96 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_ph (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vmaxph_v8hf_mask (__A, __B,
-					  _mm_setzero_ph (),
-					  (__mmask8) -1);
+  return __builtin_ia32_maxph128_mask (__A, __B,
+				       _mm_setzero_ph (),
+				       (__mmask8) -1);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_max_ph (__m256h __A, __m256h __B)
 {
-  return __builtin_ia32_vmaxph_v16hf_mask (__A, __B,
-					  _mm256_setzero_ph (),
-					  (__mmask16) -1);
+  return __builtin_ia32_maxph256_mask (__A, __B,
+				       _mm256_setzero_ph (),
+				       (__mmask16) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vmaxph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
 {
-  return __builtin_ia32_vmaxph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vmaxph_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (),
+				       __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
 {
-  return __builtin_ia32_vmaxph_v16hf_mask (__B, __C,
-					   _mm256_setzero_ph (), __A);
+  return __builtin_ia32_maxph256_mask (__B, __C,
+				       _mm256_setzero_ph (), __A);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_ph (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vminph_v8hf_mask (__A, __B,
-					  _mm_setzero_ph (),
-					  (__mmask8) -1);
+  return __builtin_ia32_minph128_mask (__A, __B,
+				       _mm_setzero_ph (),
+				       (__mmask8) -1);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_min_ph (__m256h __A, __m256h __B)
 {
-  return __builtin_ia32_vminph_v16hf_mask (__A, __B,
-					  _mm256_setzero_ph (),
-					  (__mmask16) -1);
+  return __builtin_ia32_minph256_mask (__A, __B,
+				       _mm256_setzero_ph (),
+				       (__mmask16) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vminph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
 {
-  return __builtin_ia32_vminph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vminph_v8hf_mask (__B, __C, _mm_setzero_ph (),
-					  __A);
+  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (),
+				       __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
 {
-  return __builtin_ia32_vminph_v16hf_mask (__B, __C,
-					   _mm256_setzero_ph (), __A);
+  return __builtin_ia32_minph256_mask (__B, __C,
+				       _mm256_setzero_ph (), __A);
 }
 
 /* vcmpph */
@@ -314,8 +314,8 @@  extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
 {
-  return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__A, __B, __C,
-						     (__mmask8) -1);
+  return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
+						  (__mmask8) -1);
 }
 
 extern __inline __mmask8
@@ -323,15 +323,15 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
 		      const int __D)
 {
-  return (__mmask8) __builtin_ia32_vcmpph_v8hf_mask (__B, __C, __D, __A);
+  return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
 }
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
 {
-  return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__A, __B, __C,
-						       (__mmask16) -1);
+  return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
+						   (__mmask16) -1);
 }
 
 extern __inline __mmask16
@@ -339,22 +339,22 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
 		      const int __D)
 {
-  return (__mmask16) __builtin_ia32_vcmpph_v16hf_mask (__B, __C, __D,
-						       __A);
+  return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D,
+						   __A);
 }
 
 #else
-#define _mm_cmp_ph_mask(A, B, C)		\
-  (__builtin_ia32_vcmpph_v8hf_mask ((A), (B), (C), (-1)))
+#define _mm_cmp_ph_mask(A, B, C)			\
+  (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))
 
-#define _mm_mask_cmp_ph_mask(A, B, C, D)	\
-  (__builtin_ia32_vcmpph_v8hf_mask ((B), (C), (D), (A)))
+#define _mm_mask_cmp_ph_mask(A, B, C, D)		\
+  (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))
 
-#define _mm256_cmp_ph_mask(A, B, C)		\
-  (__builtin_ia32_vcmpph_v16hf_mask ((A), (B), (C), (-1)))
+#define _mm256_cmp_ph_mask(A, B, C)			\
+  (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))
 
-#define _mm256_mask_cmp_ph_mask(A, B, C, D)	\
-  (__builtin_ia32_vcmpph_v16hf_mask ((B), (C), (D), (A)))
+#define _mm256_mask_cmp_ph_mask(A, B, C, D)		\
+  (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -363,46 +363,46 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_sqrt_ph (__m128h __A)
 {
-  return __builtin_ia32_vsqrtph_v8hf_mask (__A, _mm_setzero_ph (),
-					   (__mmask8) -1);
+  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
+					(__mmask8) -1);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_sqrt_ph (__m256h __A)
 {
-  return __builtin_ia32_vsqrtph_v16hf_mask (__A, _mm256_setzero_ph (),
-					    (__mmask16) -1);
+  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
+					(__mmask16) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
 {
-  return __builtin_ia32_vsqrtph_v8hf_mask (__C, __A, __B);
+  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
 {
-  return __builtin_ia32_vsqrtph_v16hf_mask (__C, __A, __B);
+  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
 {
-  return __builtin_ia32_vsqrtph_v8hf_mask (__B, _mm_setzero_ph (),
-					   __A);
+  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (),
+					__A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
 {
-  return __builtin_ia32_vsqrtph_v16hf_mask (__B, _mm256_setzero_ph (),
-					    __A);
+  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (),
+					__A);
 }
 
 /* Intrinsics vrsqrtph.  */
@@ -410,45 +410,45 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt_ph (__m128h __A)
 {
-  return __builtin_ia32_vrsqrtph_v8hf_mask (__A, _mm_setzero_ph (),
-					    (__mmask8) -1);
+  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
+					 (__mmask8) -1);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_rsqrt_ph (__m256h __A)
 {
-  return __builtin_ia32_vrsqrtph_v16hf_mask (__A, _mm256_setzero_ph (),
-					     (__mmask16) -1);
+  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
+					 (__mmask16) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
 {
-  return __builtin_ia32_vrsqrtph_v8hf_mask (__C, __A, __B);
+  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
 {
-  return __builtin_ia32_vrsqrtph_v16hf_mask (__C, __A, __B);
+  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
 {
-  return __builtin_ia32_vrsqrtph_v8hf_mask (__B, _mm_setzero_ph (), __A);
+  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
 {
-  return __builtin_ia32_vrsqrtph_v16hf_mask (__B, _mm256_setzero_ph (),
-					     __A);
+  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (),
+					 __A);
 }
 
 /* Intrinsics vrcpph.  */
@@ -456,45 +456,45 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp_ph (__m128h __A)
 {
-  return __builtin_ia32_vrcpph_v8hf_mask (__A, _mm_setzero_ph (),
-					  (__mmask8) -1);
+  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
+				       (__mmask8) -1);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_rcp_ph (__m256h __A)
 {
-  return __builtin_ia32_vrcpph_v16hf_mask (__A, _mm256_setzero_ph (),
-					   (__mmask16) -1);
+  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
+				       (__mmask16) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
 {
-  return __builtin_ia32_vrcpph_v8hf_mask (__C, __A, __B);
+  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
 {
-  return __builtin_ia32_vrcpph_v16hf_mask (__C, __A, __B);
+  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
 {
-  return __builtin_ia32_vrcpph_v8hf_mask (__B, _mm_setzero_ph (), __A);
+  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
 {
-  return __builtin_ia32_vrcpph_v16hf_mask (__B, _mm256_setzero_ph (),
-					   __A);
+  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (),
+				       __A);
 }
 
 /* Intrinsics vscalefph.  */
@@ -502,25 +502,25 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_scalef_ph (__m128h __A, __m128h __B)
 {
-  return __builtin_ia32_vscalefph_v8hf_mask (__A, __B,
-					     _mm_setzero_ph (),
-					     (__mmask8) -1);
+  return __builtin_ia32_scalefph128_mask (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_scalef_ph (__m256h __A, __m256h __B)
 {
-  return __builtin_ia32_vscalefph_v16hf_mask (__A, __B,
-					      _mm256_setzero_ph (),
-					      (__mmask16) -1);
+  return __builtin_ia32_scalefph256_mask (__A, __B,
+					  _mm256_setzero_ph (),
+					  (__mmask16) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
 {
-  return __builtin_ia32_vscalefph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
@@ -528,24 +528,24 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
 		       __m256h __D)
 {
-  return __builtin_ia32_vscalefph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
 {
-  return __builtin_ia32_vscalefph_v8hf_mask (__B, __C,
-					     _mm_setzero_ph (), __A);
+  return __builtin_ia32_scalefph128_mask (__B, __C,
+					  _mm_setzero_ph (), __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
 {
-  return __builtin_ia32_vscalefph_v16hf_mask (__B, __C,
-					      _mm256_setzero_ph (),
-					      __A);
+  return __builtin_ia32_scalefph256_mask (__B, __C,
+					  _mm256_setzero_ph (),
+					  __A);
 }
 
 /* Intrinsics vreduceph.  */
@@ -554,109 +554,109 @@  extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_reduce_ph (__m128h __A, int __B)
 {
-  return __builtin_ia32_vreduceph_v8hf_mask (__A, __B,
-					     _mm_setzero_ph (),
-					     (__mmask8) -1);
+  return __builtin_ia32_reduceph128_mask (__A, __B,
+					  _mm_setzero_ph (),
+					  (__mmask8) -1);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
 {
-  return __builtin_ia32_vreduceph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
 {
-  return __builtin_ia32_vreduceph_v8hf_mask (__B, __C,
-					     _mm_setzero_ph (), __A);
+  return __builtin_ia32_reduceph128_mask (__B, __C,
+					  _mm_setzero_ph (), __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_reduce_ph (__m256h __A, int __B)
 {
-  return __builtin_ia32_vreduceph_v16hf_mask (__A, __B,
-					      _mm256_setzero_ph (),
-					      (__mmask16) -1);
+  return __builtin_ia32_reduceph256_mask (__A, __B,
+					  _mm256_setzero_ph (),
+					  (__mmask16) -1);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
 {
-  return __builtin_ia32_vreduceph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
 {
-  return __builtin_ia32_vreduceph_v16hf_mask (__B, __C,
-					      _mm256_setzero_ph (),
-					      __A);
+  return __builtin_ia32_reduceph256_mask (__B, __C,
+					  _mm256_setzero_ph (),
+					  __A);
 }
 
 #else
-#define _mm_reduce_ph(A, B)					\
-  (__builtin_ia32_vreduceph_v8hf_mask ((A), (B),\
-				       _mm_setzero_ph (),	\
-				       ((__mmask8)-1)))
+#define _mm_reduce_ph(A, B)				\
+  (__builtin_ia32_reduceph128_mask ((A), (B),		\
+				    _mm_setzero_ph (),	\
+				    ((__mmask8)-1)))
 
-#define _mm_mask_reduce_ph(A,  B,  C, D)		\
-  (__builtin_ia32_vreduceph_v8hf_mask ((C), (D), (A), (B)))
+#define _mm_mask_reduce_ph(A,  B,  C, D)			\
+  (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))
 
-#define _mm_maskz_reduce_ph(A,  B, C)				\
-  (__builtin_ia32_vreduceph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
+#define _mm_maskz_reduce_ph(A,  B, C)					\
+  (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))
 
 #define _mm256_reduce_ph(A, B)					\
-  (__builtin_ia32_vreduceph_v16hf_mask ((A), (B),\
-					_mm256_setzero_ph (),	\
-					((__mmask16)-1)))
+  (__builtin_ia32_reduceph256_mask ((A), (B),			\
+				    _mm256_setzero_ph (),	\
+				    ((__mmask16)-1)))
 
-#define _mm256_mask_reduce_ph(A, B, C, D)		\
-  (__builtin_ia32_vreduceph_v16hf_mask ((C), (D), (A), (B)))
+#define _mm256_mask_reduce_ph(A, B, C, D)			\
+  (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))
 
-#define _mm256_maskz_reduce_ph(A, B, C)				\
-  (__builtin_ia32_vreduceph_v16hf_mask ((B), (C), _mm256_setzero_ph (), (A)))
+#define _mm256_maskz_reduce_ph(A, B, C)					\
+  (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))
 
 #endif /* __OPTIMIZE__ */
 
 /* Intrinsics vrndscaleph.  */
 #ifdef __OPTIMIZE__
-extern __inline __m128h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscale_ph (__m128h __A, int __B)
-{
-  return __builtin_ia32_vrndscaleph_v8hf_mask (__A, __B,
-					       _mm_setzero_ph (),
-					       (__mmask8) -1);
-}
+  extern __inline __m128h
+  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+  _mm_roundscale_ph (__m128h __A, int __B)
+  {
+    return __builtin_ia32_rndscaleph128_mask (__A, __B,
+					      _mm_setzero_ph (),
+					      (__mmask8) -1);
+  }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
 {
-  return __builtin_ia32_vrndscaleph_v8hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
 {
-  return __builtin_ia32_vrndscaleph_v8hf_mask (__B, __C,
-					       _mm_setzero_ph (), __A);
+  return __builtin_ia32_rndscaleph128_mask (__B, __C,
+					    _mm_setzero_ph (), __A);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_roundscale_ph (__m256h __A, int __B)
 {
-  return __builtin_ia32_vrndscaleph_v16hf_mask (__A, __B,
-						_mm256_setzero_ph (),
-						(__mmask16) -1);
+  return __builtin_ia32_rndscaleph256_mask (__A, __B,
+					    _mm256_setzero_ph (),
+					    (__mmask16) -1);
 }
 
 extern __inline __m256h
@@ -664,40 +664,40 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
 			   int __D)
 {
-  return __builtin_ia32_vrndscaleph_v16hf_mask (__C, __D, __A, __B);
+  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
 }
 
 extern __inline __m256h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
 {
-  return __builtin_ia32_vrndscaleph_v16hf_mask (__B, __C,
-						_mm256_setzero_ph (),
-						__A);
+  return __builtin_ia32_rndscaleph256_mask (__B, __C,
+					    _mm256_setzero_ph (),
+					    __A);
 }
 
 #else
-#define _mm_roundscale_ph(A, B) \
-  (__builtin_ia32_vrndscaleph_v8hf_mask ((A), (B), _mm_setzero_ph (),	\
-					 ((__mmask8)-1)))
+#define _mm_roundscale_ph(A, B)						\
+  (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (),	\
+				      ((__mmask8)-1)))
 
-#define _mm_mask_roundscale_ph(A, B, C, D) \
-  (__builtin_ia32_vrndscaleph_v8hf_mask ((C), (D), (A), (B)))
+#define _mm_mask_roundscale_ph(A, B, C, D)			\
+  (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))
 
-#define _mm_maskz_roundscale_ph(A, B, C) \
-  (__builtin_ia32_vrndscaleph_v8hf_mask ((B), (C), _mm_setzero_ph (), (A)))
+#define _mm_maskz_roundscale_ph(A, B, C)				\
+  (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))
 
-#define _mm256_roundscale_ph(A, B) \
-  (__builtin_ia32_vrndscaleph_v16hf_mask ((A), (B),	      \
-					 _mm256_setzero_ph(), \
-					  ((__mmask16)-1)))
+#define _mm256_roundscale_ph(A, B)				\
+  (__builtin_ia32_rndscaleph256_mask ((A), (B),			\
+				      _mm256_setzero_ph(),	\
+				      ((__mmask16)-1)))
 
-#define _mm256_mask_roundscale_ph(A, B, C, D) \
-  (__builtin_ia32_vrndscaleph_v16hf_mask ((C), (D), (A), (B)))
+#define _mm256_mask_roundscale_ph(A, B, C, D)			\
+  (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))
 
-#define _mm256_maskz_roundscale_ph(A, B, C) \
-  (__builtin_ia32_vrndscaleph_v16hf_mask ((B), (C),			\
-					  _mm256_setzero_ph (), (A)))
+#define _mm256_maskz_roundscale_ph(A, B, C)				\
+  (__builtin_ia32_rndscaleph256_mask ((B), (C),				\
+				      _mm256_setzero_ph (), (A)))
 
 #endif /* __OPTIMIZE__ */
 
@@ -705,7 +705,7 @@  _mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
 #ifdef __OPTIMIZE__
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
+  _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
 {
   return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
 						      __imm, __U);
@@ -725,7 +725,7 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
 {
   return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
-						      __imm, __U);
+						       __imm, __U);
 }
 
 extern __inline __mmask16
@@ -733,26 +733,26 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_fpclass_ph_mask (__m256h __A, const int __imm)
 {
   return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
-						      __imm,
-						      (__mmask16) -1);
+						       __imm,
+						       (__mmask16) -1);
 }
 
 #else
 #define _mm_fpclass_ph_mask(X, C)                                       \
-  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),  \
+  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
 						(int) (C),(__mmask8)-1))
 
 #define _mm_mask_fpclass_ph_mask(u, X, C)                               \
-  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),  \
+  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
 						(int) (C),(__mmask8)(u)))
 
 #define _mm256_fpclass_ph_mask(X, C)                                    \
-  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X),  \
-						(int) (C),(__mmask16)-1))
+  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
+						 (int) (C),(__mmask16)-1))
 
 #define _mm256_mask_fpclass_ph_mask(u, X, C)				\
-  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X),  \
-						(int) (C),(__mmask16)(u)))
+  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
+						 (int) (C),(__mmask16)(u)))
 #endif /* __OPTIMIZE__ */
 
 /* Intrinsics vgetexpph, vgetexpsh.  */
@@ -761,9 +761,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_getexp_ph (__m256h __A)
 {
   return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
-						   (__v16hf)
-						   _mm256_setzero_ph (),
-						   (__mmask16) -1);
+						    (__v16hf)
+						    _mm256_setzero_ph (),
+						    (__mmask16) -1);
 }
 
 extern __inline __m256h
@@ -771,8 +771,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
 {
   return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
-						   (__v16hf) __W,
-						   (__mmask16) __U);
+						    (__v16hf) __W,
+						    (__mmask16) __U);
 }
 
 extern __inline __m256h
@@ -780,9 +780,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
 {
   return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
-						   (__v16hf)
-						   _mm256_setzero_ph (),
-						   (__mmask16) __U);
+						    (__v16hf)
+						    _mm256_setzero_ph (),
+						    (__mmask16) __U);
 }
 
 extern __inline __m128h
@@ -790,9 +790,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_getexp_ph (__m128h __A)
 {
   return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
-						   (__v8hf)
-						   _mm_setzero_ph (),
-						   (__mmask8) -1);
+						    (__v8hf)
+						    _mm_setzero_ph (),
+						    (__mmask8) -1);
 }
 
 extern __inline __m128h
@@ -800,8 +800,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
 {
   return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
-						   (__v8hf) __W,
-						   (__mmask8) __U);
+						    (__v8hf) __W,
+						    (__mmask8) __U);
 }
 
 extern __inline __m128h
@@ -809,9 +809,9 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
 {
   return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
-						   (__v8hf)
-						   _mm_setzero_ph (),
-						   (__mmask8) __U);
+						    (__v8hf)
+						    _mm_setzero_ph (),
+						    (__mmask8) __U);
 }
 
 
@@ -892,41 +892,41 @@  _mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
 }
 
 #else
-#define _mm256_getmant_ph(X, B, C)                                              \
-  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
-					 (int)(((C)<<2) | (B)),                 \
-					  (__v16hf)(__m256h)_mm256_setzero_ph (),\
-					  (__mmask16)-1))
-
-#define _mm256_mask_getmant_ph(W, U, X, B, C)                                   \
-  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
-					 (int)(((C)<<2) | (B)),                 \
-					  (__v16hf)(__m256h)(W),                 \
-					  (__mmask16)(U)))
-
-#define _mm256_maskz_getmant_ph(U, X, B, C)                                     \
-  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),           \
-					 (int)(((C)<<2) | (B)),                 \
-					  (__v16hf)(__m256h)_mm256_setzero_ph (),\
-					  (__mmask16)(U)))
-
-#define _mm_getmant_ph(X, B, C)                                                 \
-  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
-					 (int)(((C)<<2) | (B)),                 \
-					  (__v8hf)(__m128h)_mm_setzero_ph (),   \
-					  (__mmask8)-1))
-
-#define _mm_mask_getmant_ph(W, U, X, B, C)                                      \
-  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
-					 (int)(((C)<<2) | (B)),                 \
-					  (__v8hf)(__m128h)(W),                 \
-					  (__mmask8)(U)))
-
-#define _mm_maskz_getmant_ph(U, X, B, C)                                        \
-  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),           \
-					 (int)(((C)<<2) | (B)),                 \
-					  (__v8hf)(__m128h)_mm_setzero_ph (),   \
-					  (__mmask8)(U)))
+#define _mm256_getmant_ph(X, B, C)					\
+  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
+					       (int)(((C)<<2) | (B)),	\
+					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
+					       (__mmask16)-1))
+
+#define _mm256_mask_getmant_ph(W, U, X, B, C)				\
+  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
+					       (int)(((C)<<2) | (B)),	\
+					       (__v16hf)(__m256h)(W),	\
+					       (__mmask16)(U)))
+
+#define _mm256_maskz_getmant_ph(U, X, B, C)				\
+  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
+					       (int)(((C)<<2) | (B)),	\
+					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
+					       (__mmask16)(U)))
+
+#define _mm_getmant_ph(X, B, C)						\
+  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
+					       (int)(((C)<<2) | (B)),	\
+					       (__v8hf)(__m128h)_mm_setzero_ph (), \
+					       (__mmask8)-1))
+
+#define _mm_mask_getmant_ph(W, U, X, B, C)				\
+  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
+					       (int)(((C)<<2) | (B)),	\
+					       (__v8hf)(__m128h)(W),	\
+					       (__mmask8)(U)))
+
+#define _mm_maskz_getmant_ph(U, X, B, C)				\
+  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
+					       (int)(((C)<<2) | (B)),	\
+					       (__v8hf)(__m128h)_mm_setzero_ph (), \
+					       (__mmask8)(U)))
 
 #endif /* __OPTIMIZE__ */
 
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index bfa1d56a7b8..10f6fd87cbb 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2775,49 +2775,49 @@  BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__b
 BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
 
 /* AVX512FP16.  */
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_vaddph_v8hf_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_vaddph_v16hf_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_vaddph_v32hf_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_vsubph_v8hf_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_vsubph_v16hf_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_vsubph_v32hf_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_vmulph_v8hf_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_vmulph_v16hf_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_vmulph_v32hf_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_vdivph_v8hf_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_vdivph_v16hf_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_vdivph_v32hf_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_vaddsh_v8hf_mask", IX86_BUILTIN_VADDSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_vsubsh_v8hf_mask", IX86_BUILTIN_VSUBSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_vmulsh_v8hf_mask", IX86_BUILTIN_VMULSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_vdivsh_v8hf_mask", IX86_BUILTIN_VDIVSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_vmaxph_v8hf_mask", IX86_BUILTIN_VMAXPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_vmaxph_v16hf_mask", IX86_BUILTIN_VMAXPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_vmaxph_v32hf_mask", IX86_BUILTIN_VMAXPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_vminph_v8hf_mask", IX86_BUILTIN_VMINPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_vminph_v16hf_mask", IX86_BUILTIN_VMINPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_vminph_v32hf_mask", IX86_BUILTIN_VMINPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_vmaxsh_v8hf_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_vminsh_v8hf_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_vcmpph_v8hf_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_vcmpph_v16hf_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_vcmpph_v32hf_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_vsqrtph_v8hf_mask", IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_vsqrtph_v16hf_mask", IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_vrsqrtph_v8hf_mask", IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_vrsqrtph_v16hf_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_vrsqrtph_v32hf_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_vrsqrtsh_v8hf_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_vrcpph_v8hf_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_vrcpph_v16hf_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_vrcpph_v32hf_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_vrcpsh_v8hf_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_vscalefph_v8hf_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_vscalefph_v16hf_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_vreduceph_v8hf_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_vreduceph_v16hf_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_vrndscaleph_v8hf_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_vrndscaleph_v16hf_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_VADDSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_VSUBSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_VMULSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_VDIVSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_VMAXPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_VMAXPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_VMAXPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_VMINPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_VMINPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_VMINPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_VMAXSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_VMINSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_VCMPPH_V8HF_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_VCMPPH_V16HF_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_VCMPPH_V32HF_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_VSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_VSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_VRSQRTPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv8hf_mask, "__builtin_ia32_reduceph128_mask", IX86_BUILTIN_VREDUCEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv16hf_mask, "__builtin_ia32_reduceph256_mask", IX86_BUILTIN_VREDUCEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rndscalev8hf_mask, "__builtin_ia32_rndscaleph128_mask", IX86_BUILTIN_VRNDSCALEPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_VRNDSCALEPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI)
 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
@@ -3027,28 +3027,28 @@  BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "_
 BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
 
 /* AVX512FP16.  */
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_vaddph_v32hf_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_vsubph_v32hf_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_vmulph_v32hf_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_vdivph_v32hf_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_vaddsh_v8hf_mask_round", IX86_BUILTIN_VADDSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_vsubsh_v8hf_mask_round", IX86_BUILTIN_VSUBSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_vmulsh_v8hf_mask_round", IX86_BUILTIN_VMULSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_vdivsh_v8hf_mask_round", IX86_BUILTIN_VDIVSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_vmaxph_v32hf_mask_round", IX86_BUILTIN_VMAXPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_vminph_v32hf_mask_round", IX86_BUILTIN_VMINPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_vmaxsh_v8hf_mask_round", IX86_BUILTIN_VMAXSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_vminsh_v8hf_mask_round", IX86_BUILTIN_VMINSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_vcmpph_v32hf_mask_round", IX86_BUILTIN_VCMPPH_V32HF_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_vcmpsh_v8hf_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_vsqrtph_v32hf_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_vsqrtsh_v8hf_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_vscalefph_v32hf_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_vscalefsh_v8hf_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_vreduceph_v32hf_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_vreducesh_v8hf_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_vrndscaleph_v32hf_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_vrndscalesh_v8hf_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_VADDSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_VSUBSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_VMULSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_VDIVSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_VMAXPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_VMINPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_VMAXSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_VMINSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_VCMPPH_V32HF_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_VREDUCEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_VREDUCESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_VRNDSCALEPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_VRNDSCALESH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index b3cffa0644f..3a96e586418 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -686,33 +686,33 @@ 
 #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
 
 /* avx512fp16intrin.h */
-#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
-#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
-#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
-#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
-#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
-#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
-#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
-#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
-#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
-#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
-#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
-#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
-#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
+#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
+#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
+#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
+#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
+#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
+#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
+#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
+#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
+#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
+#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
 #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
 #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
 #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
@@ -721,8 +721,8 @@ 
 #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
 
 /* avx512fp16vlintrin.h */
-#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
-#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
 #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
 #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
 #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 67ef567e437..aafcd414530 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -703,33 +703,33 @@ 
 #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
 
 /* avx512fp16intrin.h */
-#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
-#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
-#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
-#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
-#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
-#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
-#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
-#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
-#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
-#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
-#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
-#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
-#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
+#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
+#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
+#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
+#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
+#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
+#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
+#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
+#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
+#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
+#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
 #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
 #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
 #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
@@ -738,8 +738,8 @@ 
 #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
 
 /* avx512fp16vlintrin.h */
-#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
-#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
 #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
 #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
 #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index b3f07587acb..8b600282c67 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -704,33 +704,33 @@ 
 #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E)  __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
 
 /* avx512fp16intrin.h */
-#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vminph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vmaxsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vminsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vcmpph_v32hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v32hf_mask(A, B, 1, D)
-#define __builtin_ia32_vcmpph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpph_v32hf_mask_round(A, B, 1, D, 8)
-#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
-#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
-#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
-#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
-#define __builtin_ia32_vreduceph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vreduceph_v32hf_mask_round(A, 123, C, D, 8)
-#define __builtin_ia32_vreduceph_v8hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v8hf_mask(A, 123, C, D)
-#define __builtin_ia32_vreduceph_v16hf_mask(A, B, C, D) __builtin_ia32_vreduceph_v16hf_mask(A, 123, C, D)
-#define __builtin_ia32_vreducesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vreducesh_v8hf_mask_round(A, B, 123, D, E, 8)
-#define __builtin_ia32_vrndscaleph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vrndscaleph_v32hf_mask_round(A, 123, C, D, 8)
-#define __builtin_ia32_vrndscaleph_v8hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v8hf_mask(A, 123, C, D)
-#define __builtin_ia32_vrndscaleph_v16hf_mask(A, B, C, D) __builtin_ia32_vrndscaleph_v16hf_mask(A, 123, C, D)
-#define __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, C, D, E, F) __builtin_ia32_vrndscalesh_v8hf_mask_round(A, B, 123, D, E, 8)
+#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) __builtin_ia32_addph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_subph512_mask_round(A, B, C, D, E) __builtin_ia32_subph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_mulph512_mask_round(A, B, C, D, E) __builtin_ia32_mulph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_divph512_mask_round(A, B, C, D, E) __builtin_ia32_divph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_addsh_mask_round(A, B, C, D, E) __builtin_ia32_addsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_subsh_mask_round(A, B, C, D, E) __builtin_ia32_subsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_mulsh_mask_round(A, B, C, D, E) __builtin_ia32_mulsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_divsh_mask_round(A, B, C, D, E) __builtin_ia32_divsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_maxph512_mask_round(A, B, C, D, E) __builtin_ia32_maxph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_minph512_mask_round(A, B, C, D, E) __builtin_ia32_minph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_maxsh_mask_round(A, B, C, D, E) __builtin_ia32_maxsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_minsh_mask_round(A, B, C, D, E) __builtin_ia32_minsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_cmpph512_mask(A, B, C, D) __builtin_ia32_cmpph512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph512_mask_round(A, B, C, D, E) __builtin_ia32_cmpph512_mask_round(A, B, 1, D, 8)
+#define __builtin_ia32_cmpsh_mask_round(A, B, C, D, E) __builtin_ia32_cmpsh_mask_round(A, B, 1, D, 8)
+#define __builtin_ia32_sqrtph512_mask_round(C, A, B, D) __builtin_ia32_sqrtph512_mask_round(C, A, B, 8)
+#define __builtin_ia32_sqrtsh_mask_round(D, C, A, B, E) __builtin_ia32_sqrtsh_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_scalefph512_mask_round(A, B, C, D, E) __builtin_ia32_scalefph512_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_scalefsh_mask_round(A, B, C, D, E) __builtin_ia32_scalefsh_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_reduceph512_mask_round(A, B, C, D, E) __builtin_ia32_reduceph512_mask_round(A, 123, C, D, 8)
+#define __builtin_ia32_reduceph128_mask(A, B, C, D) __builtin_ia32_reduceph128_mask(A, 123, C, D)
+#define __builtin_ia32_reduceph256_mask(A, B, C, D) __builtin_ia32_reduceph256_mask(A, 123, C, D)
+#define __builtin_ia32_reducesh_mask_round(A, B, C, D, E, F) __builtin_ia32_reducesh_mask_round(A, B, 123, D, E, 8)
+#define __builtin_ia32_rndscaleph512_mask_round(A, B, C, D, E) __builtin_ia32_rndscaleph512_mask_round(A, 123, C, D, 8)
+#define __builtin_ia32_rndscaleph128_mask(A, B, C, D) __builtin_ia32_rndscaleph128_mask(A, 123, C, D)
+#define __builtin_ia32_rndscaleph256_mask(A, B, C, D) __builtin_ia32_rndscaleph256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalesh_mask_round(A, B, C, D, E, F) __builtin_ia32_rndscalesh_mask_round(A, B, 123, D, E, 8)
 #define __builtin_ia32_fpclassph512_mask(A, D, C) __builtin_ia32_fpclassph512_mask(A, 1, C)
 #define __builtin_ia32_fpclasssh_mask(A, D, U) __builtin_ia32_fpclasssh_mask(A, 1, U)
 #define __builtin_ia32_getexpph512_mask(A, B, C, D) __builtin_ia32_getexpph512_mask(A, B, C, 8)
@@ -739,8 +739,8 @@ 
 #define __builtin_ia32_getmantsh_mask_round(A, B, C, W, U, D) __builtin_ia32_getmantsh_mask_round(A, B, 1, W, U, 4)
 
 /* avx512fp16vlintrin.h */
-#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
-#define __builtin_ia32_vcmpph_v16hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v16hf_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpph256_mask(A, B, C, D) __builtin_ia32_cmpph256_mask(A, B, 1, D)
 #define __builtin_ia32_fpclassph256_mask(A, D, C) __builtin_ia32_fpclassph256_mask(A, 1, C)
 #define __builtin_ia32_fpclassph128_mask(A, D, C) __builtin_ia32_fpclassph128_mask(A, 1, C)
 #define __builtin_ia32_getmantph256_mask(A, E, C, D) __builtin_ia32_getmantph256_mask(A, 1, C, D)
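
For reference, a minimal sketch (not part of the patch, and not copied from the testsuite) of the pattern these overrides rely on: the builtins' rounding/imm8 operands must be compile-time constants, so when the surrounding code passes a variable, the test's #define rewrites that operand to a fixed literal (8 for the rounding operand, 1 for compare/fpclass predicates, 123 for the reduce/rndscale imm8).  The snippet below assumes the renamed builtin from this patch and requires -mavx512fp16; the function and variable names are made up for illustration.

/* Override must precede the intrinsic header so it rewrites the
   builtin call, whether the intrinsic is a macro (-O0) or an inline
   function (optimized builds).  */
#define __builtin_ia32_addph512_mask_round(A, B, C, D, E) \
  __builtin_ia32_addph512_mask_round (A, B, C, D, 8)

#include <immintrin.h>

__m512h
add_with_variable_rounding (__m512h a, __m512h b, __mmask32 m, int r)
{
  /* _mm512_mask_add_round_ph forwards to
     __builtin_ia32_addph512_mask_round; without the override above,
     the non-constant `r' would be rejected as an immediate operand.
     With it, the last argument is forced to the literal 8
     (_MM_FROUND_NO_EXC).  */
  return _mm512_mask_add_round_ph (a, m, a, b, r);
}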