Message ID | 20210701061648.9447-41-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Support all AVX512FP16 intrinsics. | expand |
Hi: I'm going to check in the 9 patches below. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Newly added testcases passed on sde{-m32,}. [PATCH 40/62] AVX512FP16: Add vfmaddsub[132, 213, 231]ph/vfmsubadd[132, 213, 231]ph. [PATCH 41/62] AVX512FP16: Add testcase for vfmaddsub[132, 213, 231]ph/vfmsubadd[132, 213, 231]ph. [PATCH 42/62] AVX512FP16: Add FP16 fma instructions. [PATCH 43/62] AVX512FP16: Add testcase for fma instructions [PATCH 44/62] AVX512FP16: Add scalar/vector bitwise operations, including [PATCH 45/62] AVX512FP16: Add testcase for fp16 bitwise operations. [PATCH 46/62] AVX512FP16: Enable FP16 mask load/store. [PATCH 47/62] AVX512FP16: Add scalar fma instructions. [PATCH 48/62] AVX512FP16: Add testcase for scalar FMA instructions. On Thu, Jul 1, 2021 at 2:17 PM liuhongt <hongtao.liu@intel.com> wrote: > > gcc/ChangeLog: > > * config/i386/avx512fp16intrin.h (_mm512_fmaddsub_ph): > New intrinsic. > (_mm512_mask_fmaddsub_ph): Likewise. > (_mm512_mask3_fmaddsub_ph): Likewise. > (_mm512_maskz_fmaddsub_ph): Likewise. > (_mm512_fmaddsub_round_ph): Likewise. > (_mm512_mask_fmaddsub_round_ph): Likewise. > (_mm512_mask3_fmaddsub_round_ph): Likewise. > (_mm512_maskz_fmaddsub_round_ph): Likewise. > (_mm512_mask_fmsubadd_ph): Likewise. > (_mm512_mask3_fmsubadd_ph): Likewise. > (_mm512_maskz_fmsubadd_ph): Likewise. > (_mm512_fmsubadd_round_ph): Likewise. > (_mm512_mask_fmsubadd_round_ph): Likewise. > (_mm512_mask3_fmsubadd_round_ph): Likewise. > (_mm512_maskz_fmsubadd_round_ph): Likewise. > * config/i386/avx512fp16vlintrin.h (_mm256_fmaddsub_ph): > New intrinsic. > (_mm256_mask_fmaddsub_ph): Likewise. > (_mm256_mask3_fmaddsub_ph): Likewise. > (_mm256_maskz_fmaddsub_ph): Likewise. > (_mm_fmaddsub_ph): Likewise. > (_mm_mask_fmaddsub_ph): Likewise. > (_mm_mask3_fmaddsub_ph): Likewise. > (_mm_maskz_fmaddsub_ph): Likewise. > (_mm256_fmsubadd_ph): Likewise. > (_mm256_mask_fmsubadd_ph): Likewise. > (_mm256_mask3_fmsubadd_ph): Likewise. 
> (_mm256_maskz_fmsubadd_ph): Likewise. > (_mm_fmsubadd_ph): Likewise. > (_mm_mask_fmsubadd_ph): Likewise. > (_mm_mask3_fmsubadd_ph): Likewise. > (_mm_maskz_fmsubadd_ph): Likewise. > * config/i386/i386-builtin.def: Add corresponding new builtins. > * config/i386/sse.md (VFH_SF_AVX512VL): New mode iterator. > * (<avx512>_fmsubadd_<mode>_maskz<round_expand_name>): New expander. > * (<avx512>_fmaddsub_<mode>_maskz<round_expand_name>): Use > VFH_SF_AVX512VL. > * (<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>): > Ditto. > * (<avx512>_fmaddsub_<mode>_mask<round_name>): Ditto. > * (<avx512>_fmaddsub_<mode>_mask3<round_name>): Ditto. > * (<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>): > Ditto. > * (<avx512>_fmsubadd_<mode>_mask<round_name>): Ditto. > * (<avx512>_fmsubadd_<mode>_mask3<round_name>): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx-1.c: Add test for new builtins. > * gcc.target/i386/sse-13.c: Ditto. > * gcc.target/i386/sse-23.c: Ditto. > * gcc.target/i386/sse-14.c: Add test for new intrinsics. > * gcc.target/i386/sse-22.c: Ditto. > --- > gcc/config/i386/avx512fp16intrin.h | 228 +++++++++++++++++++++++++ > gcc/config/i386/avx512fp16vlintrin.h | 182 ++++++++++++++++++++ > gcc/config/i386/i386-builtin.def | 18 ++ > gcc/config/i386/sse.md | 103 ++++++----- > gcc/testsuite/gcc.target/i386/avx-1.c | 6 + > gcc/testsuite/gcc.target/i386/sse-13.c | 6 + > gcc/testsuite/gcc.target/i386/sse-14.c | 8 + > gcc/testsuite/gcc.target/i386/sse-22.c | 8 + > gcc/testsuite/gcc.target/i386/sse-23.c | 6 + > 9 files changed, 524 insertions(+), 41 deletions(-) > > diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h > index ddb227529fa..4092663b504 100644 > --- a/gcc/config/i386/avx512fp16intrin.h > +++ b/gcc/config/i386/avx512fp16intrin.h > @@ -5037,6 +5037,234 @@ _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C, > > #endif /* __OPTIMIZE__ */ > > +/* Intrinsics vfmaddsub[132,213,231]ph. 
*/ > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C) > +{ > + return (__m512h) > + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) -1, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) > +{ > + return (__m512h) > + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) > +{ > + return (__m512h) > + __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) > +{ > + return (__m512h) > + __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +#ifdef __OPTIMIZE__ > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) > +{ > + return (__m512h) > + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) -1, __R); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B, > + __m512h __C, const int __R) > +{ > + 
return (__m512h) > + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, __R); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, > + __mmask32 __U, const int __R) > +{ > + return (__m512h) > + __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, __R); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B, > + __m512h __C, const int __R) > +{ > + return (__m512h) > + __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, __R); > +} > + > +#else > +#define _mm512_fmaddsub_round_ph(A, B, C, R) \ > + ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R))) > + > +#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \ > + ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R))) > + > +#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \ > + ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R))) > + > +#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \ > + ((__m512h)__builtin_ia32_vfmaddsubph512_maskz((A), (B), (C), (U), (R))) > + > +#endif /* __OPTIMIZE__ */ > + > +/* Intrinsics vfmsubadd[132,213,231]ph. 
*/ > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > + _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) -1, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U, > + __m512h __B, __m512h __C) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B, > + __m512h __C, __mmask32 __U) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A, > + __m512h __B, __m512h __C) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +#ifdef __OPTIMIZE__ > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_fmsubadd_round_ph (__m512h __A, __m512h __B, > + __m512h __C, const int __R) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) -1, __R); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B, > + __m512h __C, const 
int __R) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, __R); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C, > + __mmask32 __U, const int __R) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, __R); > +} > + > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B, > + __m512h __C, const int __R) > +{ > + return (__m512h) > + __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A, > + (__v32hf) __B, > + (__v32hf) __C, > + (__mmask32) __U, __R); > +} > + > +#else > +#define _mm512_fmsubadd_round_ph(A, B, C, R) \ > + ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R))) > + > +#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \ > + ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R))) > + > +#define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \ > + ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R))) > + > +#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \ > + ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R))) > + > +#endif /* __OPTIMIZE__ */ > + > #ifdef __DISABLE_AVX512FP16__ > #undef __DISABLE_AVX512FP16__ > #pragma GCC pop_options > diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h > index bcbe4523357..8825fae52aa 100644 > --- a/gcc/config/i386/avx512fp16vlintrin.h > +++ b/gcc/config/i386/avx512fp16vlintrin.h > @@ -2269,6 +2269,188 @@ _mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B) > __A); > } > > +/* Intrinsics vfmaddsub[132,213,231]ph. 
*/ > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C) > +{ > + return (__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A, > + (__v16hf)__B, > + (__v16hf)__C, > + (__mmask16)-1); > +} > + > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B, > + __m256h __C) > +{ > + return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A, > + (__v16hf) __B, > + (__v16hf) __C, > + (__mmask16) __U); > +} > + > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C, > + __mmask16 __U) > +{ > + return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A, > + (__v16hf) __B, > + (__v16hf) __C, > + (__mmask16) > + __U); > +} > + > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B, > + __m256h __C) > +{ > + return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A, > + (__v16hf) __B, > + (__v16hf) __C, > + (__mmask16) > + __U); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C) > +{ > + return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A, > + (__v8hf)__B, > + (__v8hf)__C, > + (__mmask8)-1); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B, > + __m128h __C) > +{ > + return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A, > + (__v8hf) __B, > + (__v8hf) __C, > + (__mmask8) __U); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) > +_mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C, > + __mmask8 __U) > +{ > + return (__m128h) __builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A, > + (__v8hf) __B, > + (__v8hf) __C, > + (__mmask8) > + __U); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B, > + __m128h __C) > +{ > + return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A, > + (__v8hf) __B, > + (__v8hf) __C, > + (__mmask8) > + __U); > +} > + > +/* Intrinsics vfmsubadd[132,213,231]ph. */ > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C) > +{ > + return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, > + (__v16hf) __B, > + (__v16hf) __C, > + (__mmask16) -1); > +} > + > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B, > + __m256h __C) > +{ > + return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, > + (__v16hf) __B, > + (__v16hf) __C, > + (__mmask16) __U); > +} > + > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C, > + __mmask16 __U) > +{ > + return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A, > + (__v16hf) __B, > + (__v16hf) __C, > + (__mmask16) > + __U); > +} > + > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B, > + __m256h __C) > +{ > + return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A, > + (__v16hf) __B, > + (__v16hf) __C, > + (__mmask16) > + __U); > +} > + > +extern __inline __m128h > +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C) > +{ > + return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, > + (__v8hf) __B, > + (__v8hf) __C, > + (__mmask8) -1); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B, > + __m128h __C) > +{ > + return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, > + (__v8hf) __B, > + (__v8hf) __C, > + (__mmask8) __U); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C, > + __mmask8 __U) > +{ > + return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A, > + (__v8hf) __B, > + (__v8hf) __C, > + (__mmask8) > + __U); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B, > + __m128h __C) > +{ > + return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A, > + (__v8hf) __B, > + (__v8hf) __C, > + (__mmask8) > + __U); > +} > + > #ifdef __DISABLE_AVX512FP16VL__ > #undef __DISABLE_AVX512FP16VL__ > #pragma GCC pop_options > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > index 4bb48bc21dc..42bba719ec3 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -2875,6 +2875,18 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 > BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v8sf_mask, "__builtin_ia32_vcvtps2ph_v8sf_mask", IX86_BUILTIN_VCVTPS2PH_V8SF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SF_V8HF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v2df_mask, "__builtin_ia32_vcvtpd2ph_v2df_mask", 
IX86_BUILTIN_VCVTPD2PH_V2DF_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v4df_mask, "__builtin_ia32_vcvtpd2ph_v4df_mask", IX86_BUILTIN_VCVTPD2PH_V4DF_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DF_V8HF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmaddsub_v16hf_mask, "__builtin_ia32_vfmaddsubph256_mask", IX86_BUILTIN_VFMADDSUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmaddsub_v16hf_mask3, "__builtin_ia32_vfmaddsubph256_mask3", IX86_BUILTIN_VFMADDSUBPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmaddsub_v16hf_maskz, "__builtin_ia32_vfmaddsubph256_maskz", IX86_BUILTIN_VFMADDSUBPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmaddsub_v8hf_mask, "__builtin_ia32_vfmaddsubph128_mask", IX86_BUILTIN_VFMADDSUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmaddsub_v8hf_mask3, "__builtin_ia32_vfmaddsubph128_mask3", IX86_BUILTIN_VFMADDSUBPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmaddsub_v8hf_maskz, "__builtin_ia32_vfmaddsubph128_maskz", IX86_BUILTIN_VFMADDSUBPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsubadd_v16hf_mask, "__builtin_ia32_vfmsubaddph256_mask", IX86_BUILTIN_VFMSUBADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsubadd_v16hf_mask3, 
"__builtin_ia32_vfmsubaddph256_mask3", IX86_BUILTIN_VFMSUBADDPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsubadd_v16hf_maskz, "__builtin_ia32_vfmsubaddph256_maskz", IX86_BUILTIN_VFMSUBADDPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_mask, "__builtin_ia32_vfmsubaddph128_mask", IX86_BUILTIN_VFMSUBADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_mask3, "__builtin_ia32_vfmsubaddph128_mask3", IX86_BUILTIN_VFMSUBADDPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_maskz, "__builtin_ia32_vfmsubaddph128_maskz", IX86_BUILTIN_VFMSUBADDPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) > > /* Builtins with rounding support. 
*/ > BDESC_END (ARGS, ROUND_ARGS) > @@ -3140,6 +3152,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, > BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT) > BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT) > BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, 
UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) > > BDESC_END (ROUND_ARGS, MULTI_ARG) > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 95f4a82c9cd..847684e232e 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -4542,6 +4542,13 @@ (define_mode_iterator VF_SF_AVX512VL > [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") > DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) > > +(define_mode_iterator VFH_SF_AVX512VL > + [(V32HF "TARGET_AVX512FP16") > + (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") > + (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") > + SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") > + DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) > + > (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" > [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") > (fma:VF_SF_AVX512VL > @@ -4848,10 +4855,10 @@ (define_expand "fmaddsub_<mode>" > "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") > > (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>" > - [(match_operand:VF_AVX512VL 0 "register_operand") > - (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") > - (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") > - (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") > + [(match_operand:VFH_AVX512VL 0 "register_operand") > + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") > + (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") > + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") > (match_operand:<avx512fmaskmode> 4 "register_operand")] > "TARGET_AVX512F" > { > @@ -4861,6 +4868,20 @@ (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>" > DONE; > }) > > +(define_expand "<avx512>_fmsubadd_<mode>_maskz<round_expand_name>" > + [(match_operand:VFH_AVX512VL 0 "register_operand") > + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") > + 
(match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") > + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") > + (match_operand:<avx512fmaskmode> 4 "register_operand")] > + "TARGET_AVX512F" > +{ > + emit_insn (gen_fma_fmsubadd_<mode>_maskz_1<round_expand_name> ( > + operands[0], operands[1], operands[2], operands[3], > + CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); > + DONE; > +}) > + > (define_insn "*fma_fmaddsub_<mode>" > [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") > (unspec:VF_128_256 > @@ -4880,11 +4901,11 @@ (define_insn "*fma_fmaddsub_<mode>" > (set_attr "mode" "<MODE>")]) > > (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" > - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") > - (unspec:VF_SF_AVX512VL > - [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") > - (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") > - (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")] > + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") > + (unspec:VFH_SF_AVX512VL > + [(match_operand:VFH_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") > + (match_operand:VFH_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") > + (match_operand:VFH_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")] > UNSPEC_FMADDSUB))] > "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" > "@ > @@ -4895,12 +4916,12 @@ (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" > (set_attr "mode" "<MODE>")]) > > (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>" > - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") > - (vec_merge:VF_AVX512VL > - (unspec:VF_AVX512VL > - [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") > - (match_operand:VF_AVX512VL 2 
"<round_nimm_predicate>" "<round_constraint>,v") > - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")] > + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") > + (vec_merge:VFH_AVX512VL > + (unspec:VFH_AVX512VL > + [(match_operand:VFH_AVX512VL 1 "register_operand" "0,0") > + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") > + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")] > UNSPEC_FMADDSUB) > (match_dup 1) > (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] > @@ -4912,12 +4933,12 @@ (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>" > (set_attr "mode" "<MODE>")]) > > (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>" > - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") > - (vec_merge:VF_AVX512VL > - (unspec:VF_AVX512VL > - [(match_operand:VF_AVX512VL 1 "register_operand" "v") > - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") > - (match_operand:VF_AVX512VL 3 "register_operand" "0")] > + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") > + (vec_merge:VFH_AVX512VL > + (unspec:VFH_AVX512VL > + [(match_operand:VFH_AVX512VL 1 "register_operand" "v") > + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") > + (match_operand:VFH_AVX512VL 3 "register_operand" "0")] > UNSPEC_FMADDSUB) > (match_dup 3) > (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] > @@ -4946,12 +4967,12 @@ (define_insn "*fma_fmsubadd_<mode>" > (set_attr "mode" "<MODE>")]) > > (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" > - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") > - (unspec:VF_SF_AVX512VL > - [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") > - (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") > - (neg:VF_SF_AVX512VL > - (match_operand:VF_SF_AVX512VL 3 
"<round_nimm_predicate>" "v,<round_constraint>,0"))] > + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") > + (unspec:VFH_SF_AVX512VL > + [(match_operand:VFH_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") > + (match_operand:VFH_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") > + (neg:VFH_SF_AVX512VL > + (match_operand:VFH_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))] > UNSPEC_FMADDSUB))] > "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" > "@ > @@ -4962,13 +4983,13 @@ (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" > (set_attr "mode" "<MODE>")]) > > (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>" > - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") > - (vec_merge:VF_AVX512VL > - (unspec:VF_AVX512VL > - [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") > - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") > - (neg:VF_AVX512VL > - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))] > + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") > + (vec_merge:VFH_AVX512VL > + (unspec:VFH_AVX512VL > + [(match_operand:VFH_AVX512VL 1 "register_operand" "0,0") > + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") > + (neg:VFH_AVX512VL > + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))] > UNSPEC_FMADDSUB) > (match_dup 1) > (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] > @@ -4980,13 +5001,13 @@ (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>" > (set_attr "mode" "<MODE>")]) > > (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>" > - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") > - (vec_merge:VF_AVX512VL > - (unspec:VF_AVX512VL > - [(match_operand:VF_AVX512VL 1 "register_operand" "v") > - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" 
"<round_constraint>") > - (neg:VF_AVX512VL > - (match_operand:VF_AVX512VL 3 "register_operand" "0"))] > + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") > + (vec_merge:VFH_AVX512VL > + (unspec:VFH_AVX512VL > + [(match_operand:VFH_AVX512VL 1 "register_operand" "v") > + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") > + (neg:VFH_AVX512VL > + (match_operand:VFH_AVX512VL 3 "register_operand" "0"))] > UNSPEC_FMADDSUB) > (match_dup 3) > (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] > diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c > index deb25098f25..51a0cf2fe87 100644 > --- a/gcc/testsuite/gcc.target/i386/avx-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx-1.c > @@ -757,6 +757,12 @@ > #define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) > #define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) > #define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) > > /* avx512fp16vlintrin.h */ > #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) > diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c 
b/gcc/testsuite/gcc.target/i386/sse-13.c > index dbe206bd1bb..a53f4653908 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-13.c > +++ b/gcc/testsuite/gcc.target/i386/sse-13.c > @@ -774,6 +774,12 @@ > #define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) > #define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) > #define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) > > /* avx512fp16vlintrin.h */ > #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) > diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c > index e64321d8afa..48895e0dd0d 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-14.c > +++ b/gcc/testsuite/gcc.target/i386/sse-14.c > @@ -836,6 +836,8 @@ test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8) > test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8) > test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) > test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8) > +test_3 (_mm512_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) > +test_3 (_mm512_fmsubadd_round_ph, 
__m512h, __m512h, __m512h, __m512h, 9) > test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) > test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) > test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) > @@ -868,6 +870,12 @@ test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8) > test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8) > test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8) > test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8) > +test_4 (_mm512_mask_fmaddsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) > +test_4 (_mm512_mask3_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) > +test_4 (_mm512_maskz_fmaddsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) > +test_4 (_mm512_mask3_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) > +test_4 (_mm512_mask_fmsubadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) > +test_4 (_mm512_maskz_fmsubadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) > test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) > test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) > test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) > diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c > index d92898fdd11..bc530da388b 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-22.c > +++ b/gcc/testsuite/gcc.target/i386/sse-22.c > @@ -939,6 +939,8 @@ test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8) > test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8) > test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) > test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, 
__mmask8, __m128h, __m128d, 8) > +test_3 (_mm512_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) > +test_3 (_mm512_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) > test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) > test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) > test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) > @@ -970,6 +972,12 @@ test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8) > test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8) > test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8) > test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8) > +test_4 (_mm512_mask_fmaddsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) > +test_4 (_mm512_mask3_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) > +test_4 (_mm512_maskz_fmaddsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) > +test_4 (_mm512_mask3_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) > +test_4 (_mm512_mask_fmsubadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) > +test_4 (_mm512_maskz_fmsubadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) > test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) > test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) > test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) > diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c > index 2f5027ba36f..df43931ca97 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-23.c > +++ b/gcc/testsuite/gcc.target/i386/sse-23.c > @@ -775,6 +775,12 @@ > #define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) > #define 
__builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) > #define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) > > /* avx512fp16vlintrin.h */ > #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) > -- > 2.18.1 >
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index ddb227529fa..4092663b504 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -5037,6 +5037,234 @@ _mm_maskz_cvt_roundsd_sh (__mmask8 __A, __m128h __B, __m128d __C, #endif /* __OPTIMIZE__ */ +/* Intrinsics vfmaddsub[132,213,231]ph. */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + 
(__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddsub_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmaddsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +#else +#define _mm512_fmaddsub_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), -1, (R))) + +#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \ + ((__m512h)__builtin_ia32_vfmaddsubph512_mask ((A), (B), (C), (U), (R))) + +#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \ + ((__m512h)__builtin_ia32_vfmaddsubph512_mask3 ((A), (B), (C), (U), (R))) + +#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmaddsubph512_maskz((A), (B), (C), (U), (R))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vfmsubadd[132,213,231]ph. 
*/ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm512_fmsubadd_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_ph (__m512h __A, __mmask32 __U, + __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_ph (__m512h __A, __m512h __B, + __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubadd_ph (__mmask32 __U, __m512h __A, + __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsubadd_round_ph (__m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, 
__R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubadd_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) + __builtin_ia32_vfmsubaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +#else +#define _mm512_fmsubadd_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), -1, (R))) + +#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \ + ((__m512h)__builtin_ia32_vfmsubaddph512_mask ((A), (B), (C), (U), (R))) + +#define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \ + ((__m512h)__builtin_ia32_vfmsubaddph512_mask3 ((A), (B), (C), (U), (R))) + +#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmsubaddph512_maskz ((A), (B), (C), (U), (R))) + +#endif /* __OPTIMIZE__ */ + #ifdef __DISABLE_AVX512FP16__ #undef __DISABLE_AVX512FP16__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index bcbe4523357..8825fae52aa 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -2269,6 +2269,188 @@ _mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B) __A); } +/* Intrinsics vfmaddsub[132,213,231]ph. 
*/ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A, + (__v16hf)__B, + (__v16hf)__C, + (__mmask16)-1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A, + (__v8hf)__B, + (__v8hf)__C, + (__mmask8)-1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) 
__builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +/* Intrinsics vfmsubadd[132,213,231]ph. */ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + #ifdef __DISABLE_AVX512FP16VL__ #undef __DISABLE_AVX512FP16VL__ #pragma GCC pop_options diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 4bb48bc21dc..42bba719ec3 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2875,6 +2875,18 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v8sf_mask, "__builtin_ia32_vcvtps2ph_v8sf_mask", IX86_BUILTIN_VCVTPS2PH_V8SF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8SF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v2df_mask, "__builtin_ia32_vcvtpd2ph_v2df_mask", IX86_BUILTIN_VCVTPD2PH_V2DF_MASK, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v4df_mask, "__builtin_ia32_vcvtpd2ph_v4df_mask", IX86_BUILTIN_VCVTPD2PH_V4DF_MASK, UNKNOWN, (int) V8HF_FTYPE_V4DF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512vl_fmaddsub_v16hf_mask, "__builtin_ia32_vfmaddsubph256_mask", IX86_BUILTIN_VFMADDSUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmaddsub_v16hf_mask3, "__builtin_ia32_vfmaddsubph256_mask3", IX86_BUILTIN_VFMADDSUBPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmaddsub_v16hf_maskz, "__builtin_ia32_vfmaddsubph256_maskz", IX86_BUILTIN_VFMADDSUBPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmaddsub_v8hf_mask, "__builtin_ia32_vfmaddsubph128_mask", IX86_BUILTIN_VFMADDSUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmaddsub_v8hf_mask3, "__builtin_ia32_vfmaddsubph128_mask3", IX86_BUILTIN_VFMADDSUBPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmaddsub_v8hf_maskz, "__builtin_ia32_vfmaddsubph128_maskz", IX86_BUILTIN_VFMADDSUBPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsubadd_v16hf_mask, "__builtin_ia32_vfmsubaddph256_mask", IX86_BUILTIN_VFMSUBADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsubadd_v16hf_mask3, "__builtin_ia32_vfmsubaddph256_mask3", IX86_BUILTIN_VFMSUBADDPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsubadd_v16hf_maskz, "__builtin_ia32_vfmsubaddph256_maskz", IX86_BUILTIN_VFMSUBADDPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, 
OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_mask, "__builtin_ia32_vfmsubaddph128_mask", IX86_BUILTIN_VFMSUBADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_mask3, "__builtin_ia32_vfmsubaddph128_mask3", IX86_BUILTIN_VFMSUBADDPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_maskz, "__builtin_ia32_vfmsubaddph128_maskz", IX86_BUILTIN_VFMSUBADDPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) @@ -3140,6 +3152,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", 
IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC_END (ROUND_ARGS, MULTI_ARG) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 95f4a82c9cd..847684e232e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4542,6 +4542,13 @@ (define_mode_iterator VF_SF_AVX512VL [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) +(define_mode_iterator VFH_SF_AVX512VL + [(V32HF "TARGET_AVX512FP16") + (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") (fma:VF_SF_AVX512VL @@ -4848,10 +4855,10 @@ (define_expand "fmaddsub_<mode>" "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>" - [(match_operand:VF_AVX512VL 0 "register_operand") - (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") + [(match_operand:VFH_AVX512VL 
0 "register_operand") + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F" { @@ -4861,6 +4868,20 @@ (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>" DONE; }) +(define_expand "<avx512>_fmsubadd_<mode>_maskz<round_expand_name>" + [(match_operand:VFH_AVX512VL 0 "register_operand") + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX512F" +{ + emit_insn (gen_fma_fmsubadd_<mode>_maskz_1<round_expand_name> ( + operands[0], operands[1], operands[2], operands[3], + CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); + DONE; +}) + (define_insn "*fma_fmaddsub_<mode>" [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") (unspec:VF_128_256 @@ -4880,11 +4901,11 @@ (define_insn "*fma_fmaddsub_<mode>" (set_attr "mode" "<MODE>")]) (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") - (unspec:VF_SF_AVX512VL - [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") - (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") - (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")] + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") + (unspec:VFH_SF_AVX512VL + [(match_operand:VFH_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") + (match_operand:VFH_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (match_operand:VFH_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")] 
UNSPEC_FMADDSUB))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ @@ -4895,12 +4916,12 @@ (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512VL - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")] + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") + (vec_merge:VFH_AVX512VL + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "register_operand" "0,0") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")] UNSPEC_FMADDSUB) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] @@ -4912,12 +4933,12 @@ (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>" (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (vec_merge:VF_AVX512VL - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "register_operand" "v") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") - (match_operand:VF_AVX512VL 3 "register_operand" "0")] + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (vec_merge:VFH_AVX512VL + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "register_operand" "v") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") + (match_operand:VFH_AVX512VL 3 "register_operand" "0")] UNSPEC_FMADDSUB) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] @@ -4946,12 +4967,12 @@ (define_insn "*fma_fmsubadd_<mode>" (set_attr "mode" 
"<MODE>")]) (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") - (unspec:VF_SF_AVX512VL - [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") - (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") - (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))] + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") + (unspec:VFH_SF_AVX512VL + [(match_operand:VFH_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") + (match_operand:VFH_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (neg:VFH_SF_AVX512VL + (match_operand:VFH_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))] UNSPEC_FMADDSUB))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ @@ -4962,13 +4983,13 @@ (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512VL - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))] + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") + (vec_merge:VFH_AVX512VL + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "register_operand" "0,0") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))] UNSPEC_FMADDSUB) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] @@ -4980,13 +5001,13 @@ (define_insn 
"<avx512>_fmsubadd_<mode>_mask<round_name>" (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (vec_merge:VF_AVX512VL - (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "register_operand" "v") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 3 "register_operand" "0"))] + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (vec_merge:VFH_AVX512VL + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "register_operand" "v") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 3 "register_operand" "0"))] UNSPEC_FMADDSUB) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index deb25098f25..51a0cf2fe87 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -757,6 +757,12 @@ #define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) #define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) #define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) +#define 
__builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index dbe206bd1bb..a53f4653908 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -774,6 +774,12 @@ #define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) #define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) #define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index e64321d8afa..48895e0dd0d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -836,6 +836,8 @@ test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, __m128h, 8) test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8) test_3 
(_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8) +test_3 (_mm512_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) @@ -868,6 +870,12 @@ test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8) test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8) test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8) test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8) +test_4 (_mm512_mask_fmaddsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fmaddsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_mask_fmsubadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_maskz_fmsubadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index d92898fdd11..bc530da388b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -939,6 +939,8 @@ test_3 (_mm_maskz_cvt_roundsh_ss, __m128, __mmask8, __m128, 
__m128h, 8) test_3 (_mm_maskz_cvt_roundsh_sd, __m128d, __mmask8, __m128d, __m128h, 8) test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8) +test_3 (_mm512_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) @@ -970,6 +972,12 @@ test_4 (_mm_mask_cvt_roundsh_ss, __m128, __m128, __mmask8, __m128, __m128h, 8) test_4 (_mm_mask_cvt_roundsh_sd, __m128d, __m128d, __mmask8, __m128d, __m128h, 8) test_4 (_mm_mask_cvt_roundss_sh, __m128h, __m128h, __mmask8, __m128h, __m128, 8) test_4 (_mm_mask_cvt_roundsd_sh, __m128h, __m128h, __mmask8, __m128h, __m128d, 8) +test_4 (_mm512_mask_fmaddsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fmaddsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_mask_fmsubadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_maskz_fmsubadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 2f5027ba36f..df43931ca97 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ 
b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -775,6 +775,12 @@ #define __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsh2sd_mask_round(A, B, C, D, 8) #define __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtss2sh_mask_round(A, B, C, D, 8) #define __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, E) __builtin_ia32_vcvtsd2sh_mask_round(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)