Message ID | 20240726081048.4023660-1-haochen.jiang@intel.com |
---|---|
State | New |
Headers | show |
Series | [v2] i386: Fix AVX512 intrin macro typo | expand |
On Fri, Jul 26, 2024 at 10:14 AM Haochen Jiang <haochen.jiang@intel.com> wrote: > > Hi all, > > I have added related testcases into the patch. > > Ok for trunk and backport to GCC 14, GCC 13 and GCC 12? Hmm, it might be OK for 14.2 still, even without a new RC. But please wait until after 14.2 is released unless Jakub also agrees. Thanks, Richard. > > Thx, > Haochen > > --- > > Changes in v2: Add related testcases > > --- > > There are several typo in AVX512 intrins macro define. Correct them to solve > errors when compiled with -O0. > > gcc/ChangeLog: > > * config/i386/avx512dqintrin.h > (_mm_mask_fpclass_ss_mask): Correct operand order. > (_mm_mask_fpclass_sd_mask): Ditto. > (_mm_reduce_round_sd): Use -1 as mask since it is non-mask. > (_mm_reduce_round_ss): Ditto. > * config/i386/avx512vlbwintrin.h > (_mm256_mask_alignr_epi8): Correct operand usage. > (_mm_mask_alignr_epi8): Ditto. > * config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512bw-vpalignr-1b.c: New test. > * gcc.target/i386/avx512dq-vfpclasssd-1b.c: Ditto. > * gcc.target/i386/avx512dq-vfpclassss-1b.c: Ditto. > * gcc.target/i386/avx512dq-vreducesd-1b.c: Ditto. > * gcc.target/i386/avx512dq-vreducess-1b.c: Ditto. > * gcc.target/i386/avx512vl-valignq-1b.c: Ditto. 
> --- > gcc/config/i386/avx512dqintrin.h | 16 +++++++++------- > gcc/config/i386/avx512vlbwintrin.h | 4 ++-- > gcc/config/i386/avx512vlintrin.h | 2 +- > .../gcc.target/i386/avx512bw-vpalignr-1b.c | 18 ++++++++++++++++++ > .../gcc.target/i386/avx512dq-vfpclasssd-1b.c | 14 ++++++++++++++ > .../gcc.target/i386/avx512dq-vfpclassss-1b.c | 14 ++++++++++++++ > .../gcc.target/i386/avx512dq-vreducesd-1b.c | 16 ++++++++++++++++ > .../gcc.target/i386/avx512dq-vreducess-1b.c | 16 ++++++++++++++++ > .../gcc.target/i386/avx512vl-valignq-1b.c | 15 +++++++++++++++ > 9 files changed, 105 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c > > diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h > index 3beed7e649a..d9890c6da1d 100644 > --- a/gcc/config/i386/avx512dqintrin.h > +++ b/gcc/config/i386/avx512dqintrin.h > @@ -572,11 +572,11 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) > ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ > (int) (C), (__mmask8) (-1))) \ > > -#define _mm_mask_fpclass_ss_mask(X, C, U) \ > +#define _mm_mask_fpclass_ss_mask(U, X, C) \ > ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ > (int) (C), (__mmask8) (U))) > > -#define _mm_mask_fpclass_sd_mask(X, C, U) \ > +#define _mm_mask_fpclass_sd_mask(U, X, C) \ > ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ > (int) (C), (__mmask8) (U))) > #define _mm_reduce_sd(A, B, C) \ > @@ -594,8 +594,9 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) > 
(__mmask8)(U))) > > #define _mm_reduce_round_sd(A, B, C, R) \ > - ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A), \ > - (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R))) > + ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ > + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (), \ > + (__mmask8)(-1), (int)(R))) > > #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ > ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ > @@ -622,8 +623,9 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) > (__mmask8)(U))) > > #define _mm_reduce_round_ss(A, B, C, R) \ > - ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A), \ > - (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R))) > + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ > + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (), \ > + (__mmask8)(-1), (int)(R))) > > #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ > ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ > @@ -631,7 +633,7 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) > (__mmask8)(U), (int)(R))) > > #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ > - ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A), \ > + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ > (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (), \ > (__mmask8)(U), (int)(R))) > > diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h > index 56740054aa1..98b9099e343 100644 > --- a/gcc/config/i386/avx512vlbwintrin.h > +++ b/gcc/config/i386/avx512vlbwintrin.h > @@ -2089,7 +2089,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) > #define _mm256_mask_alignr_epi8(W, U, X, Y, N) \ > ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \ > (__v4di)(__m256i)(Y), (int)((N) * 8), \ > - 
(__v4di)(__m256i)(X), (__mmask32)(U))) > + (__v4di)(__m256i)(W), (__mmask32)(U))) > > #define _mm256_mask_srli_epi16(W, U, A, B) \ > ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \ > @@ -2172,7 +2172,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) > #define _mm_mask_alignr_epi8(W, U, X, Y, N) \ > ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \ > (__v2di)(__m128i)(Y), (int)((N) * 8), \ > - (__v2di)(__m128i)(X), (__mmask16)(U))) > + (__v2di)(__m128i)(W), (__mmask16)(U))) > > #define _mm_maskz_alignr_epi8(U, X, Y, N) \ > ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \ > diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h > index 409a5d166b3..ca3b578f113 100644 > --- a/gcc/config/i386/avx512vlintrin.h > +++ b/gcc/config/i386/avx512vlintrin.h > @@ -13404,7 +13404,7 @@ _mm256_permutex_pd (__m256d __X, const int __M) > > #define _mm_mask_alignr_epi64(W, U, X, Y, C) \ > ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \ > - (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1)) > + (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U))) > > #define _mm_maskz_alignr_epi64(U, X, Y, C) \ > ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \ > diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c > new file mode 100644 > index 00000000000..2b42aa90b91 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O0 -mavx512bw -mavx512vl" } */ > +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > + > +#include <immintrin.h> > + > +volatile __m256i y; > +volatile __m128i x; > 
+volatile __mmask32 m2; > +volatile __mmask16 m3; > + > +void extern > +avx512bw_test (void) > +{ > + y = _mm256_mask_alignr_epi8 (y, m2, y, y, 10); > + x = _mm_mask_alignr_epi8 (x, m3, x, x, 10); > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c > new file mode 100644 > index 00000000000..8c7f96fb7a7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512dq -O0" } */ > +/* { dg-final { scan-assembler-times "vfpclasssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > + > +#include <immintrin.h> > + > +volatile __m128d x128; > +volatile __mmask8 m8; > + > +void extern > +avx512dq_test (void) > +{ > + m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13); > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c > new file mode 100644 > index 00000000000..3196fd60d64 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512dq -O0" } */ > +/* { dg-final { scan-assembler-times "vfpclassss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > + > +#include <immintrin.h> > + > +volatile __m128 x128; > +volatile __mmask8 m8; > + > +void extern > +avx512dq_test (void) > +{ > + m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13); > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c > new file mode 100644 > index 00000000000..9ae8259d373 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512dq -O0" } */ > +/* { dg-final { scan-assembler-times "vreducesd\[ 
\\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > + > +#include <immintrin.h> > + > +#define IMM 123 > + > +volatile __m128d x1, x2, xx1, xx2; > +volatile __mmask8 m; > + > +void extern > +avx512dq_test (void) > +{ > + xx1 = _mm_reduce_round_sd (xx1, xx2, IMM, _MM_FROUND_NO_EXC); > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c > new file mode 100644 > index 00000000000..47bf48fb617 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512dq -O0" } */ > +/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > + > +#include <immintrin.h> > + > +#define IMM 123 > + > +volatile __m128 x1, x2, xx1, xx2; > +volatile __mmask8 m; > + > +void extern > +avx512dq_test (void) > +{ > + xx1 = _mm_reduce_round_ss (xx1, xx2, IMM, _MM_FROUND_NO_EXC); > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c > new file mode 100644 > index 00000000000..0ab16b27733 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O0 -mavx512vl" } */ > +/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > + > +#include <immintrin.h> > + > +volatile __m256i y; > +volatile __m128i x; > +volatile __mmask8 m; > + > +void extern > +avx512vl_test (void) > +{ > + x = _mm_mask_alignr_epi64 (x, m, x, x, 1); > +} > -- > 2.31.1 >
On Fri, Jul 26, 2024 at 04:10:48PM +0800, Haochen Jiang wrote: > * config/i386/avx512dqintrin.h > (_mm_mask_fpclass_ss_mask): Correct operand order. > (_mm_mask_fpclass_sd_mask): Ditto. > (_mm_reduce_round_sd): Use -1 as mask since it is non-mask. > (_mm_reduce_round_ss): Ditto. You haven't mentioned the (_mm_maskz_reduce_round_ss): Use __builtin_ia32_reducess_mask_round instead of __builtin_ia32_reducesd_mask_round. change here. > * config/i386/avx512vlbwintrin.h > (_mm256_mask_alignr_epi8): Correct operand usage. > (_mm_mask_alignr_epi8): Ditto. > * config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512bw-vpalignr-1b.c: New test. > * gcc.target/i386/avx512dq-vfpclasssd-1b.c: Ditto. > * gcc.target/i386/avx512dq-vfpclassss-1b.c: Ditto. > * gcc.target/i386/avx512dq-vreducesd-1b.c: Ditto. > * gcc.target/i386/avx512dq-vreducess-1b.c: Ditto. > * gcc.target/i386/avx512vl-valignq-1b.c: Ditto. I went through all the cases and agree with all the changes. LGTM with the above ChangeLog nit fixed, for trunk/release branches, even for 14.2 if committed RSN. Jakub
> -----Original Message----- > From: Jakub Jelinek <jakub@redhat.com> > Sent: Friday, July 26, 2024 7:59 PM > To: Jiang, Haochen <haochen.jiang@intel.com> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>; > ubizjak@gmail.com > Subject: Re: [PATCH v2] i386: Fix AVX512 intrin macro typo > > On Fri, Jul 26, 2024 at 04:10:48PM +0800, Haochen Jiang wrote: > > * config/i386/avx512dqintrin.h > > (_mm_mask_fpclass_ss_mask): Correct operand order. > > (_mm_mask_fpclass_sd_mask): Ditto. > > (_mm_reduce_round_sd): Use -1 as mask since it is non-mask. > > (_mm_reduce_round_ss): Ditto. > > You haven't mentioned the > (_mm_maskz_reduce_round_ss): Use > __builtin_ia32_reducess_mask_round > instead of __builtin_ia32_reducesd_mask_round. > change here. > > > * config/i386/avx512vlbwintrin.h > > (_mm256_mask_alignr_epi8): Correct operand usage. > > (_mm_mask_alignr_epi8): Ditto. > > * config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/avx512bw-vpalignr-1b.c: New test. > > * gcc.target/i386/avx512dq-vfpclasssd-1b.c: Ditto. > > * gcc.target/i386/avx512dq-vfpclassss-1b.c: Ditto. > > * gcc.target/i386/avx512dq-vreducesd-1b.c: Ditto. > > * gcc.target/i386/avx512dq-vreducess-1b.c: Ditto. > > * gcc.target/i386/avx512vl-valignq-1b.c: Ditto. > > I went through all the cases and agree with all the changes. > > LGTM with the above ChangeLog nit fixed, for trunk/release branches, even for > 14.2 if committed RSN. Ok. I will commit them and backport them to GCC13 and GCC12 now. For GCC14, we could wait for GCC14.3, since a weekend has already passed and this is no longer really "RSN". But if it could still go into GCC14.2, I would also be happy with that. Thx, Haochen > > Jakub
On Mon, Jul 29, 2024 at 02:07:24AM +0000, Jiang, Haochen wrote: > > LGTM with the above ChangeLog nit fixed, for trunk/release branches, even for > > 14.2 if committed RSN. > > Ok. I will commit them and backport them to GCC13 and GCC12 now. For GCC14, > we could wait for GCC14.3 since it has been a weekend passed and not that RSN. > But if it could be in GCC14.2, I will also happy for that. Please commit it to 14.2 ASAP. Jakub
> -----Original Message----- > From: Jakub Jelinek <jakub@redhat.com> > Sent: Monday, July 29, 2024 4:41 PM > To: Jiang, Haochen <haochen.jiang@intel.com> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>; > ubizjak@gmail.com > Subject: Re: [PATCH v2] i386: Fix AVX512 intrin macro typo > > On Mon, Jul 29, 2024 at 02:07:24AM +0000, Jiang, Haochen wrote: > > > LGTM with the above ChangeLog nit fixed, for trunk/release branches, > > > even for > > > 14.2 if committed RSN. > > > > Ok. I will commit them and backport them to GCC13 and GCC12 now. For > > GCC14, we could wait for GCC14.3 since it has been a weekend passed and > not that RSN. > > But if it could be in GCC14.2, I will also happy for that. > > Please commit it to 14.2 ASAP. Pushed to GCC14.2 Thx, Haochen > > Jakub
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index 3beed7e649a..d9890c6da1d 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -572,11 +572,11 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ (int) (C), (__mmask8) (-1))) \ -#define _mm_mask_fpclass_ss_mask(X, C, U) \ +#define _mm_mask_fpclass_ss_mask(U, X, C) \ ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), \ (int) (C), (__mmask8) (U))) -#define _mm_mask_fpclass_sd_mask(X, C, U) \ +#define _mm_mask_fpclass_sd_mask(U, X, C) \ ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), \ (int) (C), (__mmask8) (U))) #define _mm_reduce_sd(A, B, C) \ @@ -594,8 +594,9 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) (__mmask8)(U))) #define _mm_reduce_round_sd(A, B, C, R) \ - ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R))) + ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (), \ + (__mmask8)(-1), (int)(R))) #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ @@ -622,8 +623,9 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) (__mmask8)(U))) #define _mm_reduce_round_ss(A, B, C, R) \ - ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R))) + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (), \ + (__mmask8)(-1), (int)(R))) #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ @@ -631,7 +633,7 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, 
__m128d __A, const int __imm) (__mmask8)(U), (int)(R))) #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ - ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A), \ + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (), \ (__mmask8)(U), (int)(R))) diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 56740054aa1..98b9099e343 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -2089,7 +2089,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) #define _mm256_mask_alignr_epi8(W, U, X, Y, N) \ ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \ (__v4di)(__m256i)(Y), (int)((N) * 8), \ - (__v4di)(__m256i)(X), (__mmask32)(U))) + (__v4di)(__m256i)(W), (__mmask32)(U))) #define _mm256_mask_srli_epi16(W, U, A, B) \ ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \ @@ -2172,7 +2172,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) #define _mm_mask_alignr_epi8(W, U, X, Y, N) \ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \ (__v2di)(__m128i)(Y), (int)((N) * 8), \ - (__v2di)(__m128i)(X), (__mmask16)(U))) + (__v2di)(__m128i)(W), (__mmask16)(U))) #define _mm_maskz_alignr_epi8(U, X, Y, N) \ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \ diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 409a5d166b3..ca3b578f113 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -13404,7 +13404,7 @@ _mm256_permutex_pd (__m256d __X, const int __M) #define _mm_mask_alignr_epi64(W, U, X, Y, C) \ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \ - (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1)) + (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U))) #define _mm_maskz_alignr_epi64(U, X, Y, C) \ 
((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c new file mode 100644 index 00000000000..2b42aa90b91 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i y; +volatile __m128i x; +volatile __mmask32 m2; +volatile __mmask16 m3; + +void extern +avx512bw_test (void) +{ + y = _mm256_mask_alignr_epi8 (y, m2, y, y, 10); + x = _mm_mask_alignr_epi8 (x, m3, x, x, 10); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c new file mode 100644 index 00000000000..8c7f96fb7a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vfpclasssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m128d x128; +volatile __mmask8 m8; + +void extern +avx512dq_test (void) +{ + m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c new file mode 100644 index 00000000000..3196fd60d64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vfpclassss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 
1 } } */ + +#include <immintrin.h> + +volatile __m128 x128; +volatile __mmask8 m8; + +void extern +avx512dq_test (void) +{ + m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c new file mode 100644 index 00000000000..9ae8259d373 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +#define IMM 123 + +volatile __m128d x1, x2, xx1, xx2; +volatile __mmask8 m; + +void extern +avx512dq_test (void) +{ + xx1 = _mm_reduce_round_sd (xx1, xx2, IMM, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c new file mode 100644 index 00000000000..47bf48fb617 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O0" } */ +/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +#define IMM 123 + +volatile __m128 x1, x2, xx1, xx2; +volatile __mmask8 m; + +void extern +avx512dq_test (void) +{ + xx1 = _mm_reduce_round_ss (xx1, xx2, IMM, _MM_FROUND_NO_EXC); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c new file mode 100644 index 00000000000..0ab16b27733 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512vl" } */ +/* { dg-final { scan-assembler-times "valignq\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256i y; +volatile __m128i x; +volatile __mmask8 m; + +void extern +avx512vl_test (void) +{ + x = _mm_mask_alignr_epi64 (x, m, x, x, 1); +}