diff mbox series

[v2] i386: Fix AVX512 intrin macro typo

Message ID 20240726081048.4023660-1-haochen.jiang@intel.com
State New
Headers show
Series [v2] i386: Fix AVX512 intrin macro typo | expand

Commit Message

Haochen Jiang July 26, 2024, 8:10 a.m. UTC
Hi all,

I have added related testcases into the patch.

Ok for trunk and backport to GCC 14, GCC 13 and GCC 12?


Thx,
Haochen

---

Changes in v2: Add related testcases

---

There are several typos in the AVX512 intrinsic macro definitions. Correct them
to fix errors when compiling with -O0.

gcc/ChangeLog:

	* config/i386/avx512dqintrin.h
	(_mm_mask_fpclass_ss_mask): Correct operand order.
	(_mm_mask_fpclass_sd_mask): Ditto.
	(_mm_reduce_round_sd): Use -1 as mask since it is non-mask.
	(_mm_reduce_round_ss): Ditto.
	* config/i386/avx512vlbwintrin.h
	(_mm256_mask_alignr_epi8): Correct operand usage.
	(_mm_mask_alignr_epi8): Ditto.
	* config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512bw-vpalignr-1b.c: New test.
	* gcc.target/i386/avx512dq-vfpclasssd-1b.c: Ditto.
	* gcc.target/i386/avx512dq-vfpclassss-1b.c: Ditto.
	* gcc.target/i386/avx512dq-vreducesd-1b.c: Ditto.
	* gcc.target/i386/avx512dq-vreducess-1b.c: Ditto.
	* gcc.target/i386/avx512vl-valignq-1b.c: Ditto.
---
 gcc/config/i386/avx512dqintrin.h               | 16 +++++++++-------
 gcc/config/i386/avx512vlbwintrin.h             |  4 ++--
 gcc/config/i386/avx512vlintrin.h               |  2 +-
 .../gcc.target/i386/avx512bw-vpalignr-1b.c     | 18 ++++++++++++++++++
 .../gcc.target/i386/avx512dq-vfpclasssd-1b.c   | 14 ++++++++++++++
 .../gcc.target/i386/avx512dq-vfpclassss-1b.c   | 14 ++++++++++++++
 .../gcc.target/i386/avx512dq-vreducesd-1b.c    | 16 ++++++++++++++++
 .../gcc.target/i386/avx512dq-vreducess-1b.c    | 16 ++++++++++++++++
 .../gcc.target/i386/avx512vl-valignq-1b.c      | 15 +++++++++++++++
 9 files changed, 105 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c

Comments

Richard Biener July 26, 2024, 11:42 a.m. UTC | #1
On Fri, Jul 26, 2024 at 10:14 AM Haochen Jiang <haochen.jiang@intel.com> wrote:
>
> Hi all,
>
> I have added related testcases into the patch.
>
> Ok for trunk and backport to GCC 14, GCC 13 and GCC 12?

Hmm, it might still be OK for 14.2, even without a new RC.  But please
wait until after 14.2 is released unless Jakub also agrees.

Thanks,
Richard.

>
> Thx,
> Haochen
>
> ---
>
> Changes in v2: Add related testcases
>
> ---
>
> There are several typos in the AVX512 intrinsic macro definitions. Correct them
> to fix errors when compiling with -O0.
>
> gcc/ChangeLog:
>
>         * config/i386/avx512dqintrin.h
>         (_mm_mask_fpclass_ss_mask): Correct operand order.
>         (_mm_mask_fpclass_sd_mask): Ditto.
>         (_mm_reduce_round_sd): Use -1 as mask since it is non-mask.
>         (_mm_reduce_round_ss): Ditto.
>         * config/i386/avx512vlbwintrin.h
>         (_mm256_mask_alignr_epi8): Correct operand usage.
>         (_mm_mask_alignr_epi8): Ditto.
>         * config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx512bw-vpalignr-1b.c: New test.
>         * gcc.target/i386/avx512dq-vfpclasssd-1b.c: Ditto.
>         * gcc.target/i386/avx512dq-vfpclassss-1b.c: Ditto.
>         * gcc.target/i386/avx512dq-vreducesd-1b.c: Ditto.
>         * gcc.target/i386/avx512dq-vreducess-1b.c: Ditto.
>         * gcc.target/i386/avx512vl-valignq-1b.c: Ditto.
> ---
>  gcc/config/i386/avx512dqintrin.h               | 16 +++++++++-------
>  gcc/config/i386/avx512vlbwintrin.h             |  4 ++--
>  gcc/config/i386/avx512vlintrin.h               |  2 +-
>  .../gcc.target/i386/avx512bw-vpalignr-1b.c     | 18 ++++++++++++++++++
>  .../gcc.target/i386/avx512dq-vfpclasssd-1b.c   | 14 ++++++++++++++
>  .../gcc.target/i386/avx512dq-vfpclassss-1b.c   | 14 ++++++++++++++
>  .../gcc.target/i386/avx512dq-vreducesd-1b.c    | 16 ++++++++++++++++
>  .../gcc.target/i386/avx512dq-vreducess-1b.c    | 16 ++++++++++++++++
>  .../gcc.target/i386/avx512vl-valignq-1b.c      | 15 +++++++++++++++
>  9 files changed, 105 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c
>
> diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
> index 3beed7e649a..d9890c6da1d 100644
> --- a/gcc/config/i386/avx512dqintrin.h
> +++ b/gcc/config/i386/avx512dqintrin.h
> @@ -572,11 +572,11 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
>    ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),   \
>                                              (int) (C), (__mmask8) (-1))) \
>
> -#define _mm_mask_fpclass_ss_mask(X, C, U)                              \
> +#define _mm_mask_fpclass_ss_mask(U, X, C)                              \
>    ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X),    \
>                                              (int) (C), (__mmask8) (U)))
>
> -#define _mm_mask_fpclass_sd_mask(X, C, U)                              \
> +#define _mm_mask_fpclass_sd_mask(U, X, C)                              \
>    ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),   \
>                                              (int) (C), (__mmask8) (U)))
>  #define _mm_reduce_sd(A, B, C)                                         \
> @@ -594,8 +594,9 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
>      (__mmask8)(U)))
>
>  #define _mm_reduce_round_sd(A, B, C, R)                                       \
> -  ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A),      \
> -    (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R)))
> +  ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
> +    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (), \
> +    (__mmask8)(-1), (int)(R)))
>
>  #define _mm_mask_reduce_round_sd(W, U, A, B, C, R)                    \
>    ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
> @@ -622,8 +623,9 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
>      (__mmask8)(U)))
>
>  #define _mm_reduce_round_ss(A, B, C, R)                                       \
> -  ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A),               \
> -    (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R)))
> +  ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
> +    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),  \
> +    (__mmask8)(-1), (int)(R)))
>
>  #define _mm_mask_reduce_round_ss(W, U, A, B, C, R)                    \
>    ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
> @@ -631,7 +633,7 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
>      (__mmask8)(U), (int)(R)))
>
>  #define _mm_maskz_reduce_round_ss(U, A, B, C, R)                      \
> -  ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A),   \
> +  ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
>      (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),         \
>      (__mmask8)(U), (int)(R)))
>
> diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
> index 56740054aa1..98b9099e343 100644
> --- a/gcc/config/i386/avx512vlbwintrin.h
> +++ b/gcc/config/i386/avx512vlbwintrin.h
> @@ -2089,7 +2089,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
>  #define _mm256_mask_alignr_epi8(W, U, X, Y, N)                                     \
>    ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),                 \
>                                             (__v4di)(__m256i)(Y), (int)((N) * 8),   \
> -                                           (__v4di)(__m256i)(X), (__mmask32)(U)))
> +                                           (__v4di)(__m256i)(W), (__mmask32)(U)))
>
>  #define _mm256_mask_srli_epi16(W, U, A, B)                              \
>    ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
> @@ -2172,7 +2172,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
>  #define _mm_mask_alignr_epi8(W, U, X, Y, N)                                        \
>    ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),                 \
>                                             (__v2di)(__m128i)(Y), (int)((N) * 8),   \
> -                                           (__v2di)(__m128i)(X), (__mmask16)(U)))
> +                                           (__v2di)(__m128i)(W), (__mmask16)(U)))
>
>  #define _mm_maskz_alignr_epi8(U, X, Y, N)                                          \
>    ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),                 \
> diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
> index 409a5d166b3..ca3b578f113 100644
> --- a/gcc/config/i386/avx512vlintrin.h
> +++ b/gcc/config/i386/avx512vlintrin.h
> @@ -13404,7 +13404,7 @@ _mm256_permutex_pd (__m256d __X, const int __M)
>
>  #define _mm_mask_alignr_epi64(W, U, X, Y, C)                                \
>      ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
> -        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
> +        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
>
>  #define _mm_maskz_alignr_epi64(U, X, Y, C)                                  \
>      ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c
> new file mode 100644
> index 00000000000..2b42aa90b91
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mavx512bw -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m256i y;
> +volatile __m128i x;
> +volatile __mmask32 m2;
> +volatile __mmask16 m3;
> +
> +void extern
> +avx512bw_test (void)
> +{
> +  y = _mm256_mask_alignr_epi8 (y, m2, y, y, 10);
> +  x = _mm_mask_alignr_epi8 (x, m3, x, x, 10);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c
> new file mode 100644
> index 00000000000..8c7f96fb7a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512dq -O0" } */
> +/* { dg-final { scan-assembler-times "vfpclasssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m128d x128;
> +volatile __mmask8 m8;
> +
> +void extern
> +avx512dq_test (void)
> +{
> +  m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c
> new file mode 100644
> index 00000000000..3196fd60d64
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512dq -O0" } */
> +/* { dg-final { scan-assembler-times "vfpclassss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m128 x128;
> +volatile __mmask8 m8;
> +
> +void extern
> +avx512dq_test (void)
> +{
> +  m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c
> new file mode 100644
> index 00000000000..9ae8259d373
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512dq -O0" } */
> +/* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +#include <immintrin.h>
> +
> +#define IMM 123
> +
> +volatile __m128d x1, x2, xx1, xx2;
> +volatile __mmask8 m;
> +
> +void extern
> +avx512dq_test (void)
> +{
> +  xx1 = _mm_reduce_round_sd (xx1, xx2, IMM, _MM_FROUND_NO_EXC);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c
> new file mode 100644
> index 00000000000..47bf48fb617
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512dq -O0" } */
> +/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +#include <immintrin.h>
> +
> +#define IMM 123
> +
> +volatile __m128 x1, x2, xx1, xx2;
> +volatile __mmask8 m;
> +
> +void extern
> +avx512dq_test (void)
> +{
> +  xx1 = _mm_reduce_round_ss (xx1, xx2, IMM, _MM_FROUND_NO_EXC);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c
> new file mode 100644
> index 00000000000..0ab16b27733
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +
> +#include <immintrin.h>
> +
> +volatile __m256i y;
> +volatile __m128i x;
> +volatile __mmask8 m;
> +
> +void extern
> +avx512vl_test (void)
> +{
> +  x = _mm_mask_alignr_epi64 (x, m, x, x, 1);
> +}
> --
> 2.31.1
>
Jakub Jelinek July 26, 2024, 11:58 a.m. UTC | #2
On Fri, Jul 26, 2024 at 04:10:48PM +0800, Haochen Jiang wrote:
> 	* config/i386/avx512dqintrin.h
> 	(_mm_mask_fpclass_ss_mask): Correct operand order.
> 	(_mm_mask_fpclass_sd_mask): Ditto.
> 	(_mm_reduce_round_sd): Use -1 as mask since it is non-mask.
> 	(_mm_reduce_round_ss): Ditto.

You haven't mentioned the
	(_mm_maskz_reduce_round_ss): Use __builtin_ia32_reducess_mask_round
	instead of __builtin_ia32_reducesd_mask_round.
change here.

> 	* config/i386/avx512vlbwintrin.h
> 	(_mm256_mask_alignr_epi8): Correct operand usage.
> 	(_mm_mask_alignr_epi8): Ditto.
> 	* config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/i386/avx512bw-vpalignr-1b.c: New test.
> 	* gcc.target/i386/avx512dq-vfpclasssd-1b.c: Ditto.
> 	* gcc.target/i386/avx512dq-vfpclassss-1b.c: Ditto.
> 	* gcc.target/i386/avx512dq-vreducesd-1b.c: Ditto.
> 	* gcc.target/i386/avx512dq-vreducess-1b.c: Ditto.
> 	* gcc.target/i386/avx512vl-valignq-1b.c: Ditto.

I went through all the cases and agree with all the changes.

LGTM with the above ChangeLog nit fixed, for trunk/release branches,
even for 14.2 if committed RSN.

	Jakub
Haochen Jiang July 29, 2024, 2:07 a.m. UTC | #3
> -----Original Message-----
> From: Jakub Jelinek <jakub@redhat.com>
> Sent: Friday, July 26, 2024 7:59 PM
> To: Jiang, Haochen <haochen.jiang@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>;
> ubizjak@gmail.com
> Subject: Re: [PATCH v2] i386: Fix AVX512 intrin macro typo
> 
> On Fri, Jul 26, 2024 at 04:10:48PM +0800, Haochen Jiang wrote:
> > 	* config/i386/avx512dqintrin.h
> > 	(_mm_mask_fpclass_ss_mask): Correct operand order.
> > 	(_mm_mask_fpclass_sd_mask): Ditto.
> > 	(_mm_reduce_round_sd): Use -1 as mask since it is non-mask.
> > 	(_mm_reduce_round_ss): Ditto.
> 
> You haven't mentioned the
> 	(_mm_maskz_reduce_round_ss): Use
> __builtin_ia32_reducess_mask_round
> 	instead of __builtin_ia32_reducesd_mask_round.
> change here.
> 
> > 	* config/i386/avx512vlbwintrin.h
> > 	(_mm256_mask_alignr_epi8): Correct operand usage.
> > 	(_mm_mask_alignr_epi8): Ditto.
> > 	* config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	* gcc.target/i386/avx512bw-vpalignr-1b.c: New test.
> > 	* gcc.target/i386/avx512dq-vfpclasssd-1b.c: Ditto.
> > 	* gcc.target/i386/avx512dq-vfpclassss-1b.c: Ditto.
> > 	* gcc.target/i386/avx512dq-vreducesd-1b.c: Ditto.
> > 	* gcc.target/i386/avx512dq-vreducess-1b.c: Ditto.
> > 	* gcc.target/i386/avx512vl-valignq-1b.c: Ditto.
> 
> I went through all the cases and agree with all the changes.
> 
> LGTM with the above ChangeLog nit fixed, for trunk/release branches, even for
> 14.2 if committed RSN.

Ok. I will commit them and backport them to GCC13 and GCC12 now. For GCC14,
we could wait for GCC14.3 since a weekend has passed and that is not really RSN.
But if it could be in GCC14.2, I would also be happy with that.

Thx,
Haochen

> 
> 	Jakub
Jakub Jelinek July 29, 2024, 8:40 a.m. UTC | #4
On Mon, Jul 29, 2024 at 02:07:24AM +0000, Jiang, Haochen wrote:
> > LGTM with the above ChangeLog nit fixed, for trunk/release branches, even for
> > 14.2 if committed RSN.
> 
> Ok. I will commit them and backport them to GCC13 and GCC12 now. For GCC14,
> we could wait for GCC14.3 since it has been a weekend passed and not that RSN.
> But if it could be in GCC14.2, I will also happy for that.

Please commit it to 14.2 ASAP.

	Jakub
Haochen Jiang July 29, 2024, 8:43 a.m. UTC | #5
> -----Original Message-----
> From: Jakub Jelinek <jakub@redhat.com>
> Sent: Monday, July 29, 2024 4:41 PM
> To: Jiang, Haochen <haochen.jiang@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>;
> ubizjak@gmail.com
> Subject: Re: [PATCH v2] i386: Fix AVX512 intrin macro typo
> 
> On Mon, Jul 29, 2024 at 02:07:24AM +0000, Jiang, Haochen wrote:
> > > > LGTM with the above ChangeLog nit fixed, for trunk/release branches,
> > > > even for 14.2 if committed RSN.
> >
> > > Ok. I will commit them and backport them to GCC13 and GCC12 now. For
> > > GCC14, we could wait for GCC14.3 since it has been a weekend passed and
> > > not that RSN.
> > > But if it could be in GCC14.2, I will also happy for that.
> 
> Please commit it to 14.2 ASAP.

Pushed to GCC14.2

Thx,
Haochen

> 
> 	Jakub
diff mbox series

Patch

diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 3beed7e649a..d9890c6da1d 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -572,11 +572,11 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
   ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),	\
 					     (int) (C), (__mmask8) (-1))) \
 
-#define _mm_mask_fpclass_ss_mask(X, C, U)				\
+#define _mm_mask_fpclass_ss_mask(U, X, C)				\
   ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X),	\
 					     (int) (C), (__mmask8) (U)))
 
-#define _mm_mask_fpclass_sd_mask(X, C, U)				\
+#define _mm_mask_fpclass_sd_mask(U, X, C)				\
   ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),	\
 					     (int) (C), (__mmask8) (U)))
 #define _mm_reduce_sd(A, B, C)						\
@@ -594,8 +594,9 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
     (__mmask8)(U)))
 
 #define _mm_reduce_round_sd(A, B, C, R)				       \
-  ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A),      \
-    (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R)))
+  ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (), \
+    (__mmask8)(-1), (int)(R)))
 
 #define _mm_mask_reduce_round_sd(W, U, A, B, C, R)		       \
   ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
@@ -622,8 +623,9 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
     (__mmask8)(U)))
 
 #define _mm_reduce_round_ss(A, B, C, R)				       \
-  ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A),	       \
-    (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R)))
+  ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),  \
+    (__mmask8)(-1), (int)(R)))
 
 #define _mm_mask_reduce_round_ss(W, U, A, B, C, R)		       \
   ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
@@ -631,7 +633,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
     (__mmask8)(U), (int)(R)))
 
 #define _mm_maskz_reduce_round_ss(U, A, B, C, R)		       \
-  ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A),   \
+  ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
     (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),	       \
     (__mmask8)(U), (int)(R)))
 
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 56740054aa1..98b9099e343 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -2089,7 +2089,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm256_mask_alignr_epi8(W, U, X, Y, N)					    \
   ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),		    \
 					    (__v4di)(__m256i)(Y), (int)((N) * 8),   \
-					    (__v4di)(__m256i)(X), (__mmask32)(U)))
+					    (__v4di)(__m256i)(W), (__mmask32)(U)))
 
 #define _mm256_mask_srli_epi16(W, U, A, B)                              \
   ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
@@ -2172,7 +2172,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm_mask_alignr_epi8(W, U, X, Y, N)					    \
   ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
 					    (__v2di)(__m128i)(Y), (int)((N) * 8),   \
-					    (__v2di)(__m128i)(X), (__mmask16)(U)))
+					    (__v2di)(__m128i)(W), (__mmask16)(U)))
 
 #define _mm_maskz_alignr_epi8(U, X, Y, N)					    \
   ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 409a5d166b3..ca3b578f113 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -13404,7 +13404,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_mask_alignr_epi64(W, U, X, Y, C)                                \
     ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
-        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
+        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
 
 #define _mm_maskz_alignr_epi64(U, X, Y, C)                                  \
     ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c
new file mode 100644
index 00000000000..2b42aa90b91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1b.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512bw_test (void)
+{
+  y = _mm256_mask_alignr_epi8 (y, m2, y, y, 10);
+  x = _mm_mask_alignr_epi8 (x, m3, x, x, 10);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c
new file mode 100644
index 00000000000..8c7f96fb7a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1b.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O0" } */
+/* { dg-final { scan-assembler-times "vfpclasssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x128;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+  m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c
new file mode 100644
index 00000000000..3196fd60d64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1b.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O0" } */
+/* { dg-final { scan-assembler-times "vfpclassss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x128;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+  m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c
new file mode 100644
index 00000000000..9ae8259d373
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1b.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O0" } */
+/* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+#define IMM 123
+
+volatile __m128d x1, x2, xx1, xx2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+  xx1 = _mm_reduce_round_sd (xx1, xx2, IMM, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c
new file mode 100644
index 00000000000..47bf48fb617
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1b.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O0" } */
+/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+#define IMM 123
+
+volatile __m128 x1, x2, xx1, xx2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+  xx1 = _mm_reduce_round_ss (xx1, xx2, IMM, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c
new file mode 100644
index 00000000000..0ab16b27733
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1b.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+  x = _mm_mask_alignr_epi64 (x, m, x, x, 1);
+}