diff mbox series

i386: Fix AVX512 intrin macro typo

Message ID 20240726062522.3853519-1-haochen.jiang@intel.com
State New
Headers show
Series i386: Fix AVX512 intrin macro typo | expand

Commit Message

Haochen Jiang July 26, 2024, 6:25 a.m. UTC
Hi all,

There are several typos in the AVX512 intrinsic macro definitions. They
eventually result in errors at -O0. This patch fixes them.

Bootstrapped on x86_64-pc-linux-gnu. Ok for trunk and backport to GCC 14,
GCC 13 and GCC 12?

Thx,
Haochen

gcc/ChangeLog:

	* config/i386/avx512dqintrin.h
	(_mm_mask_fpclass_ss_mask): Correct operand order.
	(_mm_mask_fpclass_sd_mask): Ditto.
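	(_mm_reduce_round_sd): Use the mask_round builtin with -1 as mask
	since the intrinsic is not masked.
	(_mm_reduce_round_ss): Ditto.
	(_mm_maskz_reduce_round_ss): Use __builtin_ia32_reducess_mask_round.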
	* config/i386/avx512vlbwintrin.h
	(_mm256_mask_alignr_epi8): Correct operand usage.
	(_mm_mask_alignr_epi8): Ditto.
	* config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto.
---
 gcc/config/i386/avx512dqintrin.h   | 16 +++++++++-------
 gcc/config/i386/avx512vlbwintrin.h |  4 ++--
 gcc/config/i386/avx512vlintrin.h   |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

Comments

Jakub Jelinek July 26, 2024, 6:30 a.m. UTC | #1
On Fri, Jul 26, 2024 at 02:25:22PM +0800, Haochen Jiang wrote:
> Hi all,
> 
> There are several typo in AVX512 intrins macro define. They will eventually
> result in errors with -O0. This patch will fix that.

Add a testcase that verifies that?

> Bootstrapped on x86-64-pc-linux-gnu. Ok for trunk and backport to GCC14,
> GCC 13 and GCC 12?
> 
> Thx,
> Haochen
> 
> gcc/ChangeLog:
> 
> 	* config/i386/avx512dqintrin.h
> 	(_mm_mask_fpclass_ss_mask): Correct operand order.
> 	(_mm_mask_fpclass_sd_mask): Ditto.
> 	(_mm_reduce_round_sd): Use -1 as mask since it is non-mask.
> 	(_mm_reduce_round_ss): Ditto.
> 	* config/i386/avx512vlbwintrin.h
> 	(_mm256_mask_alignr_epi8): Correct operand usage.
> 	(_mm_mask_alignr_epi8): Ditto.
> 	* config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto.

	Jakub
Haochen Jiang July 26, 2024, 6:36 a.m. UTC | #2
> -----Original Message-----
> From: Jakub Jelinek <jakub@redhat.com>
> Sent: Friday, July 26, 2024 2:31 PM
> To: Jiang, Haochen <haochen.jiang@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>;
> ubizjak@gmail.com
> Subject: Re: [PATCH] i386: Fix AVX512 intrin macro typo
> 
> On Fri, Jul 26, 2024 at 02:25:22PM +0800, Haochen Jiang wrote:
> > Hi all,
> >
> > There are several typo in AVX512 intrins macro define. They will
> > eventually result in errors with -O0. This patch will fix that.
> 
> Add a testcase that verifies that?

Ok, I will add testcases with -O0 for them.

Thx,
Haochen

> 
> > Bootstrapped on x86-64-pc-linux-gnu. Ok for trunk and backport to
> > GCC14, GCC 13 and GCC 12?
> >
> > Thx,
> > Haochen
> >
> > gcc/ChangeLog:
> >
> > 	* config/i386/avx512dqintrin.h
> > 	(_mm_mask_fpclass_ss_mask): Correct operand order.
> > 	(_mm_mask_fpclass_sd_mask): Ditto.
> > 	(_mm_reduce_round_sd): Use -1 as mask since it is non-mask.
> > 	(_mm_reduce_round_ss): Ditto.
> > 	* config/i386/avx512vlbwintrin.h
> > 	(_mm256_mask_alignr_epi8): Correct operand usage.
> > 	(_mm_mask_alignr_epi8): Ditto.
> > 	* config/i386/avx512vlintrin.h (_mm_mask_alignr_epi64): Ditto.
> 
> 	Jakub
diff mbox series

Patch

diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 3beed7e649a..d9890c6da1d 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -572,11 +572,11 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
   ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),	\
 					     (int) (C), (__mmask8) (-1))) \
 
-#define _mm_mask_fpclass_ss_mask(X, C, U)				\
+#define _mm_mask_fpclass_ss_mask(U, X, C)				\
   ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X),	\
 					     (int) (C), (__mmask8) (U)))
 
-#define _mm_mask_fpclass_sd_mask(X, C, U)				\
+#define _mm_mask_fpclass_sd_mask(U, X, C)				\
   ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),	\
 					     (int) (C), (__mmask8) (U)))
 #define _mm_reduce_sd(A, B, C)						\
@@ -594,8 +594,9 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
     (__mmask8)(U)))
 
 #define _mm_reduce_round_sd(A, B, C, R)				       \
-  ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A),      \
-    (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R)))
+  ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
+    (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_avx512_setzero_pd (), \
+    (__mmask8)(-1), (int)(R)))
 
 #define _mm_mask_reduce_round_sd(W, U, A, B, C, R)		       \
   ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \
@@ -622,8 +623,9 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
     (__mmask8)(U)))
 
 #define _mm_reduce_round_ss(A, B, C, R)				       \
-  ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A),	       \
-    (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R)))
+  ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
+    (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),  \
+    (__mmask8)(-1), (int)(R)))
 
 #define _mm_mask_reduce_round_ss(W, U, A, B, C, R)		       \
   ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
@@ -631,7 +633,7 @@  _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
     (__mmask8)(U), (int)(R)))
 
 #define _mm_maskz_reduce_round_ss(U, A, B, C, R)		       \
-  ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A),   \
+  ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A),   \
     (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_avx512_setzero_ps (),	       \
     (__mmask8)(U), (int)(R)))
 
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 56740054aa1..98b9099e343 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -2089,7 +2089,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm256_mask_alignr_epi8(W, U, X, Y, N)					    \
   ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X),		    \
 					    (__v4di)(__m256i)(Y), (int)((N) * 8),   \
-					    (__v4di)(__m256i)(X), (__mmask32)(U)))
+					    (__v4di)(__m256i)(W), (__mmask32)(U)))
 
 #define _mm256_mask_srli_epi16(W, U, A, B)                              \
   ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A),      \
@@ -2172,7 +2172,7 @@  _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
 #define _mm_mask_alignr_epi8(W, U, X, Y, N)					    \
   ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
 					    (__v2di)(__m128i)(Y), (int)((N) * 8),   \
-					    (__v2di)(__m128i)(X), (__mmask16)(U)))
+					    (__v2di)(__m128i)(W), (__mmask16)(U)))
 
 #define _mm_maskz_alignr_epi8(U, X, Y, N)					    \
   ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),		    \
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 409a5d166b3..ca3b578f113 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -13404,7 +13404,7 @@  _mm256_permutex_pd (__m256d __X, const int __M)
 
 #define _mm_mask_alignr_epi64(W, U, X, Y, C)                                \
     ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
-        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
+        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
 
 #define _mm_maskz_alignr_epi64(U, X, Y, C)                                  \
     ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \