diff mbox series

[04/18,3/5] Push evex512 target for 512 bit intrins

Message ID 20230921072013.2124750-5-lin1.hu@intel.com
State New
Headers show
Series Support -mevex512 for AVX512 | expand

Commit Message

Hu, Lin1 Sept. 21, 2023, 7:19 a.m. UTC
From: Haochen Jiang <haochen.jiang@intel.com>

gcc/ChangeLog:

	* config/i386/avx512bwintrin.h: Add evex512 target for 512 bit
	intrins.
---
 gcc/config/i386/avx512bwintrin.h | 291 ++++++++++++++++---------------
 1 file changed, 153 insertions(+), 138 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index d1cd549ce18..925bae1457c 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -34,16 +34,6 @@ 
 #define __DISABLE_AVX512BW__
 #endif /* __AVX512BW__ */
 
-/* Internal data types for implementing the intrinsics.  */
-typedef short __v32hi __attribute__ ((__vector_size__ (64)));
-typedef short __v32hi_u __attribute__ ((__vector_size__ (64),	\
-					__may_alias__, __aligned__ (1)));
-typedef char __v64qi __attribute__ ((__vector_size__ (64)));
-typedef char __v64qi_u __attribute__ ((__vector_size__ (64),	\
-				       __may_alias__, __aligned__ (1)));
-
-typedef unsigned long long __mmask64;
-
 extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _ktest_mask32_u8  (__mmask32 __A,  __mmask32 __B, unsigned char *__CF)
@@ -54,229 +44,292 @@  _ktest_mask32_u8  (__mmask32 __A,  __mmask32 __B, unsigned char *__CF)
 
 extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktest_mask64_u8  (__mmask64 __A,  __mmask64 __B, unsigned char *__CF)
+_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
 {
-  *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
-  return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
+  return (unsigned char) __builtin_ia32_ktestzsi (__A, __B);
 }
 
 extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
 {
-  return (unsigned char) __builtin_ia32_ktestzsi (__A, __B);
+  return (unsigned char) __builtin_ia32_ktestcsi (__A, __B);
 }
 
 extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_kortest_mask32_u8  (__mmask32 __A,  __mmask32 __B, unsigned char *__CF)
 {
-  return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
+  *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
+  return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
 }
 
 extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
 {
-  return (unsigned char) __builtin_ia32_ktestcsi (__A, __B);
+  return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
 }
 
 extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
 {
-  return (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
+  return (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
 }
 
-extern __inline unsigned char
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortest_mask32_u8  (__mmask32 __A,  __mmask32 __B, unsigned char *__CF)
+_kadd_mask32 (__mmask32 __A, __mmask32 __B)
 {
-  *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
-  return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
+  return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B);
 }
 
-extern __inline unsigned char
+extern __inline unsigned int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortest_mask64_u8  (__mmask64 __A,  __mmask64 __B, unsigned char *__CF)
+_cvtmask32_u32 (__mmask32 __A)
 {
-  *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
-  return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+  return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A);
 }
 
-extern __inline unsigned char
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_cvtu32_mask32 (unsigned int __A)
 {
-  return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
+  return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A);
 }
 
-extern __inline unsigned char
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_load_mask32 (__mmask32 *__A)
 {
-  return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+  return (__mmask32) __builtin_ia32_kmovd (*__A);
 }
 
-extern __inline unsigned char
+extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_store_mask32 (__mmask32 *__A, __mmask32 __B)
 {
-  return (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
+  *(__mmask32 *) __A = __builtin_ia32_kmovd (__B);
 }
 
-extern __inline unsigned char
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_knot_mask32 (__mmask32 __A)
 {
-  return (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
+  return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A);
 }
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kadd_mask32 (__mmask32 __A, __mmask32 __B)
+_kor_mask32 (__mmask32 __A, __mmask32 __B)
 {
-  return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B);
+  return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B);
 }
 
-extern __inline __mmask64
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kadd_mask64 (__mmask64 __A, __mmask64 __B)
+_kxnor_mask32 (__mmask32 __A, __mmask32 __B)
 {
-  return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B);
+  return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B);
 }
 
-extern __inline unsigned int
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtmask32_u32 (__mmask32 __A)
+_kxor_mask32 (__mmask32 __A, __mmask32 __B)
 {
-  return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A);
+  return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B);
 }
 
-extern __inline unsigned long long
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtmask64_u64 (__mmask64 __A)
+_kand_mask32 (__mmask32 __A, __mmask32 __B)
 {
-  return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A);
+  return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B);
 }
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtu32_mask32 (unsigned int __A)
+_kandn_mask32 (__mmask32 __A, __mmask32 __B)
 {
-  return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A);
+  return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B);
 }
 
-extern __inline __mmask64
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtu64_mask64 (unsigned long long __A)
+_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
 {
-  return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A);
+  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+					      (__mmask32) __B);
 }
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_load_mask32 (__mmask32 *__A)
+_kunpackw_mask32 (__mmask16 __A, __mmask16 __B)
 {
-  return (__mmask32) __builtin_ia32_kmovd (*__A);
+  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+					      (__mmask32) __B);
 }
 
-extern __inline __mmask64
+#if __OPTIMIZE__
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_load_mask64 (__mmask64 *__A)
+_kshiftli_mask32 (__mmask32 __A, unsigned int __B)
 {
-  return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A);
+  return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A,
+						(__mmask8) __B);
 }
 
-extern __inline void
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_store_mask32 (__mmask32 *__A, __mmask32 __B)
+_kshiftri_mask32 (__mmask32 __A, unsigned int __B)
 {
-  *(__mmask32 *) __A = __builtin_ia32_kmovd (__B);
+  return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A,
+						(__mmask8) __B);
 }
 
-extern __inline void
+#else
+#define _kshiftli_mask32(X, Y)							\
+  ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask32(X, Y)							\
+  ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#endif
+
+#ifdef __DISABLE_AVX512BW__
+#undef __DISABLE_AVX512BW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BW__ */
+
+#if !defined (__AVX512BW__) || !defined (__EVEX512__)
+#pragma GCC push_options
+#pragma GCC target("avx512bw,evex512")
+#define __DISABLE_AVX512BW_512__
+#endif /* __AVX512BW_512__ */
+
+/* Internal data types for implementing the intrinsics.  */
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi_u __attribute__ ((__vector_size__ (64),	\
+					__may_alias__, __aligned__ (1)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi_u __attribute__ ((__vector_size__ (64),	\
+				       __may_alias__, __aligned__ (1)));
+
+typedef unsigned long long __mmask64;
+
+extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_store_mask64 (__mmask64 *__A, __mmask64 __B)
+_ktest_mask64_u8  (__mmask64 __A,  __mmask64 __B, unsigned char *__CF)
 {
-  *(__mmask64 *) __A = __builtin_ia32_kmovq (__B);
+  *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
+  return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
 }
 
-extern __inline __mmask32
+extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_knot_mask32 (__mmask32 __A)
+_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
 {
-  return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A);
+  return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
 }
 
-extern __inline __mmask64
+extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_knot_mask64 (__mmask64 __A)
+_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
 {
-  return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A);
+  return (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
 }
 
-extern __inline __mmask32
+extern __inline unsigned char
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kor_mask32 (__mmask32 __A, __mmask32 __B)
+_kortest_mask64_u8  (__mmask64 __A,  __mmask64 __B, unsigned char *__CF)
 {
-  return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B);
+  *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
+  return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
+{
+  return (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
 }
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kor_mask64 (__mmask64 __A, __mmask64 __B)
+_kadd_mask64 (__mmask64 __A, __mmask64 __B)
 {
-  return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B);
+  return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B);
 }
 
-extern __inline __mmask32
+extern __inline unsigned long long
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxnor_mask32 (__mmask32 __A, __mmask32 __B)
+_cvtmask64_u64 (__mmask64 __A)
 {
-  return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B);
+  return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A);
 }
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxnor_mask64 (__mmask64 __A, __mmask64 __B)
+_cvtu64_mask64 (unsigned long long __A)
 {
-  return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B);
+  return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A);
 }
 
-extern __inline __mmask32
+extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxor_mask32 (__mmask32 __A, __mmask32 __B)
+_load_mask64 (__mmask64 *__A)
 {
-  return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B);
+  return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_store_mask64 (__mmask64 *__A, __mmask64 __B)
+{
+  *(__mmask64 *) __A = __builtin_ia32_kmovq (__B);
 }
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxor_mask64 (__mmask64 __A, __mmask64 __B)
+_knot_mask64 (__mmask64 __A)
 {
-  return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B);
+  return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A);
 }
 
-extern __inline __mmask32
+extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kand_mask32 (__mmask32 __A, __mmask32 __B)
+_kor_mask64 (__mmask64 __A, __mmask64 __B)
 {
-  return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B);
+  return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B);
 }
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kand_mask64 (__mmask64 __A, __mmask64 __B)
+_kxnor_mask64 (__mmask64 __A, __mmask64 __B)
 {
-  return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B);
+  return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B);
 }
 
-extern __inline __mmask32
+extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kandn_mask32 (__mmask32 __A, __mmask32 __B)
+_kxor_mask64 (__mmask64 __A, __mmask64 __B)
 {
-  return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B);
+  return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B);
 }
 
 extern __inline __mmask64
@@ -366,22 +419,6 @@  _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
 						    (__mmask64) __U);
 }
 
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
-  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
-					      (__mmask32) __B);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kunpackw_mask32 (__mmask16 __A, __mmask16 __B)
-{
-  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
-					      (__mmask32) __B);
-}
-
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
@@ -2776,14 +2813,6 @@  _mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
 }
 
 #ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kshiftli_mask32 (__mmask32 __A, unsigned int __B)
-{
-  return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A,
-						(__mmask8) __B);
-}
-
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _kshiftli_mask64 (__mmask64 __A, unsigned int __B)
@@ -2792,14 +2821,6 @@  _kshiftli_mask64 (__mmask64 __A, unsigned int __B)
 						(__mmask8) __B);
 }
 
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kshiftri_mask32 (__mmask32 __A, unsigned int __B)
-{
-  return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A,
-						(__mmask8) __B);
-}
-
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _kshiftri_mask64 (__mmask64 __A, unsigned int __B)
@@ -3145,15 +3166,9 @@  _mm512_bsrli_epi128 (__m512i __A, const int __N)
 }
 
 #else
-#define _kshiftli_mask32(X, Y)							\
-  ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y)))
-
 #define _kshiftli_mask64(X, Y)							\
   ((__mmask64) __builtin_ia32_kshiftlidi ((__mmask64)(X), (__mmask8)(Y)))
 
-#define _kshiftri_mask32(X, Y)							\
-  ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y)))
-
 #define _kshiftri_mask64(X, Y)							\
   ((__mmask64) __builtin_ia32_kshiftridi ((__mmask64)(X), (__mmask8)(Y)))
 
@@ -3328,9 +3343,9 @@  _mm512_bsrli_epi128 (__m512i __A, const int __N)
 
 #endif
 
-#ifdef __DISABLE_AVX512BW__
-#undef __DISABLE_AVX512BW__
+#ifdef __DISABLE_AVX512BW_512__
+#undef __DISABLE_AVX512BW_512__
 #pragma GCC pop_options
-#endif /* __DISABLE_AVX512BW__ */
+#endif /* __DISABLE_AVX512BW_512__ */
 
 #endif /* _AVX512BWINTRIN_H_INCLUDED */