@@ -34,16 +34,6 @@
#define __DISABLE_AVX512BW__
#endif /* __AVX512BW__ */
-/* Internal data types for implementing the intrinsics. */
-typedef short __v32hi __attribute__ ((__vector_size__ (64)));
-typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \
- __may_alias__, __aligned__ (1)));
-typedef char __v64qi __attribute__ ((__vector_size__ (64)));
-typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \
- __may_alias__, __aligned__ (1)));
-
-typedef unsigned long long __mmask64;
-
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF)
@@ -54,229 +44,292 @@ _ktest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF)
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF)
+_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
{
- *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
- return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
+ return (unsigned char) __builtin_ia32_ktestzsi (__A, __B);
}
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
{
- return (unsigned char) __builtin_ia32_ktestzsi (__A, __B);
+ return (unsigned char) __builtin_ia32_ktestcsi (__A, __B);
}
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF)
{
- return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
+ *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
+ return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
}
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
{
- return (unsigned char) __builtin_ia32_ktestcsi (__A, __B);
+ return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
}
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
{
- return (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
+ return (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
}
-extern __inline unsigned char
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortest_mask32_u8 (__mmask32 __A, __mmask32 __B, unsigned char *__CF)
+_kadd_mask32 (__mmask32 __A, __mmask32 __B)
{
- *__CF = (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
- return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
+ return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B);
}
-extern __inline unsigned char
+extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF)
+_cvtmask32_u32 (__mmask32 __A)
{
- *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
- return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+ return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A);
}
-extern __inline unsigned char
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestz_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_cvtu32_mask32 (unsigned int __A)
{
- return (unsigned char) __builtin_ia32_kortestzsi (__A, __B);
+ return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A);
}
-extern __inline unsigned char
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_load_mask32 (__mmask32 *__A)
{
- return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+ return (__mmask32) __builtin_ia32_kmovd (*__A);
}
-extern __inline unsigned char
+extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestc_mask32_u8 (__mmask32 __A, __mmask32 __B)
+_store_mask32 (__mmask32 *__A, __mmask32 __B)
{
- return (unsigned char) __builtin_ia32_kortestcsi (__A, __B);
+ *(__mmask32 *) __A = __builtin_ia32_kmovd (__B);
}
-extern __inline unsigned char
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
+_knot_mask32 (__mmask32 __A)
{
- return (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
+ return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A);
}
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kadd_mask32 (__mmask32 __A, __mmask32 __B)
+_kor_mask32 (__mmask32 __A, __mmask32 __B)
{
- return (__mmask32) __builtin_ia32_kaddsi ((__mmask32) __A, (__mmask32) __B);
+ return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B);
}
-extern __inline __mmask64
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kadd_mask64 (__mmask64 __A, __mmask64 __B)
+_kxnor_mask32 (__mmask32 __A, __mmask32 __B)
{
- return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B);
+ return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B);
}
-extern __inline unsigned int
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtmask32_u32 (__mmask32 __A)
+_kxor_mask32 (__mmask32 __A, __mmask32 __B)
{
- return (unsigned int) __builtin_ia32_kmovd ((__mmask32) __A);
+ return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B);
}
-extern __inline unsigned long long
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtmask64_u64 (__mmask64 __A)
+_kand_mask32 (__mmask32 __A, __mmask32 __B)
{
- return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A);
+ return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B);
}
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtu32_mask32 (unsigned int __A)
+_kandn_mask32 (__mmask32 __A, __mmask32 __B)
{
- return (__mmask32) __builtin_ia32_kmovd ((__mmask32) __A);
+ return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B);
}
-extern __inline __mmask64
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_cvtu64_mask64 (unsigned long long __A)
+_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
{
- return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A);
+ return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+ (__mmask32) __B);
}
extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_load_mask32 (__mmask32 *__A)
+_kunpackw_mask32 (__mmask16 __A, __mmask16 __B)
{
- return (__mmask32) __builtin_ia32_kmovd (*__A);
+ return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+ (__mmask32) __B);
}
-extern __inline __mmask64
+#if __OPTIMIZE__
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_load_mask64 (__mmask64 *__A)
+_kshiftli_mask32 (__mmask32 __A, unsigned int __B)
{
- return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A);
+ return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A,
+ (__mmask8) __B);
}
-extern __inline void
+extern __inline __mmask32
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_store_mask32 (__mmask32 *__A, __mmask32 __B)
+_kshiftri_mask32 (__mmask32 __A, unsigned int __B)
{
- *(__mmask32 *) __A = __builtin_ia32_kmovd (__B);
+ return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A,
+ (__mmask8) __B);
}
-extern __inline void
+#else
+#define _kshiftli_mask32(X, Y) \
+ ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#define _kshiftri_mask32(X, Y) \
+ ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y)))
+
+#endif
+
+#ifdef __DISABLE_AVX512BW__
+#undef __DISABLE_AVX512BW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BW__ */
+
+#if !defined (__AVX512BW__) || !defined (__EVEX512__)
+#pragma GCC push_options
+#pragma GCC target("avx512bw,evex512")
+#define __DISABLE_AVX512BW_512__
+#endif /* __AVX512BW__ && __EVEX512__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \
+ __may_alias__, __aligned__ (1)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \
+ __may_alias__, __aligned__ (1)));
+
+typedef unsigned long long __mmask64;
+
+extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_store_mask64 (__mmask64 *__A, __mmask64 __B)
+_ktest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF)
{
- *(__mmask64 *) __A = __builtin_ia32_kmovq (__B);
+ *__CF = (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
+ return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
}
-extern __inline __mmask32
+extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_knot_mask32 (__mmask32 __A)
+_ktestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
{
- return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A);
+ return (unsigned char) __builtin_ia32_ktestzdi (__A, __B);
}
-extern __inline __mmask64
+extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_knot_mask64 (__mmask64 __A)
+_ktestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
{
- return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A);
+ return (unsigned char) __builtin_ia32_ktestcdi (__A, __B);
}
-extern __inline __mmask32
+extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kor_mask32 (__mmask32 __A, __mmask32 __B)
+_kortest_mask64_u8 (__mmask64 __A, __mmask64 __B, unsigned char *__CF)
{
- return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B);
+ *__CF = (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
+ return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestz_mask64_u8 (__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char) __builtin_ia32_kortestzdi (__A, __B);
+}
+
+extern __inline unsigned char
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kortestc_mask64_u8 (__mmask64 __A, __mmask64 __B)
+{
+ return (unsigned char) __builtin_ia32_kortestcdi (__A, __B);
}
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kor_mask64 (__mmask64 __A, __mmask64 __B)
+_kadd_mask64 (__mmask64 __A, __mmask64 __B)
{
- return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B);
+ return (__mmask64) __builtin_ia32_kadddi ((__mmask64) __A, (__mmask64) __B);
}
-extern __inline __mmask32
+extern __inline unsigned long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxnor_mask32 (__mmask32 __A, __mmask32 __B)
+_cvtmask64_u64 (__mmask64 __A)
{
- return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B);
+ return (unsigned long long) __builtin_ia32_kmovq ((__mmask64) __A);
}
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxnor_mask64 (__mmask64 __A, __mmask64 __B)
+_cvtu64_mask64 (unsigned long long __A)
{
- return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B);
+ return (__mmask64) __builtin_ia32_kmovq ((__mmask64) __A);
}
-extern __inline __mmask32
+extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxor_mask32 (__mmask32 __A, __mmask32 __B)
+_load_mask64 (__mmask64 *__A)
{
- return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B);
+ return (__mmask64) __builtin_ia32_kmovq (*(__mmask64 *) __A);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_store_mask64 (__mmask64 *__A, __mmask64 __B)
+{
+ *(__mmask64 *) __A = __builtin_ia32_kmovq (__B);
}
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kxor_mask64 (__mmask64 __A, __mmask64 __B)
+_knot_mask64 (__mmask64 __A)
{
- return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B);
+ return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A);
}
-extern __inline __mmask32
+extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kand_mask32 (__mmask32 __A, __mmask32 __B)
+_kor_mask64 (__mmask64 __A, __mmask64 __B)
{
- return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B);
+ return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B);
}
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kand_mask64 (__mmask64 __A, __mmask64 __B)
+_kxnor_mask64 (__mmask64 __A, __mmask64 __B)
{
- return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B);
+ return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B);
}
-extern __inline __mmask32
+extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kandn_mask32 (__mmask32 __A, __mmask32 __B)
+_kxor_mask64 (__mmask64 __A, __mmask64 __B)
{
- return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B);
+ return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask64 (__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B);
}
extern __inline __mmask64
@@ -366,22 +419,6 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
(__mmask64) __U);
}
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
- return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
- (__mmask32) __B);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kunpackw_mask32 (__mmask16 __A, __mmask16 __B)
-{
- return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
- (__mmask32) __B);
-}
-
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
@@ -2776,14 +2813,6 @@ _mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
}
#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kshiftli_mask32 (__mmask32 __A, unsigned int __B)
-{
- return (__mmask32) __builtin_ia32_kshiftlisi ((__mmask32) __A,
- (__mmask8) __B);
-}
-
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftli_mask64 (__mmask64 __A, unsigned int __B)
@@ -2792,14 +2821,6 @@ _kshiftli_mask64 (__mmask64 __A, unsigned int __B)
(__mmask8) __B);
}
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_kshiftri_mask32 (__mmask32 __A, unsigned int __B)
-{
- return (__mmask32) __builtin_ia32_kshiftrisi ((__mmask32) __A,
- (__mmask8) __B);
-}
-
extern __inline __mmask64
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_kshiftri_mask64 (__mmask64 __A, unsigned int __B)
@@ -3145,15 +3166,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N)
}
#else
-#define _kshiftli_mask32(X, Y) \
- ((__mmask32) __builtin_ia32_kshiftlisi ((__mmask32)(X), (__mmask8)(Y)))
-
#define _kshiftli_mask64(X, Y) \
((__mmask64) __builtin_ia32_kshiftlidi ((__mmask64)(X), (__mmask8)(Y)))
-#define _kshiftri_mask32(X, Y) \
- ((__mmask32) __builtin_ia32_kshiftrisi ((__mmask32)(X), (__mmask8)(Y)))
-
#define _kshiftri_mask64(X, Y) \
((__mmask64) __builtin_ia32_kshiftridi ((__mmask64)(X), (__mmask8)(Y)))
@@ -3328,9 +3343,9 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N)
#endif
-#ifdef __DISABLE_AVX512BW__
-#undef __DISABLE_AVX512BW__
+#ifdef __DISABLE_AVX512BW_512__
+#undef __DISABLE_AVX512BW_512__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BW__ */
+#endif /* __DISABLE_AVX512BW_512__ */
#endif /* _AVX512BWINTRIN_H_INCLUDED */
From: Haochen Jiang <haochen.jiang@intel.com> gcc/ChangeLog: * config/i386/avx512bwintrin.h: Add evex512 target for 512 bit intrins. --- gcc/config/i386/avx512bwintrin.h | 291 ++++++++++++++++--------------- 1 file changed, 153 insertions(+), 138 deletions(-)