@@ -468,100 +468,100 @@ _mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A)
__U);
}
-/* Intrinsics vrndscalepbf16. */
+/* Intrinsics vrndscalebf16. */
#ifdef __OPTIMIZE__
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscalene_pbh (__m512bh __A, int B)
+_mm512_roundscale_pbh (__m512bh __A, int B)
{
return (__m512bh)
- __builtin_ia32_rndscalenepbf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
}
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscalene_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
+_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
{
return (__m512bh)
- __builtin_ia32_rndscalenepbf16512_mask (__A, B, __W, __U);
+ __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U);
}
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscalene_pbh (__mmask32 __U, __m512bh __A, int B)
+_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B)
{
return (__m512bh)
- __builtin_ia32_rndscalenepbf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
}
#else
-#define _mm512_roundscalene_pbh(A, B) \
- (__builtin_ia32_rndscalenepbf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
+#define _mm512_roundscale_pbh(A, B) \
+ (__builtin_ia32_rndscalebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
-#define _mm512_mask_roundscalene_pbh(A, B, C, D) \
- (__builtin_ia32_rndscalenepbf16512_mask ((C), (D), (A), (B)))
+#define _mm512_mask_roundscale_pbh(A, B, C, D) \
+ (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B)))
-#define _mm512_maskz_roundscalene_pbh(A, B, C) \
- (__builtin_ia32_rndscalenepbf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
+#define _mm512_maskz_roundscale_pbh(A, B, C) \
+ (__builtin_ia32_rndscalebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
#endif /* __OPTIMIZE__ */
-/* Intrinsics vreducepbf16. */
+/* Intrinsics vreducebf16. */
#ifdef __OPTIMIZE__
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reducene_pbh (__m512bh __A, int B)
+_mm512_reduce_pbh (__m512bh __A, int B)
{
return (__m512bh)
- __builtin_ia32_reducenepbf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
+ __builtin_ia32_reducebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
}
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reducene_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, int B)
+_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, int B)
{
return (__m512bh)
- __builtin_ia32_reducenepbf16512_mask (__A, B, __W, __U);
+ __builtin_ia32_reducebf16512_mask (__A, B, __W, __U);
}
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reducene_pbh (__mmask32 __U, __m512bh __A, int B)
+_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B)
{
return (__m512bh)
- __builtin_ia32_reducenepbf16512_mask (__A, B,
+ __builtin_ia32_reducebf16512_mask (__A, B,
(__v32bf) _mm512_setzero_si512 (),
__U);
}
#else
-#define _mm512_reducene_pbh(A, B) \
- (__builtin_ia32_reducenepbf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
+#define _mm512_reduce_pbh(A, B) \
+ (__builtin_ia32_reducebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
-#define _mm512_mask_reducene_pbh(A, B, C, D) \
- (__builtin_ia32_reducenepbf16512_mask ((C), (D), (A), (B)))
+#define _mm512_mask_reduce_pbh(A, B, C, D) \
+ (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B)))
-#define _mm512_maskz_reducene_pbh(A, B, C) \
- (__builtin_ia32_reducenepbf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
+#define _mm512_maskz_reduce_pbh(A, B, C) \
+ (__builtin_ia32_reducebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
#endif /* __OPTIMIZE__ */
-/* Intrinsics vgetmantpbf16. */
+/* Intrinsics vgetmantbf16. */
#ifdef __OPTIMIZE__
extern __inline__ __m512bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -569,9 +569,9 @@ _mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m512bh)
- __builtin_ia32_getmantpbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
}
extern __inline__ __m512bh
@@ -581,8 +581,8 @@ _mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m512bh)
- __builtin_ia32_getmantpbf16512_mask (__A, (int) (__C << 2) | __B,
- __W, __U);
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ __W, __U);
}
extern __inline__ __m512bh
@@ -592,23 +592,23 @@ _mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m512bh)
- __builtin_ia32_getmantpbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
}
#else
#define _mm512_getmant_pbh(A, B, C) \
- (__builtin_ia32_getmantpbf16512_mask ((A), (int)(((C)<<2) | (B)), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
+ (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
#define _mm512_mask_getmant_pbh(A, B, C, D, E) \
- (__builtin_ia32_getmantpbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
+ (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
#define _mm512_maskz_getmant_pbh(A, B, C, D) \
- (__builtin_ia32_getmantpbf16512_mask ((B), (int)(((C)<<2) | (D)), \
- (__v32bf) _mm512_setzero_si512 (), \
+ (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \
+ (__v32bf) _mm512_setzero_si512 (), \
(A)))
#endif /* __OPTIMIZE__ */
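For reference, the renamed getmant builtins take a single immediate packed
from the two enum arguments as (__C << 2) | __B, i.e. sign control in bits
3:2 and the normalization interval in bits 1:0.  A minimal sketch, assuming
the _MM_MANTISSA_NORM_ENUM/_MM_MANTISSA_SIGN_ENUM enumerator values from
avx512fintrin.h and the -mavx10.2-512 option name:

#include <immintrin.h>

__m512bh
getmant_example (__m512bh x)
{
  /* (_MM_MANT_SIGN_src << 2) | _MM_MANT_NORM_p75_1p5 == (0 << 2) | 3,
     so __builtin_ia32_getmantbf16512_mask sees immediate 3.  */
  return _mm512_getmant_pbh (x, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
}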
@@ -901,186 +901,186 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A)
__U);
}
-/* Intrinsics vrndscalepbf16. */
+/* Intrinsics vrndscalebf16. */
#ifdef __OPTIMIZE__
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_roundscalene_pbh (__m256bh __A, int B)
+_mm256_roundscale_pbh (__m256bh __A, int B)
{
return (__m256bh)
- __builtin_ia32_rndscalenepbf16256_mask (__A, B,
- (__v16bf) _mm256_setzero_si256 (),
- (__mmask16) -1);
+ __builtin_ia32_rndscalebf16256_mask (__A, B,
+ (__v16bf) _mm256_setzero_si256 (),
+ (__mmask16) -1);
}
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_roundscalene_pbh (__m256bh __W, __mmask16 __U,
- __m256bh __A, int B)
+_mm256_mask_roundscale_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, int B)
{
return (__m256bh)
- __builtin_ia32_rndscalenepbf16256_mask (__A, B, __W, __U);
+ __builtin_ia32_rndscalebf16256_mask (__A, B, __W, __U);
}
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_roundscalene_pbh (__mmask16 __U, __m256bh __A, int B)
+_mm256_maskz_roundscale_pbh (__mmask16 __U, __m256bh __A, int B)
{
return (__m256bh)
- __builtin_ia32_rndscalenepbf16256_mask (__A, B,
- (__v16bf) _mm256_setzero_si256 (),
- __U);
+ __builtin_ia32_rndscalebf16256_mask (__A, B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_roundscalene_pbh (__m128bh __A, int B)
+_mm_roundscale_pbh (__m128bh __A, int B)
{
return (__m128bh)
- __builtin_ia32_rndscalenepbf16128_mask (__A, B,
- (__v8bf) _mm_setzero_si128 (),
- (__mmask8) -1);
+ __builtin_ia32_rndscalebf16128_mask (__A, B,
+ (__v8bf) _mm_setzero_si128 (),
+ (__mmask8) -1);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_roundscalene_pbh (__m128bh __W, __mmask8 __U,
- __m128bh __A, int B)
+_mm_mask_roundscale_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, int B)
{
return (__m128bh)
- __builtin_ia32_rndscalenepbf16128_mask (__A, B, __W, __U);
+ __builtin_ia32_rndscalebf16128_mask (__A, B, __W, __U);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_roundscalene_pbh (__mmask8 __U, __m128bh __A, int B)
+_mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B)
{
return (__m128bh)
- __builtin_ia32_rndscalenepbf16128_mask (__A, B,
- (__v8bf) _mm_setzero_si128 (),
- __U);
+ __builtin_ia32_rndscalebf16128_mask (__A, B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
}
#else
-#define _mm256_roundscalene_pbh(A, B) \
- (__builtin_ia32_rndscalenepbf16256_mask ((A), (B), \
- (__v16bf) _mm256_setzero_si256 (), \
- (__mmask16) -1))
+#define _mm256_roundscale_pbh(A, B) \
+ (__builtin_ia32_rndscalebf16256_mask ((A), (B), \
+ (__v16bf) _mm256_setzero_si256 (), \
+ (__mmask16) -1))
-#define _mm256_mask_roundscalene_pbh(A, B, C, D) \
- (__builtin_ia32_rndscalenepbf16256_mask ((C), (D), (A), (B)))
+#define _mm256_mask_roundscale_pbh(A, B, C, D) \
+ (__builtin_ia32_rndscalebf16256_mask ((C), (D), (A), (B)))
-#define _mm256_maskz_roundscalene_pbh(A, B, C) \
- (__builtin_ia32_rndscalenepbf16256_mask ((B), (C), \
- (__v16bf) _mm256_setzero_si256 (), \
- (A)))
+#define _mm256_maskz_roundscale_pbh(A, B, C) \
+ (__builtin_ia32_rndscalebf16256_mask ((B), (C), \
+ (__v16bf) _mm256_setzero_si256 (), \
+ (A)))
-#define _mm_roundscalene_pbh(A, B) \
- (__builtin_ia32_rndscalenepbf16128_mask ((A), (B), \
- (__v8bf) _mm_setzero_si128 (), \
- (__mmask8) -1))
+#define _mm_roundscale_pbh(A, B) \
+ (__builtin_ia32_rndscalebf16128_mask ((A), (B), \
+ (__v8bf) _mm_setzero_si128 (), \
+ (__mmask8) -1))
-#define _mm_mask_roundscalene_pbh(A, B, C, D) \
- (__builtin_ia32_rndscalenepbf16128_mask ((C), (D), (A), (B)))
+#define _mm_mask_roundscale_pbh(A, B, C, D) \
+ (__builtin_ia32_rndscalebf16128_mask ((C), (D), (A), (B)))
-#define _mm_maskz_roundscalene_pbh(A, B, C) \
- (__builtin_ia32_rndscalenepbf16128_mask ((B), (C), \
- (__v8bf) _mm_setzero_si128 (), \
- (A)))
+#define _mm_maskz_roundscale_pbh(A, B, C) \
+ (__builtin_ia32_rndscalebf16128_mask ((B), (C), \
+ (__v8bf) _mm_setzero_si128 (), \
+ (A)))
#endif /* __OPTIMIZE__ */
-/* Intrinsics vreducepbf16. */
+/* Intrinsics vreducebf16. */
#ifdef __OPTIMIZE__
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_reducene_pbh (__m256bh __A, int B)
+_mm256_reduce_pbh (__m256bh __A, int B)
{
return (__m256bh)
- __builtin_ia32_reducenepbf16256_mask (__A, B,
- (__v16bf) _mm256_setzero_si256 (),
- (__mmask16) -1);
+ __builtin_ia32_reducebf16256_mask (__A, B,
+ (__v16bf) _mm256_setzero_si256 (),
+ (__mmask16) -1);
}
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_reducene_pbh (__m256bh __W, __mmask16 __U,
- __m256bh __A, int B)
+_mm256_mask_reduce_pbh (__m256bh __W, __mmask16 __U,
+ __m256bh __A, int B)
{
return (__m256bh)
- __builtin_ia32_reducenepbf16256_mask (__A, B, __W, __U);
+ __builtin_ia32_reducebf16256_mask (__A, B, __W, __U);
}
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_reducene_pbh (__mmask16 __U, __m256bh __A, int B)
+_mm256_maskz_reduce_pbh (__mmask16 __U, __m256bh __A, int B)
{
return (__m256bh)
- __builtin_ia32_reducenepbf16256_mask (__A, B,
- (__v16bf) _mm256_setzero_si256 (),
- __U);
+ __builtin_ia32_reducebf16256_mask (__A, B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_reducene_pbh (__m128bh __A, int B)
+_mm_reduce_pbh (__m128bh __A, int B)
{
return (__m128bh)
- __builtin_ia32_reducenepbf16128_mask (__A, B,
- (__v8bf) _mm_setzero_si128 (),
- (__mmask8) -1);
+ __builtin_ia32_reducebf16128_mask (__A, B,
+ (__v8bf) _mm_setzero_si128 (),
+ (__mmask8) -1);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_reducene_pbh (__m128bh __W, __mmask8 __U,
- __m128bh __A, int B)
+_mm_mask_reduce_pbh (__m128bh __W, __mmask8 __U,
+ __m128bh __A, int B)
{
return (__m128bh)
- __builtin_ia32_reducenepbf16128_mask (__A, B, __W, __U);
+ __builtin_ia32_reducebf16128_mask (__A, B, __W, __U);
}
extern __inline__ __m128bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_reducene_pbh (__mmask8 __U, __m128bh __A, int B)
+_mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B)
{
return (__m128bh)
- __builtin_ia32_reducenepbf16128_mask (__A, B,
- (__v8bf) _mm_setzero_si128 (),
- __U);
+ __builtin_ia32_reducebf16128_mask (__A, B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
}
#else
-#define _mm256_reducene_pbh(A, B) \
- (__builtin_ia32_reducenepbf16256_mask ((A), (B), \
- (__v16bf) _mm256_setzero_si256 (), \
- (__mmask16) -1))
+#define _mm256_reduce_pbh(A, B) \
+ (__builtin_ia32_reducebf16256_mask ((A), (B), \
+ (__v16bf) _mm256_setzero_si256 (), \
+ (__mmask16) -1))
-#define _mm256_mask_reducene_pbh(A, B, C, D) \
- (__builtin_ia32_reducenepbf16256_mask ((C), (D), (A), (B)))
+#define _mm256_mask_reduce_pbh(A, B, C, D) \
+ (__builtin_ia32_reducebf16256_mask ((C), (D), (A), (B)))
-#define _mm256_maskz_reducene_pbh(A, B, C) \
- (__builtin_ia32_reducenepbf16256_mask ((B), (C), \
- (__v16bf) _mm256_setzero_si256 (), \
- (A)))
+#define _mm256_maskz_reduce_pbh(A, B, C) \
+ (__builtin_ia32_reducebf16256_mask ((B), (C), \
+ (__v16bf) _mm256_setzero_si256 (), \
+ (A)))
-#define _mm_reducene_pbh(A, B) \
- (__builtin_ia32_reducenepbf16128_mask ((A), (B), \
- (__v8bf) _mm_setzero_si128 (), \
- (__mmask8) -1))
+#define _mm_reduce_pbh(A, B) \
+ (__builtin_ia32_reducebf16128_mask ((A), (B), \
+ (__v8bf) _mm_setzero_si128 (), \
+ (__mmask8) -1))
-#define _mm_mask_reducene_pbh(A, B, C, D) \
- (__builtin_ia32_reducenepbf16128_mask ((C), (D), (A), (B)))
+#define _mm_mask_reduce_pbh(A, B, C, D) \
+ (__builtin_ia32_reducebf16128_mask ((C), (D), (A), (B)))
-#define _mm_maskz_reducene_pbh(A, B, C) \
- (__builtin_ia32_reducenepbf16128_mask ((B), (C), \
- (__v8bf) _mm_setzero_si128 (), \
- (A)))
+#define _mm_maskz_reduce_pbh(A, B, C) \
+ (__builtin_ia32_reducebf16128_mask ((B), (C), \
+ (__v8bf) _mm_setzero_si128 (), \
+ (A)))
#endif /* __OPTIMIZE__ */
-/* Intrinsics vgetmantpbf16. */
+/* Intrinsics vgetmantbf16. */
#ifdef __OPTIMIZE__
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -1088,9 +1088,9 @@ _mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m256bh)
- __builtin_ia32_getmantpbf16256_mask (__A, (int) (__C << 2) | __B,
- (__v16bf) _mm256_setzero_si256 (),
- (__mmask16) -1);
+ __builtin_ia32_getmantbf16256_mask (__A, (int) (__C << 2) | __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ (__mmask16) -1);
}
extern __inline__ __m256bh
@@ -1100,8 +1100,8 @@ _mm256_mask_getmant_pbh (__m256bh __W, __mmask16 __U, __m256bh __A,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m256bh)
- __builtin_ia32_getmantpbf16256_mask (__A, (int) (__C << 2) | __B,
- __W, __U);
+ __builtin_ia32_getmantbf16256_mask (__A, (int) (__C << 2) | __B,
+ __W, __U);
}
extern __inline__ __m256bh
@@ -1111,9 +1111,9 @@ _mm256_maskz_getmant_pbh (__mmask16 __U, __m256bh __A,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m256bh)
- __builtin_ia32_getmantpbf16256_mask (__A, (int) (__C << 2) | __B,
- (__v16bf) _mm256_setzero_si256 (),
- __U);
+ __builtin_ia32_getmantbf16256_mask (__A, (int) (__C << 2) | __B,
+ (__v16bf) _mm256_setzero_si256 (),
+ __U);
}
extern __inline__ __m128bh
@@ -1122,9 +1122,9 @@ _mm_getmant_pbh (__m128bh __A, _MM_MANTISSA_NORM_ENUM __B,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m128bh)
- __builtin_ia32_getmantpbf16128_mask (__A, (int) (__C << 2) | __B,
- (__v8bf) _mm_setzero_si128 (),
- (__mmask8) -1);
+ __builtin_ia32_getmantbf16128_mask (__A, (int) (__C << 2) | __B,
+ (__v8bf) _mm_setzero_si128 (),
+ (__mmask8) -1);
}
extern __inline__ __m128bh
@@ -1134,8 +1134,8 @@ _mm_mask_getmant_pbh (__m128bh __W, __mmask8 __U, __m128bh __A,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m128bh)
- __builtin_ia32_getmantpbf16128_mask (__A, (int) (__C << 2) | __B,
- __W, __U);
+ __builtin_ia32_getmantbf16128_mask (__A, (int) (__C << 2) | __B,
+ __W, __U);
}
extern __inline__ __m128bh
@@ -1145,36 +1145,36 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A,
_MM_MANTISSA_SIGN_ENUM __C)
{
return (__m128bh)
- __builtin_ia32_getmantpbf16128_mask (__A, (int) (__C << 2) | __B,
- (__v8bf) _mm_setzero_si128 (),
- __U);
+ __builtin_ia32_getmantbf16128_mask (__A, (int) (__C << 2) | __B,
+ (__v8bf) _mm_setzero_si128 (),
+ __U);
}
#else
#define _mm256_getmant_pbh(A, B, C) \
- (__builtin_ia32_getmantpbf16256_mask ((A), (int)(((C)<<2) | (B)), \
- (__v16bf) _mm256_setzero_si256 (), \
- (__mmask16) (-1)))
+ (__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \
+ (__v16bf) _mm256_setzero_si256 (), \
+ (__mmask16) (-1)))
#define _mm256_mask_getmant_pbh(A, B, C, D, E) \
- (__builtin_ia32_getmantpbf16256_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
+ (__builtin_ia32_getmantbf16256_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
#define _mm256_maskz_getmant_pbh(A, B, C, D) \
- (__builtin_ia32_getmantpbf16256_mask ((B), (int)(((C)<<2) | (D)), \
- (__v16bf) _mm256_setzero_si256 (), \
- (A)))
+ (__builtin_ia32_getmantbf16256_mask ((B), (int)(((C)<<2) | (D)), \
+ (__v16bf) _mm256_setzero_si256 (), \
+ (A)))
#define _mm_getmant_pbh(A, B, C) \
- (__builtin_ia32_getmantpbf16128_mask ((A), (int)(((C)<<2) | (B)), \
- (__v8bf) _mm_setzero_si128 (), \
- (__mmask8) (-1)))
+ (__builtin_ia32_getmantbf16128_mask ((A), (int)(((C)<<2) | (B)), \
+ (__v8bf) _mm_setzero_si128 (), \
+ (__mmask8) (-1)))
#define _mm_mask_getmant_pbh(A, B, C, D, E) \
- (__builtin_ia32_getmantpbf16128_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
+ (__builtin_ia32_getmantbf16128_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
#define _mm_maskz_getmant_pbh(A, B, C, D) \
- (__builtin_ia32_getmantpbf16128_mask ((B), (int)(((C)<<2) | (D)), \
- (__v8bf) _mm_setzero_si128 (), (A)))
+ (__builtin_ia32_getmantbf16128_mask ((B), (int)(((C)<<2) | (D)), \
+ (__v8bf) _mm_setzero_si128 (), (A)))
#endif /* __OPTIMIZE__ */
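After the rename, the 256/128-bit spellings line up with their AVX-512
float counterparts.  A short usage sketch mirroring the testsuite calls
below, assuming the usual imm8 layout where 0x10 puts M = 1 in imm8[7:4]
and imm8[1:0] = 0 selects round-to-nearest-even:

#include <immintrin.h>

__m256bh
bf16_imm_demo (__m256bh x, __mmask16 m)
{
  __m256bh r = _mm256_roundscale_pbh (x, 0x10);   /* all lanes */
  r = _mm256_mask_roundscale_pbh (r, m, x, 0x10); /* merge-masked */
  return _mm256_maskz_reduce_pbh (m, x, 0x10);    /* zero-masked */
}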
@@ -3269,15 +3269,15 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rcpbf16_v8bf_mask, "__b
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_getexppbf16_v32bf_mask, "__builtin_ia32_getexppbf16512_mask", IX86_BUILTIN_GETEXPPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getexppbf16_v16bf_mask, "__builtin_ia32_getexppbf16256_mask", IX86_BUILTIN_GETEXPPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getexppbf16_v8bf_mask, "__builtin_ia32_getexppbf16128_mask", IX86_BUILTIN_GETEXPPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rndscalenepbf16_v32bf_mask, "__builtin_ia32_rndscalenepbf16512_mask", IX86_BUILTIN_RNDSCALENEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalenepbf16_v16bf_mask, "__builtin_ia32_rndscalenepbf16256_mask", IX86_BUILTIN_RNDSCALENEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalenepbf16_v8bf_mask, "__builtin_ia32_rndscalenepbf16128_mask", IX86_BUILTIN_RNDSCALENEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_reducenepbf16_v32bf_mask, "__builtin_ia32_reducenepbf16512_mask", IX86_BUILTIN_REDUCENEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducenepbf16_v16bf_mask, "__builtin_ia32_reducenepbf16256_mask", IX86_BUILTIN_REDUCENEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducenepbf16_v8bf_mask, "__builtin_ia32_reducenepbf16128_mask", IX86_BUILTIN_REDUCENEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_getmantpbf16_v32bf_mask, "__builtin_ia32_getmantpbf16512_mask", IX86_BUILTIN_GETMANTPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantpbf16_v16bf_mask, "__builtin_ia32_getmantpbf16256_mask", IX86_BUILTIN_GETMANTPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantpbf16_v8bf_mask, "__builtin_ia32_getmantpbf16128_mask", IX86_BUILTIN_GETMANTPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rndscalebf16_v32bf_mask, "__builtin_ia32_rndscalebf16512_mask", IX86_BUILTIN_RNDSCALEBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalebf16_v16bf_mask, "__builtin_ia32_rndscalebf16256_mask", IX86_BUILTIN_RNDSCALEBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalebf16_v8bf_mask, "__builtin_ia32_rndscalebf16128_mask", IX86_BUILTIN_RNDSCALEBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_reducebf16_v32bf_mask, "__builtin_ia32_reducebf16512_mask", IX86_BUILTIN_REDUCEBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducebf16_v16bf_mask, "__builtin_ia32_reducebf16256_mask", IX86_BUILTIN_REDUCEBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducebf16_v8bf_mask, "__builtin_ia32_reducebf16128_mask", IX86_BUILTIN_REDUCEBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_getmantbf16_v32bf_mask, "__builtin_ia32_getmantbf16512_mask", IX86_BUILTIN_GETMANTBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantbf16_v16bf_mask, "__builtin_ia32_getmantbf16256_mask", IX86_BUILTIN_GETMANTBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantbf16_v8bf_mask, "__builtin_ia32_getmantbf16128_mask", IX86_BUILTIN_GETMANTBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fpclasspbf16_v32bf_mask, "__builtin_ia32_fpclasspbf16512_mask", IX86_BUILTIN_FPCLASSPBF16512_MASK, UNKNOWN, (int) SI_FTYPE_V32BF_INT_USI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v16bf_mask, "__builtin_ia32_fpclasspbf16256_mask", IX86_BUILTIN_FPCLASSPBF16256_MASK, UNKNOWN, (int) HI_FTYPE_V16BF_INT_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v8bf_mask, "__builtin_ia32_fpclasspbf16128_mask", IX86_BUILTIN_FPCLASSPBF16128_MASK, UNKNOWN, (int) QI_FTYPE_V8BF_INT_UQI)
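The BDESC entries also pin down each renamed builtin's shape; for the
512-bit rndscale entry, V32BF_FTYPE_V32BF_INT_V32BF_USI corresponds to a
declaration along these lines (a sketch only; the declaration is
synthesized by the compiler, not spelled out in any header):

__v32bf __builtin_ia32_rndscalebf16512_mask (__v32bf src, int imm,
                                             __v32bf merge, unsigned int mask);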
@@ -230,9 +230,9 @@
UNSPEC_VCVTNEPH2HF8S
UNSPEC_VCVTHF82PH
UNSPEC_VSCALEFPBF16
- UNSPEC_VRNDSCALENEPBF16
- UNSPEC_VREDUCENEPBF16
- UNSPEC_VGETMANTPBF16
+ UNSPEC_VRNDSCALEBF16
+ UNSPEC_VREDUCEBF16
+ UNSPEC_VGETMANTBF16
UNSPEC_VFPCLASSPBF16
UNSPEC_VCOMSBF16
UNSPEC_VCVTNEBF162IBS
@@ -32407,23 +32407,23 @@
[(set_attr "prefix" "evex")])
(define_int_iterator BF16IMMOP
- [UNSPEC_VRNDSCALENEPBF16
- UNSPEC_VREDUCENEPBF16
- UNSPEC_VGETMANTPBF16])
+ [UNSPEC_VRNDSCALEBF16
+ UNSPEC_VREDUCEBF16
+ UNSPEC_VGETMANTBF16])
(define_int_attr bf16immop
- [(UNSPEC_VRNDSCALENEPBF16 "rndscalene")
- (UNSPEC_VREDUCENEPBF16 "reducene")
- (UNSPEC_VGETMANTPBF16 "getmant")])
+ [(UNSPEC_VRNDSCALEBF16 "rndscale")
+ (UNSPEC_VREDUCEBF16 "reduce")
+ (UNSPEC_VGETMANTBF16 "getmant")])
-(define_insn "avx10_2_<bf16immop>pbf16_<mode><mask_name>"
+(define_insn "avx10_2_<bf16immop>bf16_<mode><mask_name>"
[(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
(unspec:VBF_AVX10_2
[(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
BF16IMMOP))]
"TARGET_AVX10_2_256"
- "v<bf16immop>pbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ "v<bf16immop>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_fpclasspbf16_<mode><mask_scalar_merge_name>"
@@ -1017,19 +1017,19 @@
#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
/* avx10_2-512bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16512_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
/* avx10_2bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_rndscalenepbf16128_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16256_mask(A, B, C, D) __builtin_ia32_reducenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16128_mask(A, B, C, D) __builtin_ia32_reducenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16256_mask(A, B, C, D) __builtin_ia32_getmantpbf16256_mask(A, 1, C, D)
-#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
#define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)
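/* For context, what these overrides achieve: each renamed builtin that
   takes an immediate is rewritten by the preprocessor so its imm
   operand is a literal, e.g.
     __builtin_ia32_rndscalebf16512_mask (a, b, c, d)
   becomes
     __builtin_ia32_rndscalebf16512_mask (a, 123, c, d),
   keeping the constant-operand check on the renamed builtins satisfied
   however the header code reaches them.  */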
@@ -49,15 +49,15 @@
/* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
@@ -125,12 +125,12 @@ avx10_2_512_test (void)
res = _mm512_mask_getexp_pbh (res, m32, x1);
res = _mm512_maskz_getexp_pbh (m32, x1);
- res = _mm512_roundscalene_pbh (x1, IMM);
- res = _mm512_mask_roundscalene_pbh (res, m32, x1, IMM);
- res = _mm512_maskz_roundscalene_pbh (m32, x1, IMM);
- res = _mm512_reducene_pbh (x1, IMM);
- res = _mm512_mask_reducene_pbh (res, m32, x1, IMM);
- res = _mm512_maskz_reducene_pbh (m32, x1, IMM);
+ res = _mm512_roundscale_pbh (x1, IMM);
+ res = _mm512_mask_roundscale_pbh (res, m32, x1, IMM);
+ res = _mm512_maskz_roundscale_pbh (m32, x1, IMM);
+ res = _mm512_reduce_pbh (x1, IMM);
+ res = _mm512_mask_reduce_pbh (res, m32, x1, IMM);
+ res = _mm512_maskz_reduce_pbh (m32, x1, IMM);
res = _mm512_getmant_pbh (x1, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
res = _mm512_mask_getmant_pbh (res, m32, x1, _MM_MANT_NORM_p75_1p5,
_MM_MANT_SIGN_src);
similarity index 100%
rename from gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c
rename to gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantbf16-2.c
similarity index 87%
rename from gcc/testsuite/gcc.target/i386/avx10_2-512-vreducenepbf16-2.c
rename to gcc/testsuite/gcc.target/i386/avx10_2-512-vreducebf16-2.c
@@ -34,9 +34,9 @@ TEST (void)
res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res);
}
- res1.x = INTRINSIC (_reducene_pbh) (src1.x, 0x10);
- res2.x = INTRINSIC (_mask_reducene_pbh) (res2.x, mask, src1.x, 0x10);
- res3.x = INTRINSIC (_maskz_reducene_pbh) (mask, src1.x, 0x10);
+ res1.x = INTRINSIC (_reduce_pbh) (src1.x, 0x10);
+ res2.x = INTRINSIC (_mask_reduce_pbh) (res2.x, mask, src1.x, 0x10);
+ res3.x = INTRINSIC (_maskz_reduce_pbh) (mask, src1.x, 0x10);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
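/* A scalar reference for the values this test checks, a sketch assuming
   the architectural VREDUCE definition src - round(src * 2^M) / 2^M,
   with M = imm8[7:4] (0x10 -> M = 1) and imm8[1:0] = 0 selecting
   round-to-nearest-even: */
#include <math.h>

static float
reduce_ref (float x, int imm8)
{
  float scale = (float) (1 << (imm8 >> 4));	/* 2^M */
  return x - nearbyintf (x * scale) / scale;
}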
similarity index 84%
rename from gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c
rename to gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalebf16-2.c
@@ -30,9 +30,9 @@ TEST (void)
res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (rndscale (x));
}
- res1.x = INTRINSIC (_roundscalene_pbh) (src1.x, 0x10);
- res2.x = INTRINSIC (_mask_roundscalene_pbh) (res2.x, mask, src1.x, 0x10);
- res3.x = INTRINSIC (_maskz_roundscalene_pbh) (mask, src1.x, 0x10);
+ res1.x = INTRINSIC (_roundscale_pbh) (src1.x, 0x10);
+ res2.x = INTRINSIC (_mask_roundscale_pbh) (res2.x, mask, src1.x, 0x10);
+ res3.x = INTRINSIC (_maskz_roundscale_pbh) (mask, src1.x, 0x10);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
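/* The rndscale helper used in the reference loop is defined outside
   this hunk; a sketch of the usual VRNDSCALE semantics it would
   implement, round(src * 2^M) / 2^M with M = imm8[7:4] (0x10 -> M = 1)
   and round-to-nearest-even: */
#include <math.h>

static float
rndscale_ref (float x, int imm8)
{
  float scale = (float) (1 << (imm8 >> 4));	/* 2^M */
  return nearbyintf (x * scale) / scale;
}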
@@ -98,24 +98,24 @@
/* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16y\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16y\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vfpclasspbf16x\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
@@ -249,19 +249,19 @@ avx10_2_test (void)
res1 = _mm_mask_getexp_pbh (res1, m8, x3);
res1 = _mm_maskz_getexp_pbh (m8, x3);
- res = _mm256_roundscalene_pbh (x1, IMM);
- res = _mm256_mask_roundscalene_pbh (res, m16, x1, IMM);
- res = _mm256_maskz_roundscalene_pbh (m16, x1, IMM);
- res1 = _mm_roundscalene_pbh (x3, IMM);
- res1 = _mm_mask_roundscalene_pbh (res1, m8, x3, IMM);
- res1 = _mm_maskz_roundscalene_pbh (m8, x3, IMM);
+ res = _mm256_roundscale_pbh (x1, IMM);
+ res = _mm256_mask_roundscale_pbh (res, m16, x1, IMM);
+ res = _mm256_maskz_roundscale_pbh (m16, x1, IMM);
+ res1 = _mm_roundscale_pbh (x3, IMM);
+ res1 = _mm_mask_roundscale_pbh (res1, m8, x3, IMM);
+ res1 = _mm_maskz_roundscale_pbh (m8, x3, IMM);
- res = _mm256_reducene_pbh (x1, IMM);
- res = _mm256_mask_reducene_pbh (res, m16, x1, IMM);
- res = _mm256_maskz_reducene_pbh (m16, x1, IMM);
- res1 = _mm_reducene_pbh (x3, IMM);
- res1 = _mm_mask_reducene_pbh (res1, m8, x3, IMM);
- res1 = _mm_maskz_reducene_pbh (m8, x3, IMM);
+ res = _mm256_reduce_pbh (x1, IMM);
+ res = _mm256_mask_reduce_pbh (res, m16, x1, IMM);
+ res = _mm256_maskz_reduce_pbh (m16, x1, IMM);
+ res1 = _mm_reduce_pbh (x3, IMM);
+ res1 = _mm_mask_reduce_pbh (res1, m8, x3, IMM);
+ res1 = _mm_maskz_reduce_pbh (m8, x3, IMM);
res = _mm256_getmant_pbh (x1, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
res = _mm256_mask_getmant_pbh (res, m16, x1, _MM_MANT_NORM_p75_1p5,
similarity index 78%
rename from gcc/testsuite/gcc.target/i386/avx10_2-vgetmantpbf16-2.c
rename to gcc/testsuite/gcc.target/i386/avx10_2-vgetmantbf16-2.c
@@ -6,11 +6,11 @@
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vgetmantpbf16-2.c"
+#include "avx10_2-512-vgetmantbf16-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vgetmantpbf16-2.c"
+#include "avx10_2-512-vgetmantbf16-2.c"
similarity index 78%
rename from gcc/testsuite/gcc.target/i386/avx10_2-vreducenepbf16-2.c
rename to gcc/testsuite/gcc.target/i386/avx10_2-vreducebf16-2.c
@@ -6,11 +6,11 @@
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vreducenepbf16-2.c"
+#include "avx10_2-512-vreducebf16-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vreducenepbf16-2.c"
+#include "avx10_2-512-vreducebf16-2.c"
similarity index 77%
rename from gcc/testsuite/gcc.target/i386/avx10_2-vrndscalenepbf16-2.c
rename to gcc/testsuite/gcc.target/i386/avx10_2-vrndscalebf16-2.c
@@ -6,11 +6,11 @@
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vrndscalenepbf16-2.c"
+#include "avx10_2-512-vrndscalebf16-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vrndscalenepbf16-2.c"
+#include "avx10_2-512-vrndscalebf16-2.c"
@@ -1025,19 +1025,19 @@
#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
/* avx10_2-512bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16512_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
/* avx10_2bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_rndscalenepbf16128_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16256_mask(A, B, C, D) __builtin_ia32_reducenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16128_mask(A, B, C, D) __builtin_ia32_reducenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16256_mask(A, B, C, D) __builtin_ia32_getmantpbf16256_mask(A, 1, C, D)
-#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
#define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)
@@ -1390,12 +1390,12 @@ test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
/* avx10_2-512bf16intrin.h */
-test_1 (_mm512_roundscalene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_roundscalene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_roundscalene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
-test_1 (_mm512_reducene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_reducene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_reducene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1)
test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1)
test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1)
@@ -1405,18 +1405,18 @@ test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1)
test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1)
/* avx10_2bf16intrin.h */
-test_1 (_mm256_roundscalene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_roundscalene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_roundscalene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_roundscalene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_roundscalene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_roundscalene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
-test_1 (_mm256_reducene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_reducene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_reducene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_reducene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_reducene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_reducene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_roundscale_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_roundscale_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_roundscale_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_roundscale_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_roundscale_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_roundscale_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_reduce_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_reduce_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_reduce_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_reduce_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_reduce_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_reduce_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
test_1x (_mm256_getmant_pbh, __m256bh, __m256bh, 1, 1)
test_1x (_mm_getmant_pbh, __m128bh, __m128bh, 1, 1)
test_2x (_mm256_maskz_getmant_pbh, __m256bh, __mmask16,__m256bh, 1, 1)
@@ -1429,12 +1429,12 @@ test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
/* avx10_2-512bf16intrin.h */
-test_1 (_mm512_roundscalene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_roundscalene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_roundscalene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
-test_1 (_mm512_reducene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_reducene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_reducene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1)
test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1)
test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1)
@@ -1444,18 +1444,18 @@ test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1)
test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1)
/* avx10_2bf16intrin.h */
-test_1 (_mm256_roundscalene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_roundscalene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_roundscalene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_roundscalene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_roundscalene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_roundscalene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
-test_1 (_mm256_reducene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_reducene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_reducene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_reducene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_reducene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_reducene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_roundscale_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_roundscale_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_roundscale_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_roundscale_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_roundscale_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_roundscale_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_reduce_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_reduce_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_reduce_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_reduce_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_reduce_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_reduce_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
test_1x (_mm256_getmant_pbh, __m256bh, __m256bh, 1, 1)
test_1x (_mm_getmant_pbh, __m128bh, __m128bh, 1, 1)
test_2x (_mm256_maskz_getmant_pbh, __m256bh, __mmask16,__m256bh, 1, 1)
@@ -999,19 +999,19 @@
#define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
/* avx10_2-512bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16512_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
/* avx10_2bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_rndscalenepbf16128_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16256_mask(A, B, C, D) __builtin_ia32_reducenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16128_mask(A, B, C, D) __builtin_ia32_reducenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16256_mask(A, B, C, D) __builtin_ia32_getmantpbf16256_mask(A, 1, C, D)
-#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D)
#define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
#define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)