@@ -5,16 +5,28 @@
#include <immintrin.h>
-extern __m256i i1, i2, i3, i4;
-extern __m256d d1, d2;
-extern __m256 f1, f2;
+__m256i
+foo0 (__m256i i3, __m256i i1, __m256i i2)
+{
+ return _mm256_permutevar8x32_epi32 (i1, i2);
+}
+
+__m256i
+foo1 (__m256i i2, __m256i i1)
+{
+ return _mm256_permute4x64_epi64 (i1, 12);
+}
+
+__m256d
+foo2 (__m256d d2, __m256d d1)
+{
+ return _mm256_permute4x64_pd (d1, 12);
+}
-void vperm_test (void)
+__m256
+foo3 (__m256 f2, __m256i i2, __m256 f1)
{
- i3 = _mm256_permutevar8x32_epi32 (i1, i2);
- i4 = _mm256_permute4x64_epi64 (i1, 12);
- d2 = _mm256_permute4x64_pd (d1, 12);
- f2 = _mm256_permutevar8x32_ps (f1, i2);
+ return _mm256_permutevar8x32_ps (f1, i2);
}
/* { dg-final { scan-assembler-times "vxorps" 4 } } */
@@ -13,56 +13,219 @@ extern __m512 f1, f11;
extern __m256 f2;
extern __m128 f3, f33;
-__mmask32 m32;
__mmask16 m16;
__mmask8 m8;
-void mullo_test (void)
-{
- i1 = _mm512_mullo_epi64 (i1, i1);
- i1 = _mm512_mask_mullo_epi64 (i1, m8, i1, i1);
- i1 = _mm512_maskz_mullo_epi64 (m8, i1, i1);
- i2 = _mm256_mullo_epi64 (i2, i2);
- i2 = _mm256_mask_mullo_epi64 (i2, m8, i2, i2);
- i2 = _mm256_maskz_mullo_epi64 (m8, i2, i2);
- i3 = _mm_mullo_epi64 (i3, i3);
- i3 = _mm_mask_mullo_epi64 (i3, m8, i3, i3);
- i3 = _mm_maskz_mullo_epi64 (m8, i3, i3);
-}
-
-void range_test (void)
-{
- d1 = _mm512_range_pd (d1, d11, 15);
- d11 = _mm512_range_round_pd (d11, d1, 15, 8);
- d1 = _mm512_mask_range_pd (d1, m8, d11, d11, 15);
- d11 = _mm512_mask_range_round_pd (d11, m8, d1, d1, 15, 8);
- d1 = _mm512_maskz_range_pd (m8, d11, d11, 15);
- d11 = _mm512_maskz_range_round_pd (m8, d1, d1, 15, 8);
- d2 = _mm256_range_pd (d2, d2, 15);
- d2 = _mm256_mask_range_pd (d2, m8, d2, d2, 15);
- d2 = _mm256_maskz_range_pd (m8, d2, d2, 15);
- d3 = _mm_range_pd (d3, d3, 15);
- d3 = _mm_mask_range_pd (d3, m8, d3, d3, 15);
- d3 = _mm_maskz_range_pd (m8, d3, d3, 15);
- d33 = _mm_range_sd (d33, d33, 15);
- d33 = _mm_mask_range_sd (d33, m8, d33, d33, 15);
- d33 = _mm_maskz_range_sd (m8, d33, d33, 15);
-
- f1 = _mm512_range_ps (f1, f11, 15);
- f11 = _mm512_range_round_ps (f11, f1, 15, 8);
- f1 = _mm512_mask_range_ps (f1, m16, f11, f11, 15);
- f11 = _mm512_mask_range_round_ps (f11, m16, f1, f1, 15, 8);
- f1 = _mm512_maskz_range_ps (m16, f11, f11, 15);
- f11 = _mm512_maskz_range_round_ps (m16, f1, f1, 15, 8);
- f2 = _mm256_range_ps (f2, f2, 15);
- f2 = _mm256_mask_range_ps (f2, m8, f2, f2, 15);
- f2 = _mm256_maskz_range_ps (m8, f2, f2, 15);
- f3 = _mm_range_ps (f3, f3, 15);
- f3 = _mm_mask_range_ps (f3, m8, f3, f3, 15);
- f3 = _mm_maskz_range_ps (m8, f3, f3, 15);
- f33 = _mm_range_ss (f33, f33, 15);
- f33 = _mm_mask_range_ss (f33, m8, f33, f33, 15);
- f33 = _mm_maskz_range_ss (m8, f33, f33, 15);
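+/* Each macro below expands to a small wrapper function: MULLO returns
+   func (i1, i1), MULLO_MASK the merge-masked form under mask m8, and
+   MULLO_MASKZ the zero-masked form.  The leading i2 parameter is unused.  */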
+#define MULLO(func, type) \
+ type \
+ mullo##type (type i2, type i1) \
+ { \
+ return func (i1, i1); \
+ }
+
+#define MULLO_MASK(func, type) \
+ type \
+ mullo_mask##type (type i2, type i1) \
+ { \
+ return func (i1, m8, i1, i1); \
+ }
+
+#define MULLO_MASKZ(func, type) \
+ type \
+  mullo_maskz##type (type i2, type i1) \
+ { \
+ return func (m8, i1, i1); \
+ }
+
+MULLO (_mm512_mullo_epi64, __m512i);
+MULLO_MASK (_mm512_mask_mullo_epi64, __m512i);
+MULLO_MASKZ (_mm512_maskz_mullo_epi64, __m512i);
+MULLO (_mm256_mullo_epi64, __m256i);
+MULLO_MASK (_mm256_mask_mullo_epi64, __m256i);
+MULLO_MASKZ (_mm256_maskz_mullo_epi64, __m256i);
+MULLO (_mm_mullo_epi64, __m128i);
+MULLO_MASK (_mm_mask_mullo_epi64, __m128i);
+MULLO_MASKZ (_mm_maskz_mullo_epi64, __m128i);
+
+
+__m512d
+foo1 (__m512d d2, __m512d d1, __m512d d11)
+{
+ return _mm512_range_pd (d1, d11, 15);
+}
+
+__m512d
+foo2 (__m512d d2, __m512d d1, __m512d d11)
+{
+ return _mm512_range_round_pd (d11, d1, 15, 8);
+}
+
+__m512d
+foo3 (__m512d d2, __m512d d1, __m512d d11)
+{
+ return _mm512_mask_range_pd (d1, m8, d11, d11, 15);
+}
+
+__m512d
+foo4 (__m512d d2, __m512d d1, __m512d d11)
+{
+ return _mm512_mask_range_round_pd (d11, m8, d1, d1, 15, 8);
+}
+
+__m512d
+foo5 (__m512d d2, __m512d d1, __m512d d11)
+{
+ return _mm512_maskz_range_pd (m8, d11, d11, 15);
+}
+
+__m512d
+foo6 (__m512d d2, __m512d d1, __m512d d11)
+{
+ return _mm512_maskz_range_round_pd (m8, d1, d1, 15, 8);
+}
+
+__m256d
+foo7 (__m256d d1, __m256d d2)
+{
+ return _mm256_range_pd (d2, d2, 15);
+}
+
+__m256d
+foo8 (__m256d d1, __m256d d2)
+{
+ return _mm256_mask_range_pd (d2, m8, d2, d2, 15);
+}
+
+__m256d
+foo9 (__m256d d1, __m256d d2)
+{
+ return _mm256_maskz_range_pd (m8, d2, d2, 15);
+}
+
+__m128d
+foo10 (__m128d d1, __m128d d3)
+{
+ return _mm_range_pd (d3, d3, 15);
+}
+
+__m128d
+foo11 (__m128d d1, __m128d d3)
+{
+ return _mm_mask_range_pd (d3, m8, d3, d3, 15);
+}
+
+__m128d
+foo12 (__m128d d1, __m128d d3)
+{
+ return _mm_maskz_range_pd (m8, d3, d3, 15);
+}
+
+__m128d
+foo13 (__m128d d1, __m128d d33)
+{
+ return _mm_range_sd (d33, d33, 15);
+}
+
+__m128d
+foo14 (__m128d d1, __m128d d33)
+{
+ return _mm_mask_range_sd (d33, m8, d33, d33, 15);
+}
+
+__m128d
+foo15 (__m128d d1, __m128d d33)
+{
+ return _mm_maskz_range_sd (m8, d33, d33, 15);
+}
+
+__m512
+bar1 (__m512 d2, __m512 d1, __m512 d11)
+{
+ return _mm512_range_ps (d1, d11, 15);
+}
+
+__m512
+bar2 (__m512 d2, __m512 d1, __m512 d11)
+{
+ return _mm512_range_round_ps (d11, d1, 15, 8);
+}
+
+__m512
+bar3 (__m512 d2, __m512 d1, __m512 d11)
+{
+ return _mm512_mask_range_ps (d1, m16, d11, d11, 15);
+}
+
+__m512
+bar4 (__m512 d2, __m512 d1, __m512 d11)
+{
+ return _mm512_mask_range_round_ps (d11, m16, d1, d1, 15, 8);
+}
+
+__m512
+bar5 (__m512 d2, __m512 d1, __m512 d11)
+{
+ return _mm512_maskz_range_ps (m16, d11, d11, 15);
+}
+
+__m512
+bar6 (__m512 d2, __m512 d1, __m512 d11)
+{
+ return _mm512_maskz_range_round_ps (m16, d1, d1, 15, 8);
+}
+
+__m256
+bar7 (__m256 d1, __m256 d2)
+{
+ return _mm256_range_ps (d2, d2, 15);
+}
+
+__m256
+bar8 (__m256 d1, __m256 d2)
+{
+ return _mm256_mask_range_ps (d2, m8, d2, d2, 15);
+}
+
+__m256
+bar9 (__m256 d1, __m256 d2)
+{
+ return _mm256_maskz_range_ps (m8, d2, d2, 15);
+}
+
+__m128
+bar10 (__m128 d1, __m128 d3)
+{
+ return _mm_range_ps (d3, d3, 15);
+}
+
+__m128
+bar11 (__m128 d1, __m128 d3)
+{
+ return _mm_mask_range_ps (d3, m8, d3, d3, 15);
+}
+
+__m128
+bar12 (__m128 d1, __m128 d3)
+{
+ return _mm_maskz_range_ps (m8, d3, d3, 15);
+}
+
+__m128
+bar13 (__m128 d1, __m128 d33)
+{
+ return _mm_range_ss (d33, d33, 15);
+}
+
+__m128
+bar14 (__m128 d1, __m128 d33)
+{
+ return _mm_mask_range_ss (d33, m8, d33, d33, 15);
+}
+
+__m128
+bar15 (__m128 d1, __m128 d33)
+{
+ return _mm_maskz_range_ss (m8, d33, d33, 15);
}
/* { dg-final { scan-assembler-times "vxorps" 26 } } */
@@ -13,86 +13,288 @@ volatile __m512d *pd11;
__mmask16 m16;
__mmask8 m8;
-void vperm_test (void)
-{
- d1 = _mm512_permutex_pd (d1, 12);
- d1 = _mm512_mask_permutex_pd (d1, m8, d1, 13);
- d1 = _mm512_maskz_permutex_pd (m8, d1, 14);
- d11 = _mm512_permutexvar_pd (i1, d11);
- d11 = _mm512_mask_permutexvar_pd (d11, m8, i2, d11);
- d11 = _mm512_maskz_permutexvar_pd (m8, i3, d11);
-
- f1 = _mm512_permutexvar_ps (i1, f1);
- f1 = _mm512_mask_permutexvar_ps (f1, m16, i1, f1);
- f1 = _mm512_maskz_permutexvar_ps (m16, i1, f1);
-
- i3 = _mm512_permutexvar_epi64 (i3, i3);
- i3 = _mm512_mask_permutexvar_epi64 (i3, m8, i1, i1);
- i3 = _mm512_maskz_permutexvar_epi64 (m8, i3, i1);
- i1 = _mm512_permutex_epi64 (i3, 12);
- i1 = _mm512_mask_permutex_epi64 (i1, m8, i1, 12);
- i1 = _mm512_maskz_permutex_epi64 (m8, i1, 12);
-
- i2 = _mm512_permutexvar_epi32 (i2, i2);
- i2 = _mm512_mask_permutexvar_epi32 (i2, m16, i2, i2);
- i3 = _mm512_maskz_permutexvar_epi32 (m16, i3, i3);
-}
-
-void getmant_test (void)
-{
- d1 = _mm512_getmant_pd (*pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d1 = _mm512_getmant_round_pd (*pd11, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- d1 = _mm512_mask_getmant_pd (d1, m8, *pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d1 = _mm512_mask_getmant_round_pd (d1, m8, *pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- d1 = _mm512_maskz_getmant_pd (m8, *pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d1 = _mm512_maskz_getmant_round_pd (m8, *pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- f1 = _mm512_getmant_ps (*pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f1 = _mm512_getmant_round_ps (*pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- f1 = _mm512_mask_getmant_ps (f1, m16, *pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f1 = _mm512_mask_getmant_round_ps (f1, m16, *pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- f1 = _mm512_maskz_getmant_ps (m16, *pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f1 = _mm512_maskz_getmant_round_ps (m16, *pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
-
- d2 = _mm_getmant_sd (d2, d2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d2 = _mm_getmant_round_sd (d2, d2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- d2 = _mm_mask_getmant_sd (d2, m8, d2, d2, _MM_MANT_NORM_p75_1p5,
+__m512d
+foo1 (__m512d d2, __m512d d1)
+{
+ return _mm512_permutex_pd (d1, 12);
+}
+
+__m512d
+foo2 (__m512d d2, __m512d d1)
+{
+ return _mm512_mask_permutex_pd (d1, m8, d1, 13);
+}
+
+__m512d
+foo3 (__m512d d2, __m512d d1)
+{
+ return _mm512_maskz_permutex_pd (m8, d1, 14);
+}
+
+__m512d
+foo4 (__m512d d2, __m512d d11, __m512i i1)
+{
+ return _mm512_permutexvar_pd (i1, d11);
+}
+
+__m512d
+foo5 (__m512d d2, __m512d d11, __m512i i2)
+{
+ return _mm512_mask_permutexvar_pd (d11, m8, i2, d11);
+}
+
+__m512d
+foo6 (__m512d d2, __m512d d11, __m512i i3)
+{
+ return _mm512_maskz_permutexvar_pd (m8, i3, d11);
+}
+
+__m512i
+ioo1 (__m512i d2, __m512i d1)
+{
+ return _mm512_permutex_epi64 (d1, 12);
+}
+
+__m512i
+ioo2 (__m512i d2, __m512i d1)
+{
+ return _mm512_mask_permutex_epi64 (d1, m8, d1, 13);
+}
+
+__m512i
+ioo3 (__m512i d2, __m512i d1)
+{
+ return _mm512_maskz_permutex_epi64 (m8, d1, 14);
+}
+
+__m512i
+ioo4 (__m512i d2, __m512i d11, __m512i i1)
+{
+ return _mm512_permutexvar_epi64 (i1, d11);
+}
+
+__m512i
+ioo5 (__m512i d2, __m512i d11, __m512i i2)
+{
+ return _mm512_mask_permutexvar_epi64 (d11, m8, i2, d11);
+}
+
+__m512i
+ioo6 (__m512i d2, __m512i d11, __m512i i3)
+{
+ return _mm512_maskz_permutexvar_epi64 (m8, i3, d11);
+}
+
+__m512
+koo1 (__m512 f2, __m512i i1, __m512 f1)
+{
+ return _mm512_permutexvar_ps (i1, f1);
+}
+
+__m512
+koo2 (__m512 f2, __m512i i1, __m512 f1)
+{
+ return _mm512_mask_permutexvar_ps (f1, m16, i1, f1);
+}
+
+__m512
+koo3 (__m512 f2, __m512i i1, __m512 f1)
+{
+ return _mm512_maskz_permutexvar_ps (m16, i1, f1);
+}
+
+__m512i
+hoo1 (__m512i f2, __m512i i1, __m512i f1)
+{
+ return _mm512_permutexvar_epi32 (i1, f1);
+}
+
+__m512i
+hoo2 (__m512i f2, __m512i i1, __m512i f1)
+{
+ return _mm512_mask_permutexvar_epi32 (f1, m16, i1, f1);
+}
+
+__m512i
+hoo3 (__m512i f2, __m512i i1, __m512i f1)
+{
+ return _mm512_maskz_permutexvar_epi32 (m16, i1, f1);
+}
+
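+/* The getmant wrappers below read the non-mask source through a pointer,
+   which should allow the memory-operand form of the intrinsic.  */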
+__m512d
+moo1 (__m512d d2, __m512d* d1)
+{
+ return _mm512_getmant_pd (*d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m512d
+moo2 (__m512d d2, __m512d* d1)
+{
+ return _mm512_getmant_round_pd (*d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m512d
+moo3 (__m512d d2, __m512d d1, __m512d* d3)
+{
+ return _mm512_mask_getmant_pd (d1, m8, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m512d
+moo4 (__m512d d2, __m512d d1, __m512d* d3)
+{
+ return _mm512_mask_getmant_round_pd (d1, m8, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m512d
+moo5 (__m512d d2, __m512d* d1)
+{
+ return _mm512_maskz_getmant_pd (m8, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m512d
+moo6 (__m512d d2, __m512d* d1, __m512d d3)
+{
+ return _mm512_maskz_getmant_round_pd (m8, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m512
+noo1 (__m512 d2, __m512* d1)
+{
+ return _mm512_getmant_ps (*d1, _MM_MANT_NORM_p75_1p5,
_MM_MANT_SIGN_src);
- d2 = _mm_mask_getmant_round_sd (d2, m8, d2, d2, _MM_MANT_NORM_p75_1p5,
+}
+
+__m512
+noo2 (__m512 d2, __m512* d1)
+{
+ return _mm512_getmant_round_ps (*d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m512
+noo3 (__m512 d2, __m512 d1, __m512* d3)
+{
+ return _mm512_mask_getmant_ps (d1, m16, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m512
+noo4 (__m512 d2, __m512 d1, __m512* d3)
+{
+ return _mm512_mask_getmant_round_ps (d1, m16, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m512
+noo5 (__m512 d2, __m512* d1)
+{
+ return _mm512_maskz_getmant_ps (m16, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m512
+noo6 (__m512 d2, __m512* d1, __m512 d3)
+{
+ return _mm512_maskz_getmant_round_ps (m16, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+
+__m128d
+ooo1 (__m128d d2, __m128d d1)
+{
+ return _mm_getmant_sd (d1, d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128d
+ooo2 (__m128d d2, __m128d d1)
+{
+ return _mm_getmant_round_sd (d1, d1, _MM_MANT_NORM_p75_1p5,
_MM_MANT_SIGN_src, 8);
- d2 = _mm_maskz_getmant_sd (m8, d2, d2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d2 = _mm_maskz_getmant_round_sd (m8, d2, d2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- f2 = _mm_getmant_ss (f2, f2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f2 = _mm_getmant_round_ss (f2, f2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
- f2 = _mm_mask_getmant_ss (f2, m8, f2, f2, _MM_MANT_NORM_p75_1p5,
+}
+
+__m128d
+ooo3 (__m128d d2, __m128d d1, __m128d d3)
+{
+ return _mm_mask_getmant_sd (d1, m8, d3, d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128d
+ooo4 (__m128d d2, __m128d d1, __m128d d3)
+{
+ return _mm_mask_getmant_round_sd (d1, m8, d3, d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m128d
+ooo5 (__m128d d2, __m128d d1)
+{
+ return _mm_maskz_getmant_sd (m8, d1, d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128d
+ooo6 (__m128d d2, __m128d d1, __m128d d3)
+{
+ return _mm_maskz_getmant_round_sd (m8, d1, d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m128
+poo1 (__m128 d2, __m128 d1)
+{
+ return _mm_getmant_ss (d1, d1, _MM_MANT_NORM_p75_1p5,
_MM_MANT_SIGN_src);
- f2 = _mm_mask_getmant_round_ss (f2, m8, f2, f2, _MM_MANT_NORM_p75_1p5,
+}
+
+__m128
+poo2 (__m128 d2, __m128 d1)
+{
+ return _mm_getmant_round_ss (d1, d1, _MM_MANT_NORM_p75_1p5,
_MM_MANT_SIGN_src, 8);
- f2 = _mm_maskz_getmant_ss (m8, f2, f2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f2 = _mm_maskz_getmant_round_ss (m8, f2, f2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src, 8);
+}
+
+__m128
+poo3 (__m128 d2, __m128 d1, __m128 d3)
+{
+ return _mm_mask_getmant_ss (d1, m8, d3, d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128
+poo4 (__m128 d2, __m128 d1, __m128 d3)
+{
+ return _mm_mask_getmant_round_ss (d1, m8, d3, d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
+}
+
+__m128
+poo5 (__m128 d2, __m128 d1)
+{
+ return _mm_maskz_getmant_ss (m8, d1, d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128
+poo6 (__m128 d2, __m128 d1, __m128 d3)
+{
+ return _mm_maskz_getmant_round_ss (m8, d1, d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src, 8);
}
-/* { dg-final { scan-assembler-times "vxorps" 22 } } */
+/* { dg-final { scan-assembler-times "vxorps" 24 } } */
/* { dg-final { scan-assembler-times "vpermd" 3 } } */
/* { dg-final { scan-assembler-times "vpermq" 6 } } */
/* { dg-final { scan-assembler-times "vpermps" 3 } } */
@@ -11,32 +11,98 @@ __mmask32 m32;
__mmask16 m16;
__mmask8 m8;
-void complex_mul_test (void)
-{
- h1 = _mm512_fmul_pch (h1, h1);
- h1 = _mm512_fmul_round_pch (h1, h1, 8);
- h1 = _mm512_mask_fmul_pch (h1, m32, h1, h1);
- h1 = _mm512_mask_fmul_round_pch (h1, m32, h1, h1, 8);
- h1 = _mm512_maskz_fmul_pch (m32, h1, h1);
- h1 = _mm512_maskz_fmul_round_pch (m32, h1, h1, 11);
-
- h3 = _mm_fmul_sch (h3, h3);
- h3 = _mm_fmul_round_sch (h3, h3, 8);
- h3 = _mm_mask_fmul_sch (h3, m8, h3, h3);
- h3 = _mm_mask_fmul_round_sch (h3, m8, h3, h3, 8);
- h3 = _mm_maskz_fmul_sch (m8, h3, h3);
- h3 = _mm_maskz_fmul_round_sch (m8, h3, h3, 11);
-}
-
-void vgetmant_test (void)
-{
- h3 = _mm_getmant_sh (h3, h3, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- h3 = _mm_mask_getmant_sh (h3, m8, h3, h3, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- h3 = _mm_maskz_getmant_sh (m8, h3, h3, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
-}
+__m512h
+foo1 (__m512h h2, __m512h h1)
+{
+ return _mm512_fmul_pch (h1, h1);
+}
+
+__m512h
+foo2 (__m512h h2, __m512h h1)
+{
+ return _mm512_fmul_round_pch (h1, h1, 8);
+}
+
+__m512h
+foo3 (__m512h h2, __m512h h1)
+{
+ return _mm512_mask_fmul_pch (h1, m32, h1, h1);
+}
+
+__m512h
+foo4 (__m512h h2, __m512h h1)
+{
+ return _mm512_mask_fmul_round_pch (h1, m32, h1, h1, 8);
+}
+
+__m512h
+foo5 (__m512h h2, __m512h h1)
+{
+ return _mm512_maskz_fmul_pch (m32, h1, h1);
+}
+
+__m512h
+foo6 (__m512h h2, __m512h h1)
+{
+ return _mm512_maskz_fmul_round_pch (m32, h1, h1, 11);
+}
+
+__m128h
+bar1 (__m128h h2, __m128h h1)
+{
+ return _mm_fmul_sch (h1, h1);
+}
+
+__m128h
+bar2 (__m128h h2, __m128h h1)
+{
+ return _mm_fmul_round_sch (h1, h1, 8);
+}
+
+__m128h
+bar3 (__m128h h2, __m128h h1)
+{
+ return _mm_mask_fmul_sch (h1, m8, h1, h1);
+}
+
+__m128h
+bar4 (__m128h h2, __m128h h1)
+{
+ return _mm_mask_fmul_round_sch (h1, m8, h1, h1, 8);
+}
+
+__m128h
+bar5 (__m128h h2, __m128h h1)
+{
+ return _mm_maskz_fmul_sch (m8, h1, h1);
+}
+
+__m128h
+bar6 (__m128h h2, __m128h h1)
+{
+ return _mm_maskz_fmul_round_sch (m8, h1, h1, 11);
+}
+
+__m128h
+zoo1 (__m128h h1, __m128h h3)
+{
+ return _mm_getmant_sh (h3, h3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128h
+zoo2 (__m128h h1, __m128h h3)
+{
+ return _mm_mask_getmant_sh (h3, m8, h3, h3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128h
+zoo3 (__m128h h1, __m128h h3)
+{
+ return _mm_maskz_getmant_sh (m8, h3, h3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
/* { dg-final { scan-assembler-times "vxorps" 10 } } */
/* { dg-final { scan-assembler-times "vfmulcph" 6 } } */
@@ -13,60 +13,203 @@ extern __m128 f2, *pf2;
__mmask16 m16;
__mmask8 m8;
-void vperm_test (void)
-{
- d1 = _mm256_permutex_pd (d1, 12);
- d1 = _mm256_mask_permutex_pd (d1, m8, d1, 12);
- d1 = _mm256_maskz_permutex_pd (m8, d1, 12);
- d11 = _mm256_permutexvar_pd (i1, d11);
- d11 = _mm256_mask_permutexvar_pd (d11, m8, i1, d11);
- d11 = _mm256_maskz_permutexvar_pd (m8, i1, d11);
-
- f1 = _mm256_permutexvar_ps (i1, f1);
- f1 = _mm256_mask_permutexvar_ps (f1, m8, i1, f1);
- f1 = _mm256_maskz_permutexvar_ps (m8, i1, f1);
-
- i1 = _mm256_permutexvar_epi64 (i1, i1);
- i1 = _mm256_mask_permutexvar_epi64 (i1, m8, i1, i1);
- i1 = _mm256_maskz_permutexvar_epi64 (m8, i1, i1);
- i1 = _mm256_permutex_epi64 (i1, 12);
- i1 = _mm256_mask_permutex_epi64 (i1, m8, i1, 12);
- i1 = _mm256_maskz_permutex_epi64 (m8, i1, 12);
-
- i2 = _mm256_permutexvar_epi32 (i2, i2);
- i2 = _mm256_mask_permutexvar_epi32 (i2, m8, i2, i2);
- i3 = _mm256_maskz_permutexvar_epi32 (m8, i3, i3);
-}
-
-void getmant_test (void)
-{
- d1 = _mm256_getmant_pd (*pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d1 = _mm256_mask_getmant_pd (d1, m8, *pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d1 = _mm256_maskz_getmant_pd (m8, *pd1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d2 = _mm_getmant_pd (*pd2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- d2 = _mm_mask_getmant_pd (d2, m8, *pd2, _MM_MANT_NORM_p75_1p5,
+__m256d
+foo1 (__m256d d2, __m256d d1)
+{
+ return _mm256_permutex_pd (d1, 12);
+}
+
+__m256d
+foo2 (__m256d d2, __m256d d1)
+{
+ return _mm256_mask_permutex_pd (d1, m8, d1, 13);
+}
+
+__m256d
+foo3 (__m256d d2, __m256d d1)
+{
+ return _mm256_maskz_permutex_pd (m8, d1, 14);
+}
+
+__m256d
+foo4 (__m256d d2, __m256d d11, __m256i i1)
+{
+ return _mm256_permutexvar_pd (i1, d11);
+}
+
+__m256d
+foo5 (__m256d d2, __m256d d11, __m256i i2)
+{
+ return _mm256_mask_permutexvar_pd (d11, m8, i2, d11);
+}
+
+__m256d
+foo6 (__m256d d2, __m256d d11, __m256i i3)
+{
+ return _mm256_maskz_permutexvar_pd (m8, i3, d11);
+}
+
+__m256i
+ioo1 (__m256i d2, __m256i d1)
+{
+ return _mm256_permutex_epi64 (d1, 12);
+}
+
+__m256i
+ioo2 (__m256i d2, __m256i d1)
+{
+ return _mm256_mask_permutex_epi64 (d1, m8, d1, 13);
+}
+
+__m256i
+ioo3 (__m256i d2, __m256i d1)
+{
+ return _mm256_maskz_permutex_epi64 (m8, d1, 14);
+}
+
+__m256i
+ioo4 (__m256i d2, __m256i d11, __m256i i1)
+{
+ return _mm256_permutexvar_epi64 (i1, d11);
+}
+
+__m256i
+ioo5 (__m256i d2, __m256i d11, __m256i i2)
+{
+ return _mm256_mask_permutexvar_epi64 (d11, m8, i2, d11);
+}
+
+__m256i
+ioo6 (__m256i d2, __m256i d11, __m256i i3)
+{
+ return _mm256_maskz_permutexvar_epi64 (m8, i3, d11);
+}
+
+__m256
+koo1 (__m256 f2, __m256i i1, __m256 f1)
+{
+ return _mm256_permutexvar_ps (i1, f1);
+}
+
+__m256
+koo2 (__m256 f2, __m256i i1, __m256 f1)
+{
+ return _mm256_mask_permutexvar_ps (f1, m8, i1, f1);
+}
+
+__m256
+koo3 (__m256 f2, __m256i i1, __m256 f1)
+{
+ return _mm256_maskz_permutexvar_ps (m8, i1, f1);
+}
+
+__m256i
+hoo1 (__m256i f2, __m256i i1, __m256i f1)
+{
+ return _mm256_permutexvar_epi32 (i1, f1);
+}
+
+__m256i
+hoo2 (__m256i f2, __m256i i1, __m256i f1)
+{
+ return _mm256_mask_permutexvar_epi32 (f1, m8, i1, f1);
+}
+
+__m256i
+hoo3 (__m256i f2, __m256i i1, __m256i f1)
+{
+ return _mm256_maskz_permutexvar_epi32 (m8, i1, f1);
+}
+
+__m256d
+moo1 (__m256d d2, __m256d* d1)
+{
+ return _mm256_getmant_pd (*d1, _MM_MANT_NORM_p75_1p5,
_MM_MANT_SIGN_src);
- d2 = _mm_maskz_getmant_pd (m8, *pd2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f1 = _mm256_getmant_ps (*pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f1 = _mm256_mask_getmant_ps (f1, m8, *pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f1 = _mm256_maskz_getmant_ps (m8, *pf1, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f2 = _mm_getmant_ps (*pf2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
- f2 = _mm_mask_getmant_ps (f2, m8, *pf2, _MM_MANT_NORM_p75_1p5,
+}
+
+__m256d
+moo3 (__m256d d2, __m256d d1, __m256d* d3)
+{
+ return _mm256_mask_getmant_pd (d1, m8, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m256d
+moo5 (__m256d d2, __m256d* d1)
+{
+ return _mm256_maskz_getmant_pd (m8, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128d
+moo2 (__m128d d2, __m128d* d1)
+{
+ return _mm_getmant_pd (*d1, _MM_MANT_NORM_p75_1p5,
_MM_MANT_SIGN_src);
- f2 = _mm_maskz_getmant_ps (m8, *pf2, _MM_MANT_NORM_p75_1p5,
- _MM_MANT_SIGN_src);
}
-/* { dg-final { scan-assembler-times "vxorps" 19 } } */
+
+__m128d
+moo4 (__m128d d2, __m128d d1, __m128d* d3)
+{
+ return _mm_mask_getmant_pd (d1, m8, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128d
+moo6 (__m128d d2, __m128d* d1)
+{
+ return _mm_maskz_getmant_pd (m8, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m256
+noo1 (__m256 d2, __m256* d1)
+{
+ return _mm256_getmant_ps (*d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m256
+noo3 (__m256 d2, __m256 d1, __m256* d3)
+{
+ return _mm256_mask_getmant_ps (d1, m8, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m256
+noo5 (__m256 d2, __m256* d1)
+{
+ return _mm256_maskz_getmant_ps (m8, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128
+noo2 (__m128 d2, __m128* d1)
+{
+ return _mm_getmant_ps (*d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128
+noo4 (__m128 d2, __m128 d1, __m128* d3)
+{
+ return _mm_mask_getmant_ps (d1, m8, *d3, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+__m128
+noo6 (__m128 d2, __m128* d1)
+{
+ return _mm_maskz_getmant_ps (m8, *d1, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
+
+/* { dg-final { scan-assembler-times "vxorps" 20 } } */
/* { dg-final { scan-assembler-times "vpermpd" 6 } } */
/* { dg-final { scan-assembler-times "vpermps" 3 } } */
/* { dg-final { scan-assembler-times "vpermq" 6 } } */
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-not {(?n)vfmadd[1-3]*ps.*\(} } } */
+/* { dg-final { scan-assembler-not {(?n)vfmadd[1-3]*ps.*\(} { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times {(?n)vfmadd[1-3]*ps[ \t]*} 3 } } */
#include<immintrin.h>