Message ID | 20210809202355.568303-6-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Support more SSE4.1 intrinsics |
Hi Paul, On 8/9/21 3:23 PM, Paul A. Clarke via Gcc-patches wrote: > Also, copy tests for: > - _mm_cmpeq_epi64, _mm_cmpgt_epi64 > - _mm_mullo_epi32, _mm_mul_epi32 > - _mm_packus_epi32 > > from gcc/testsuite/gcc.target/i386. Testing, backports, etc. This patch LGTM with the usual comment about documenting -Wno-psabi. Thanks! Bill > > 2021-08-09 Paul A. Clarke <pc@us.ibm.com> > > gcc > * config/rs6000/smmintrin.h (_mm_cmpeq_epi64, _mm_cmpgt_epi64, > _mm_mullo_epi32, _mm_mul_epi32, _mm_packus_epi32): New. > > gcc/testsuite > * gcc.target/powerpc/pr78102.c: Copy from gcc.target/i386, > adjust dg directives to suit. > * gcc.target/powerpc/sse4_1-packusdw.c: Same. > * gcc.target/powerpc/sse4_1-pcmpeqq.c: Same. > * gcc.target/powerpc/sse4_1-pmuldq.c: Same. > * gcc.target/powerpc/sse4_1-pmulld.c: Same. > * gcc.target/powerpc/sse4_2-pcmpgtq.c: Same. > --- > gcc/config/rs6000/smmintrin.h | 41 +++++++++++ > gcc/testsuite/gcc.target/powerpc/pr78102.c | 23 ++++++ > .../gcc.target/powerpc/sse4_1-packusdw.c | 73 +++++++++++++++++++ > .../gcc.target/powerpc/sse4_1-pcmpeqq.c | 46 ++++++++++++ > .../gcc.target/powerpc/sse4_1-pmuldq.c | 51 +++++++++++++ > .../gcc.target/powerpc/sse4_1-pmulld.c | 46 ++++++++++++ > .../gcc.target/powerpc/sse4_2-pcmpgtq.c | 46 ++++++++++++ > 7 files changed, 326 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr78102.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > index 7f6ff7baff50..8d6ae98c7ce3 100644 > --- a/gcc/config/rs6000/smmintrin.h > +++ b/gcc/config/rs6000/smmintrin.h > @@ -392,6 +392,15 @@ _mm_testnzc_si128 (__m128i __A, __m128i __B) > > 
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) > > +#ifdef _ARCH_PWR8 > +__inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cmpeq_epi64 (__m128i __X, __m128i __Y) > +{ > + return (__m128i) vec_cmpeq ((__v2di)__X, (__v2di)__Y); > +} > +#endif > + > __inline __m128i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm_min_epi8 (__m128i __X, __m128i __Y) > @@ -448,6 +457,22 @@ _mm_max_epu32 (__m128i __X, __m128i __Y) > return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y); > } > > +__inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mullo_epi32 (__m128i __X, __m128i __Y) > +{ > + return (__m128i) vec_mul ((__v4su)__X, (__v4su)__Y); > +} > + > +#ifdef _ARCH_PWR8 > +__inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mul_epi32 (__m128i __X, __m128i __Y) > +{ > + return (__m128i) vec_mule ((__v4si)__X, (__v4si)__Y); > +} > +#endif > + > __inline __m128i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm_cvtepi8_epi16 (__m128i __A) > @@ -611,4 +636,20 @@ _mm_minpos_epu16 (__m128i __A) > return __r.__m; > } > > +__inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_packus_epi32 (__m128i __X, __m128i __Y) > +{ > + return (__m128i) vec_packsu ((__v4si)__X, (__v4si)__Y); > +} > + > +#ifdef _ARCH_PWR8 > +__inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cmpgt_epi64 (__m128i __X, __m128i __Y) > +{ > + return (__m128i) vec_cmpgt ((__v2di)__X, (__v2di)__Y); > +} > +#endif > + > #endif > diff --git a/gcc/testsuite/gcc.target/powerpc/pr78102.c b/gcc/testsuite/gcc.target/powerpc/pr78102.c > new file mode 100644 > index 000000000000..a9db140f7335 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr78102.c > @@ -0,0 +1,23 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ > 
+/* { dg-require-effective-target powerpc_vsx_ok } */ > + > +#include <x86intrin.h> > + > +__m128i > +foo (const __m128i x, const __m128i y) > +{ > + return _mm_cmpeq_epi64 (x, y); > +} > + > +__v2di > +bar (const __v2di x, const __v2di y) > +{ > + return x == y; > +} > + > +__v2di > +baz (const __v2di x, const __v2di y) > +{ > + return x != y; > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c > new file mode 100644 > index 000000000000..2438a755cbe9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c > @@ -0,0 +1,73 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse4_1-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse4_1_test > +#endif > + > +#include CHECK_H > + > +#include <smmintrin.h> > + > +#define NUM 64 > + > +static unsigned short > +int_to_ushort (int iVal) > +{ > + unsigned short sVal; > + > + if (iVal < 0) > + sVal = 0; > + else if (iVal > 0xffff) > + sVal = 0xffff; > + else sVal = iVal; > + > + return sVal; > +} > + > +static void > +TEST (void) > +{ > + union > + { > + __m128i x[NUM / 4]; > + int i[NUM]; > + } src1, src2; > + union > + { > + __m128i x[NUM / 4]; > + unsigned short s[NUM * 2]; > + } dst; > + int i, sign = 1; > + > + for (i = 0; i < NUM; i++) > + { > + src1.i[i] = i * i * sign; > + src2.i[i] = (i + 20) * sign; > + sign = -sign; > + } > + > + for (i = 0; i < NUM; i += 4) > + dst.x[i / 4] = _mm_packus_epi32 (src1.x [i / 4], src2.x [i / 4]); > + > + for (i = 0; i < NUM; i ++) > + { > + int dstIndex; > + unsigned short sVal; > + > + sVal = int_to_ushort (src1.i[i]); > + dstIndex = (i % 4) + (i / 4) * 8; > + if (sVal != dst.s[dstIndex]) > + abort (); > + > + sVal = int_to_ushort (src2.i[i]); > + dstIndex += 4; > + if (sVal != dst.s[dstIndex]) > + abort (); > + } > +} > diff --git 
a/gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c > new file mode 100644 > index 000000000000..ea3a83adf81c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c > @@ -0,0 +1,46 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ > +/* { dg-require-effective-target p8vector_hw } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse4_1-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse4_1_test > +#endif > + > +#include CHECK_H > + > +#include <smmintrin.h> > + > +#define NUM 64 > + > +static void > +TEST (void) > +{ > + union > + { > + __m128i x[NUM / 2]; > + long long ll[NUM]; > + } dst, src1, src2; > + int i, sign=1; > + long long is_eq; > + > + for (i = 0; i < NUM; i++) > + { > + src1.ll[i] = i * i * sign; > + src2.ll[i] = (i + 20) * sign; > + sign = -sign; > + } > + > + for (i = 0; i < NUM; i += 2) > + dst.x [i / 2] = _mm_cmpeq_epi64(src1.x [i / 2], src2.x [i / 2]); > + > + for (i = 0; i < NUM; i++) > + { > + is_eq = src1.ll[i] == src2.ll[i] ? 
0xffffffffffffffffLL : 0LL; > + if (is_eq != dst.ll[i]) > + abort (); > + } > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c > new file mode 100644 > index 000000000000..f5ff27dc36c3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c > @@ -0,0 +1,51 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ > +/* { dg-require-effective-target p8vector_hw } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse4_1-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse4_1_test > +#endif > + > +#include CHECK_H > + > +#include <smmintrin.h> > + > +#define NUM 64 > + > +static void > +TEST (void) > +{ > + union > + { > + __m128i x[NUM / 2]; > + long long ll[NUM]; > + } dst; > + union > + { > + __m128i x[NUM / 2]; > + int i[NUM * 2]; > + } src1, src2; > + int i, sign = 1; > + long long value; > + > + for (i = 0; i < NUM * 2; i += 2) > + { > + src1.i[i] = i * i * sign; > + src2.i[i] = (i + 20) * sign; > + sign = -sign; > + } > + > + for (i = 0; i < NUM; i += 2) > + dst.x[i / 2] = _mm_mul_epi32 (src1.x[i / 2], src2.x[i / 2]); > + > + for (i = 0; i < NUM; i++) > + { > + value = (long long) src1.i[i * 2] * (long long) src2.i[i * 2]; > + if (value != dst.ll[i]) > + abort (); > + } > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c > new file mode 100644 > index 000000000000..32513edf21bf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c > @@ -0,0 +1,46 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse4_1-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse4_1_test > +#endif > + > +#include CHECK_H > + > +#include <smmintrin.h> > + > +#define NUM 64 > + > +static void > +TEST (void) > +{ > + union > + { > + __m128i x[NUM / 4]; > + int i[NUM]; 
> + } dst, src1, src2; > + int i, sign = 1; > + int value; > + > + for (i = 0; i < NUM; i++) > + { > + src1.i[i] = i * i * sign; > + src2.i[i] = (i + 20) * sign; > + sign = -sign; > + } > + > + for (i = 0; i < NUM; i += 4) > + dst.x[i / 4] = _mm_mullo_epi32 (src1.x[i / 4], src2.x[i / 4]); > + > + for (i = 0; i < NUM; i++) > + { > + value = src1.i[i] * src2.i[i]; > + if (value != dst.i[i]) > + abort (); > + } > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c b/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c > new file mode 100644 > index 000000000000..5cd88fd2ae19 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c > @@ -0,0 +1,46 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse4_2-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse4_2_test > +#endif > + > +#include CHECK_H > + > +#include <nmmintrin.h> > + > +#define NUM 64 > + > +static void > +TEST (void) > +{ > + union > + { > + __m128i x[NUM / 2]; > + long long ll[NUM]; > + } dst, src1, src2; > + int i, sign = 1; > + long long is_eq; > + > + for (i = 0; i < NUM; i++) > + { > + src1.ll[i] = i * i * sign; > + src2.ll[i] = (i + 20) * sign; > + sign = -sign; > + } > + > + for (i = 0; i < NUM; i += 2) > + dst.x[i / 2] = _mm_cmpgt_epi64 (src1.x[i / 2], src2.x[i / 2]); > + > + for (i = 0; i < NUM; i++) > + { > + is_eq = src1.ll[i] > src2.ll[i] ? 0xFFFFFFFFFFFFFFFFLL : 0LL; > + if (is_eq != dst.ll[i]) > + abort (); > + } > +}
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 7f6ff7baff50..8d6ae98c7ce3 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -392,6 +392,15 @@ _mm_testnzc_si128 (__m128i __A, __m128i __B) #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) +#ifdef _ARCH_PWR8 +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_epi64 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_cmpeq ((__v2di)__X, (__v2di)__Y); +} +#endif + __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_epi8 (__m128i __X, __m128i __Y) @@ -448,6 +457,22 @@ _mm_max_epu32 (__m128i __X, __m128i __Y) return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y); } +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mullo_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_mul ((__v4su)__X, (__v4su)__Y); +} + +#ifdef _ARCH_PWR8 +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_mule ((__v4si)__X, (__v4si)__Y); +} +#endif + __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi8_epi16 (__m128i __A) @@ -611,4 +636,20 @@ _mm_minpos_epu16 (__m128i __A) return __r.__m; } +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packus_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_packsu ((__v4si)__X, (__v4si)__Y); +} + +#ifdef _ARCH_PWR8 +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_epi64 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_cmpgt ((__v2di)__X, (__v2di)__Y); +} +#endif + #endif diff --git a/gcc/testsuite/gcc.target/powerpc/pr78102.c b/gcc/testsuite/gcc.target/powerpc/pr78102.c new file mode 100644 index 000000000000..a9db140f7335 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/powerpc/pr78102.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#include <x86intrin.h> + +__m128i +foo (const __m128i x, const __m128i y) +{ + return _mm_cmpeq_epi64 (x, y); +} + +__v2di +bar (const __v2di x, const __v2di y) +{ + return x == y; +} + +__v2di +baz (const __v2di x, const __v2di y) +{ + return x != y; +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c new file mode 100644 index 000000000000..2438a755cbe9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-packusdw.c @@ -0,0 +1,73 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 64 + +static unsigned short +int_to_ushort (int iVal) +{ + unsigned short sVal; + + if (iVal < 0) + sVal = 0; + else if (iVal > 0xffff) + sVal = 0xffff; + else sVal = iVal; + + return sVal; +} + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + } src1, src2; + union + { + __m128i x[NUM / 4]; + unsigned short s[NUM * 2]; + } dst; + int i, sign = 1; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x[i / 4] = _mm_packus_epi32 (src1.x [i / 4], src2.x [i / 4]); + + for (i = 0; i < NUM; i ++) + { + int dstIndex; + unsigned short sVal; + + sVal = int_to_ushort (src1.i[i]); + dstIndex = (i % 4) + (i / 4) * 8; + if (sVal != dst.s[dstIndex]) + abort (); + + sVal = int_to_ushort (src2.i[i]); + dstIndex += 4; + if (sVal != dst.s[dstIndex]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c 
new file mode 100644 index 000000000000..ea3a83adf81c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pcmpeqq.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 64 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + } dst, src1, src2; + int i, sign=1; + long long is_eq; + + for (i = 0; i < NUM; i++) + { + src1.ll[i] = i * i * sign; + src2.ll[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x [i / 2] = _mm_cmpeq_epi64(src1.x [i / 2], src2.x [i / 2]); + + for (i = 0; i < NUM; i++) + { + is_eq = src1.ll[i] == src2.ll[i] ? 0xffffffffffffffffLL : 0LL; + if (is_eq != dst.ll[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c new file mode 100644 index 000000000000..f5ff27dc36c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmuldq.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 64 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + } dst; + union + { + __m128i x[NUM / 2]; + int i[NUM * 2]; + } src1, src2; + int i, sign = 1; + long long value; + + for (i = 0; i < NUM * 2; i += 2) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x[i / 2] = _mm_mul_epi32 (src1.x[i / 2], src2.x[i / 2]); + + for (i = 0; i < NUM; i++) + { + value = (long long) src1.i[i * 2] * (long 
long) src2.i[i * 2]; + if (value != dst.ll[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c new file mode 100644 index 000000000000..32513edf21bf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmulld.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 64 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 4]; + int i[NUM]; + } dst, src1, src2; + int i, sign = 1; + int value; + + for (i = 0; i < NUM; i++) + { + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 4) + dst.x[i / 4] = _mm_mullo_epi32 (src1.x[i / 4], src2.x[i / 4]); + + for (i = 0; i < NUM; i++) + { + value = src1.i[i] * src2.i[i]; + if (value != dst.i[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c b/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c new file mode 100644 index 000000000000..5cd88fd2ae19 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_2-check.h" +#endif + +#ifndef TEST +#define TEST sse4_2_test +#endif + +#include CHECK_H + +#include <nmmintrin.h> + +#define NUM 64 + +static void +TEST (void) +{ + union + { + __m128i x[NUM / 2]; + long long ll[NUM]; + } dst, src1, src2; + int i, sign = 1; + long long is_eq; + + for (i = 0; i < NUM; i++) + { + src1.ll[i] = i * i * sign; + src2.ll[i] = (i + 20) * sign; + sign = -sign; + } + + for (i = 0; i < NUM; i += 2) + dst.x[i / 2] = _mm_cmpgt_epi64 (src1.x[i / 2], src2.x[i / 2]); + + for (i = 0; i < 
NUM; i++) + { + is_eq = src1.ll[i] > src2.ll[i] ? 0xFFFFFFFFFFFFFFFFLL : 0LL; + if (is_eq != dst.ll[i]) + abort (); + } +}