Message ID | 20210716135022.489455-3-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Add SSE4.1 "blend", "ceil", "floor" | expand |
Hi Paul, Thanks for the cleanup, LGTM! Recommend maintainers approve. Bill On 7/16/21 8:50 AM, Paul A. Clarke wrote: > Copy the tests for _mm_blend_pd, _mm_blendv_pd, _mm_blend_ps, > _mm_blendv_ps from gcc/testsuite/gcc.target/i386. > > 2021-07-16 Paul A. Clarke <pc@us.ibm.com> > > gcc/testsuite > * gcc.target/powerpc/sse4_1-blendpd.c: Copy from gcc.target/i386. > * gcc.target/powerpc/sse4_1-blendps-2.c: Likewise. > * gcc.target/powerpc/sse4_1-blendps.c: Likewise. > * gcc.target/powerpc/sse4_1-blendvpd.c: Likewise. > --- > v2: Improve formatting per review from Bill. > > .../gcc.target/powerpc/sse4_1-blendpd.c | 89 ++++++++++++++++++ > .../gcc.target/powerpc/sse4_1-blendps-2.c | 81 +++++++++++++++++ > .../gcc.target/powerpc/sse4_1-blendps.c | 90 +++++++++++++++++++ > .../gcc.target/powerpc/sse4_1-blendvpd.c | 65 ++++++++++++++ > 4 files changed, 325 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-blendpd.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-blendps-2.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-blendps.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-blendvpd.c > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendpd.c > new file mode 100644 > index 000000000000..ca1780471fa2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendpd.c > @@ -0,0 +1,89 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target p8vector_hw } */ > +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse4_1-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse4_1_test > +#endif > + > +#include CHECK_H > + > +#include <smmintrin.h> > +#include <string.h> > + > +#define NUM 20 > + > +#ifndef MASK > +#define MASK 0x03 > +#endif > + > +static void > +init_blendpd (double *src1, double *src2) > +{ > + int i, sign = 1; > + > + for (i = 0; i < NUM * 2; i++) > + { > + src1[i] = i * i * 
sign; > + src2[i] = (i + 20) * sign; > + sign = -sign; > + } > +} > + > +static int > +check_blendpd (__m128d *dst, double *src1, double *src2) > +{ > + double tmp[2]; > + int j; > + > + memcpy (&tmp[0], src1, sizeof (tmp)); > + > + for(j = 0; j < 2; j++) > + if ((MASK & (1 << j))) > + tmp[j] = src2[j]; > + > + return memcmp (dst, &tmp[0], sizeof (tmp)); > +} > + > +static void > +TEST (void) > +{ > + __m128d x, y; > + union > + { > + __m128d x[NUM]; > + double d[NUM * 2]; > + } dst, src1, src2; > + union > + { > + __m128d x; > + double d[2]; > + } src3; > + int i; > + > + init_blendpd (src1.d, src2.d); > + > + /* Check blendpd imm8, m128, xmm */ > + for (i = 0; i < NUM; i++) > + { > + dst.x[i] = _mm_blend_pd (src1.x[i], src2.x[i], MASK); > + if (check_blendpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2])) > + abort (); > + } > + > + /* Check blendpd imm8, xmm, xmm */ > + src3.x = _mm_setzero_pd (); > + > + x = _mm_blend_pd (dst.x[2], src3.x, MASK); > + y = _mm_blend_pd (src3.x, dst.x[2], MASK); > + > + if (check_blendpd (&x, &dst.d[4], &src3.d[0])) > + abort (); > + > + if (check_blendpd (&y, &src3.d[0], &dst.d[4])) > + abort (); > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps-2.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps-2.c > new file mode 100644 > index 000000000000..768b6e64bbae > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps-2.c > @@ -0,0 +1,81 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target p8vector_hw } */ > +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ > + > +#include "sse4_1-check.h" > + > +#include <smmintrin.h> > +#include <string.h> > +#include <stdlib.h> > + > +#define NUM 20 > + > +#undef MASK > +#define MASK 0xe > + > +static void > +init_blendps (float *src1, float *src2) > +{ > + int i, sign = 1; > + > + for (i = 0; i < NUM * 4; i++) > + { > + src1[i] = i * i * sign; > + src2[i] = (i + 20) * sign; > + sign = -sign; > + } > +} > + > +static int > +check_blendps (__m128 *dst, 
float *src1, float *src2) > +{ > + float tmp[4]; > + int j; > + > + memcpy (&tmp[0], src1, sizeof (tmp)); > + for (j = 0; j < 4; j++) > + if ((MASK & (1 << j))) > + tmp[j] = src2[j]; > + > + return memcmp (dst, &tmp[0], sizeof (tmp)); > +} > + > +static void > +sse4_1_test (void) > +{ > + __m128 x, y; > + union > + { > + __m128 x[NUM]; > + float f[NUM * 4]; > + } dst, src1, src2; > + union > + { > + __m128 x; > + float f[4]; > + } src3; > + int i; > + > + init_blendps (src1.f, src2.f); > + > + for (i = 0; i < 4; i++) > + src3.f[i] = (int) rand (); > + > + /* Check blendps imm8, m128, xmm */ > + for (i = 0; i < NUM; i++) > + { > + dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK); > + if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4])) > + abort (); > + } > + > + /* Check blendps imm8, xmm, xmm */ > + x = _mm_blend_ps (dst.x[2], src3.x, MASK); > + y = _mm_blend_ps (src3.x, dst.x[2], MASK); > + > + if (check_blendps (&x, &dst.f[8], &src3.f[0])) > + abort (); > + > + if (check_blendps (&y, &src3.f[0], &dst.f[8])) > + abort (); > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps.c > new file mode 100644 > index 000000000000..2f114b69a84b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps.c > @@ -0,0 +1,90 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target p8vector_hw } */ > +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse4_1-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse4_1_test > +#endif > + > +#include CHECK_H > + > +#include <smmintrin.h> > +#include <string.h> > +#include <stdlib.h> > + > +#define NUM 20 > + > +#ifndef MASK > +#define MASK 0x0f > +#endif > + > +static void > +init_blendps (float *src1, float *src2) > +{ > + int i, sign = 1; > + > + for (i = 0; i < NUM * 4; i++) > + { > + src1[i] = i * i * sign; > + src2[i] = (i + 20) * sign; > + sign = -sign; > + } > +} > + > +static int > 
+check_blendps (__m128 *dst, float *src1, float *src2) > +{ > + float tmp[4]; > + int j; > + > + memcpy (&tmp[0], src1, sizeof (tmp)); > + for (j = 0; j < 4; j++) > + if ((MASK & (1 << j))) > + tmp[j] = src2[j]; > + > + return memcmp (dst, &tmp[0], sizeof (tmp)); > +} > + > +static void > +TEST (void) > +{ > + __m128 x, y; > + union > + { > + __m128 x[NUM]; > + float f[NUM * 4]; > + } dst, src1, src2; > + union > + { > + __m128 x; > + float f[4]; > + } src3; > + int i; > + > + init_blendps (src1.f, src2.f); > + > + for (i = 0; i < 4; i++) > + src3.f[i] = (int) rand (); > + > + /* Check blendps imm8, m128, xmm */ > + for (i = 0; i < NUM; i++) > + { > + dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK); > + if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4])) > + abort (); > + } > + > + /* Check blendps imm8, xmm, xmm */ > + x = _mm_blend_ps (dst.x[2], src3.x, MASK); > + y = _mm_blend_ps (src3.x, dst.x[2], MASK); > + > + if (check_blendps (&x, &dst.f[8], &src3.f[0])) > + abort (); > + > + if (check_blendps (&y, &src3.f[0], &dst.f[8])) > + abort (); > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendvpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendvpd.c > new file mode 100644 > index 000000000000..b82cd28848a6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendvpd.c > @@ -0,0 +1,65 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target p8vector_hw } */ > +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ > + > +#include "sse4_1-check.h" > + > +#include <smmintrin.h> > +#include <string.h> > + > +#define NUM 20 > + > +static void > +init_blendvpd (double *src1, double *src2, double *mask) > +{ > + int i, msk, sign = 1; > + > + msk = -1; > + for (i = 0; i < NUM * 2; i++) > + { > + if((i % 2) == 0) > + msk++; > + src1[i] = i* (i + 1) * sign; > + src2[i] = (i + 20) * sign; > + mask[i] = (i + 120) * i; > + if( (msk & (1 << (i % 2)))) > + mask[i] = -mask[i]; > + sign = -sign; > + } > +} > + > +static int > 
+check_blendvpd (__m128d *dst, double *src1, double *src2, > + double *mask) > +{ > + double tmp[2]; > + int j; > + > + memcpy (&tmp[0], src1, sizeof (tmp)); > + for (j = 0; j < 2; j++) > + if (mask [j] < 0.0) > + tmp[j] = src2[j]; > + > + return memcmp (dst, &tmp[0], sizeof (tmp)); > +} > + > +static void > +sse4_1_test (void) > +{ > + union > + { > + __m128d x[NUM]; > + double d[NUM * 2]; > + } dst, src1, src2, mask; > + int i; > + > + init_blendvpd (src1.d, src2.d, mask.d); > + > + for (i = 0; i < NUM; i++) > + { > + dst.x[i] = _mm_blendv_pd (src1.x[i], src2.x[i], mask.x[i]); > + if (check_blendvpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2], > + &mask.d[i * 2])) > + abort (); > + } > +}
Hi! On Fri, Jul 16, 2021 at 08:50:18AM -0500, Paul A. Clarke wrote: > Copy the tests for _mm_blend_pd, _mm_blendv_pd, _mm_blend_ps, > _mm_blendv_ps from gcc/testsuite/gcc.target/i386. You get a less messy series in cases like this if you just put the tests in the same patch as the code they test (which works fine with Git by default: it sorts everything in gcc/testsuite/ after everything in gcc/config/, after all, so the important stuff comes first in your patch). > gcc/testsuite > * gcc.target/powerpc/sse4_1-blendpd.c: Copy from gcc.target/i386. > * gcc.target/powerpc/sse4_1-blendps-2.c: Likewise. > * gcc.target/powerpc/sse4_1-blendps.c: Likewise. > * gcc.target/powerpc/sse4_1-blendvpd.c: Likewise. Well, they aren't exact copies: the dg-* directives are different (to make the tests run only on a p8 or up, and to enable generating p8 code). So maybe say that? Okay for trunk. Thanks! Segher
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendpd.c new file mode 100644 index 000000000000..ca1780471fa2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendpd.c @@ -0,0 +1,89 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +#ifndef MASK +#define MASK 0x03 +#endif + +static void +init_blendpd (double *src1, double *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 2; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_blendpd (__m128d *dst, double *src1, double *src2) +{ + double tmp[2]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + + for(j = 0; j < 2; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +TEST (void) +{ + __m128d x, y; + union + { + __m128d x[NUM]; + double d[NUM * 2]; + } dst, src1, src2; + union + { + __m128d x; + double d[2]; + } src3; + int i; + + init_blendpd (src1.d, src2.d); + + /* Check blendpd imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_pd (src1.x[i], src2.x[i], MASK); + if (check_blendpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2])) + abort (); + } + + /* Check blendpd imm8, xmm, xmm */ + src3.x = _mm_setzero_pd (); + + x = _mm_blend_pd (dst.x[2], src3.x, MASK); + y = _mm_blend_pd (src3.x, dst.x[2], MASK); + + if (check_blendpd (&x, &dst.d[4], &src3.d[0])) + abort (); + + if (check_blendpd (&y, &src3.d[0], &dst.d[4])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps-2.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps-2.c new file mode 100644 index 000000000000..768b6e64bbae --- /dev/null +++ 
b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps-2.c @@ -0,0 +1,81 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> +#include <stdlib.h> + +#define NUM 20 + +#undef MASK +#define MASK 0xe + +static void +init_blendps (float *src1, float *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 4; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_blendps (__m128 *dst, float *src1, float *src2) +{ + float tmp[4]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 4; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + __m128 x, y; + union + { + __m128 x[NUM]; + float f[NUM * 4]; + } dst, src1, src2; + union + { + __m128 x; + float f[4]; + } src3; + int i; + + init_blendps (src1.f, src2.f); + + for (i = 0; i < 4; i++) + src3.f[i] = (int) rand (); + + /* Check blendps imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK); + if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4])) + abort (); + } + + /* Check blendps imm8, xmm, xmm */ + x = _mm_blend_ps (dst.x[2], src3.x, MASK); + y = _mm_blend_ps (src3.x, dst.x[2], MASK); + + if (check_blendps (&x, &dst.f[8], &src3.f[0])) + abort (); + + if (check_blendps (&y, &src3.f[0], &dst.f[8])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps.c new file mode 100644 index 000000000000..2f114b69a84b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendps.c @@ -0,0 +1,90 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST 
+#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> +#include <string.h> +#include <stdlib.h> + +#define NUM 20 + +#ifndef MASK +#define MASK 0x0f +#endif + +static void +init_blendps (float *src1, float *src2) +{ + int i, sign = 1; + + for (i = 0; i < NUM * 4; i++) + { + src1[i] = i * i * sign; + src2[i] = (i + 20) * sign; + sign = -sign; + } +} + +static int +check_blendps (__m128 *dst, float *src1, float *src2) +{ + float tmp[4]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 4; j++) + if ((MASK & (1 << j))) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +TEST (void) +{ + __m128 x, y; + union + { + __m128 x[NUM]; + float f[NUM * 4]; + } dst, src1, src2; + union + { + __m128 x; + float f[4]; + } src3; + int i; + + init_blendps (src1.f, src2.f); + + for (i = 0; i < 4; i++) + src3.f[i] = (int) rand (); + + /* Check blendps imm8, m128, xmm */ + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK); + if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4])) + abort (); + } + + /* Check blendps imm8, xmm, xmm */ + x = _mm_blend_ps (dst.x[2], src3.x, MASK); + y = _mm_blend_ps (src3.x, dst.x[2], MASK); + + if (check_blendps (&x, &dst.f[8], &src3.f[0])) + abort (); + + if (check_blendps (&y, &src3.f[0], &dst.f[8])) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-blendvpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendvpd.c new file mode 100644 index 000000000000..b82cd28848a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-blendvpd.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ + +#include "sse4_1-check.h" + +#include <smmintrin.h> +#include <string.h> + +#define NUM 20 + +static void +init_blendvpd (double *src1, double *src2, double *mask) +{ + int i, msk, sign = 1; + + msk = -1; + for (i = 0; i < NUM * 2; i++) + { 
+ if((i % 2) == 0) + msk++; + src1[i] = i* (i + 1) * sign; + src2[i] = (i + 20) * sign; + mask[i] = (i + 120) * i; + if( (msk & (1 << (i % 2)))) + mask[i] = -mask[i]; + sign = -sign; + } +} + +static int +check_blendvpd (__m128d *dst, double *src1, double *src2, + double *mask) +{ + double tmp[2]; + int j; + + memcpy (&tmp[0], src1, sizeof (tmp)); + for (j = 0; j < 2; j++) + if (mask [j] < 0.0) + tmp[j] = src2[j]; + + return memcmp (dst, &tmp[0], sizeof (tmp)); +} + +static void +sse4_1_test (void) +{ + union + { + __m128d x[NUM]; + double d[NUM * 2]; + } dst, src1, src2, mask; + int i; + + init_blendvpd (src1.d, src2.d, mask.d); + + for (i = 0; i < NUM; i++) + { + dst.x[i] = _mm_blendv_pd (src1.x[i], src2.x[i], mask.x[i]); + if (check_blendvpd (&dst.x[i], &src1.d[i * 2], &src2.d[i * 2], + &mask.d[i * 2])) + abort (); + } +}