===================================================================
@@ -66,4 +66,24 @@ _mm_extract_ps (__m128 __X, const int __N)
return ((__v4si)__X)[__N & 3];
}
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
+{
+ __v16qi __charmask = vec_splats ((signed char) __imm8);
+ __charmask = vec_gb (__charmask);
+ __v8hu __shortmask = (__v8hu) vec_unpackh (__charmask);
+ #ifdef __BIG_ENDIAN__
+ __shortmask = vec_reve (__shortmask);
+ #endif
+ return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
+{
+ const __v16qu __seven = vec_splats ((unsigned char) 0x07);
+ __v16qu __lmask = vec_sra ((__v16qu) __mask, __seven);
+ return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
+}
+
#endif
===================================================================
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "m128-check.h"
+
+//#define DEBUG 1
+
+#define TEST sse4_1_test
+
+static void sse4_1_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+ sse4_1_test ();
+}
+
+int
+main ()
+{
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+}
===================================================================
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+static void
+init_pblendvb (unsigned char *src1, unsigned char *src2,
+ unsigned char *mask)
+{
+ int i, sign = 1;
+
+ for (i = 0; i < NUM * 16; i++)
+ {
+ src1[i] = i* i * sign;
+ src2[i] = (i + 20) * sign;
+ mask[i] = (i % 3) + ((i * (14 + sign))
+ ^ (src1[i] | src2[i] | (i*3)));
+ sign = -sign;
+ }
+}
+
+static int
+check_pblendvb (__m128i *dst, unsigned char *src1,
+ unsigned char *src2, unsigned char *mask)
+{
+ unsigned char tmp[16];
+ int j;
+
+ memcpy (&tmp[0], src1, sizeof (tmp));
+ for (j = 0; j < 16; j++)
+ if (mask [j] & 0x80)
+ tmp[j] = src2[j];
+
+ return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+TEST (void)
+{
+ union
+ {
+ __m128i x[NUM];
+ unsigned char c[NUM * 16];
+ } dst, src1, src2, mask;
+ int i;
+
+ init_pblendvb (src1.c, src2.c, mask.c);
+
+ for (i = 0; i < NUM; i++)
+ {
+ dst.x[i] = _mm_blendv_epi8 (src1.x[i], src2.x[i], mask.x[i]);
+ if (check_pblendvb (&dst.x[i], &src1.c[i * 16], &src2.c[i * 16],
+ &mask.c[i * 16]))
+ abort ();
+ }
+}
===================================================================
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#include "sse4_1-check.h"
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#undef MASK
+#define MASK 0xfe
+
+static void
+init_pblendw (short *src1, short *src2)
+{
+ int i, sign = 1;
+
+ for (i = 0; i < NUM * 8; i++)
+ {
+ src1[i] = i * i * sign;
+ src2[i] = (i + 20) * sign;
+ sign = -sign;
+ }
+}
+
+static int
+check_pblendw (__m128i *dst, short *src1, short *src2)
+{
+ short tmp[8];
+ int j;
+
+ memcpy (&tmp[0], src1, sizeof (tmp));
+ for (j = 0; j < 8; j++)
+ if ((MASK & (1 << j)))
+ tmp[j] = src2[j];
+
+ return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+sse4_1_test (void)
+{
+ __m128i x, y;
+ union
+ {
+ __m128i x[NUM];
+ short s[NUM * 8];
+ } dst, src1, src2;
+ union
+ {
+ __m128i x;
+ short s[8];
+ } src3;
+ int i;
+
+ init_pblendw (src1.s, src2.s);
+
+ /* Check pblendw imm8, m128, xmm */
+ for (i = 0; i < NUM; i++)
+ {
+ dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
+ if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
+ abort ();
+ }
+
+ /* Check pblendw imm8, xmm, xmm */
+ src3.x = _mm_setzero_si128 ();
+
+ x = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
+ y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
+
+ if (check_pblendw (&x, &dst.s[16], &src3.s[0]))
+ abort ();
+
+ if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
+ abort ();
+}
===================================================================
@@ -0,0 +1,89 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#ifndef MASK
+#define MASK 0x0f
+#endif
+
+static void
+init_pblendw (short *src1, short *src2)
+{
+ int i, sign = 1;
+
+ for (i = 0; i < NUM * 8; i++)
+ {
+ src1[i] = i * i * sign;
+ src2[i] = (i + 20) * sign;
+ sign = -sign;
+ }
+}
+
+static int
+check_pblendw (__m128i *dst, short *src1, short *src2)
+{
+ short tmp[8];
+ int j;
+
+ memcpy (&tmp[0], src1, sizeof (tmp));
+ for (j = 0; j < 8; j++)
+ if ((MASK & (1 << j)))
+ tmp[j] = src2[j];
+
+ return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+TEST (void)
+{
+ __m128i x, y;
+ union
+ {
+ __m128i x[NUM];
+ short s[NUM * 8];
+ } dst, src1, src2;
+ union
+ {
+ __m128i x;
+ short s[8];
+ } src3;
+ int i;
+
+ init_pblendw (src1.s, src2.s);
+
+ /* Check pblendw imm8, m128, xmm */
+ for (i = 0; i < NUM; i++)
+ {
+ dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
+ if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
+ abort ();
+ }
+
+ /* Check pblendw imm8, xmm, xmm */
+ src3.x = _mm_setzero_si128 ();
+
+ x = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
+ y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
+
+ if (check_pblendw (&x, &dst.s[16], &src3.s[0]))
+ abort ();
+
+ if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
+ abort ();
+}