===================================================================
@@ -10393,14 +10393,17 @@
})
(define_expand "insv"
- [(set (zero_extract (match_operand 0 "ext_register_operand" "")
- (match_operand 1 "const8_operand" "")
- (match_operand 2 "const8_operand" ""))
+ [(set (zero_extract (match_operand 0 "register_operand" "")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" ""))
(match_operand 3 "register_operand" ""))]
""
{
rtx (*gen_mov_insv_1) (rtx, rtx);
+ if (ix86_expand_pinsr (operands))
+ DONE;
+
/* Handle insertions to %ah et al. */
if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
FAIL;
===================================================================
@@ -6051,7 +6051,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_pinsrb"
+(define_insn "sse4_1_pinsrb"
[(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
(vec_merge:V16QI
(vec_duplicate:V16QI
@@ -6083,7 +6083,7 @@
(set_attr "prefix" "orig,orig,vex,vex")
(set_attr "mode" "TI")])
-(define_insn "*sse2_pinsrw"
+(define_insn "sse2_pinsrw"
[(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
(vec_merge:V8HI
(vec_duplicate:V8HI
@@ -6117,7 +6117,7 @@
(set_attr "mode" "TI")])
;; It must come before sse2_loadld since it is preferred.
-(define_insn "*sse4_1_pinsrd"
+(define_insn "sse4_1_pinsrd"
[(set (match_operand:V4SI 0 "register_operand" "=x,x")
(vec_merge:V4SI
(vec_duplicate:V4SI
@@ -6145,7 +6145,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "*sse4_1_pinsrq"
+(define_insn "sse4_1_pinsrq"
[(set (match_operand:V2DI 0 "register_operand" "=x,x")
(vec_merge:V2DI
(vec_duplicate:V2DI
===================================================================
@@ -203,6 +203,7 @@ extern void ix86_expand_vector_extract (
extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
+extern bool ix86_expand_pinsr (rtx *);
/* In i386-c.c */
extern void ix86_target_macros (void);
===================================================================
@@ -34106,6 +34106,88 @@ ix86_expand_vec_extract_even_odd (rtx ta
/* ... or we use the special-case patterns. */
expand_vec_perm_even_odd_1 (&d, odd);
}
+
+/* Expand an insert into a vector register through pinsr insn.  OPERANDS
+   are those of insv: destination, bit width, bit position and source.
+   Return true if successful; otherwise emit nothing and return false.  */
+
+bool
+ix86_expand_pinsr (rtx *operands)
+{
+  rtx dst = operands[0];
+  rtx src = operands[3];
+  unsigned int size = INTVAL (operands[1]);
+  unsigned int pos = INTVAL (operands[2]);
+
+  if (GET_CODE (dst) == SUBREG)
+    {
+      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
+      dst = SUBREG_REG (dst);
+    }
+
+  if (GET_CODE (src) == SUBREG)
+    {
+      /* Only the low part of the inner register is used below.  */
+      if (SUBREG_BYTE (src) != 0)
+        return false;
+      src = SUBREG_REG (src);
+    }
+
+  switch (GET_MODE (dst))
+    {
+    case V16QImode:
+    case V8HImode:
+    case V4SImode:
+    case V2DImode:
+      {
+        enum machine_mode srcmode = mode_for_size (size, MODE_INT, 0);
+        enum machine_mode dstmode;
+        rtx (*pinsr)(rtx, rtx, rtx, rtx);
+
+        switch (srcmode)
+          {
+          case QImode:
+            if (!TARGET_SSE4_1)
+              return false;
+            dstmode = V16QImode;
+            pinsr = gen_sse4_1_pinsrb;
+            break;
+          case HImode:
+            if (!TARGET_SSE2)
+              return false;
+            dstmode = V8HImode;
+            pinsr = gen_sse2_pinsrw;
+            break;
+          case SImode:
+            if (!TARGET_SSE4_1)
+              return false;
+            dstmode = V4SImode;
+            pinsr = gen_sse4_1_pinsrd;
+            break;
+          case DImode:
+            gcc_assert (TARGET_64BIT);
+            if (!TARGET_SSE4_1)
+              return false;
+            dstmode = V2DImode;
+            pinsr = gen_sse4_1_pinsrq;
+            break;
+          default:
+            return false;
+          }
+
+        /* Reject insertions to misaligned positions.  */
+        if (pos & (size - 1))
+          return false;
+
+        dst = gen_lowpart (dstmode, dst);
+        src = gen_lowpart (srcmode, src);
+        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << (pos / size))));
+        return true;
+      }
+    default:
+      return false;
+    }
+}
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
===================================================================
@@ -0,0 +1,86 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -msse2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+#include <string.h>
+
+#define msk0 0x00
+#define msk1 0x01
+#define msk2 0x02
+#define msk3 0x03
+#define msk4 0x04
+#define msk5 0x05
+#define msk6 0x06
+#define msk7 0x07
+
+static void
+TEST (void)
+{
+ union
+ {
+ __m128i x;
+ unsigned int i[4];
+ unsigned short s[8];
+ } res [8], val, tmp;
+ int masks[8];
+ unsigned short ins[4] = { 3, 4, 5, 6 };
+ int i;
+ /* Compare each _mm_insert_epi16 result with the same store into tmp.s[].  */
+ val.i[0] = 0x35251505;
+ val.i[1] = 0x75655545;
+ val.i[2] = 0xB5A59585;
+ val.i[3] = 0xF5E5D5C5;
+
+ /* Check pinsrw imm8, r32, xmm: insert ins[0] at each of the 8 positions.  */
+ res[0].x = _mm_insert_epi16 (val.x, ins[0], msk0);
+ res[1].x = _mm_insert_epi16 (val.x, ins[0], msk1);
+ res[2].x = _mm_insert_epi16 (val.x, ins[0], msk2);
+ res[3].x = _mm_insert_epi16 (val.x, ins[0], msk3);
+ res[4].x = _mm_insert_epi16 (val.x, ins[0], msk4);
+ res[5].x = _mm_insert_epi16 (val.x, ins[0], msk5);
+ res[6].x = _mm_insert_epi16 (val.x, ins[0], msk6);
+ res[7].x = _mm_insert_epi16 (val.x, ins[0], msk7);
+
+ masks[0] = msk0;
+ masks[1] = msk1;
+ masks[2] = msk2;
+ masks[3] = msk3;
+ masks[4] = msk4;
+ masks[5] = msk5;
+ masks[6] = msk6;
+ masks[7] = msk7;
+
+ for (i = 0; i < 8; i++)
+ {
+ tmp.x = val.x;
+ tmp.s[masks[i]] = ins[0]; /* Expected value: same element, plain store.  */
+ if (memcmp (&tmp, &res[i], sizeof (tmp)))
+ abort ();
+ }
+
+ /* Check pinsrw imm8, m16, xmm: alternate ins[0]/ins[1] at position 0.  */
+ for (i = 0; i < 8; i++)
+ {
+ res[i].x = _mm_insert_epi16 (val.x, ins[i % 2], msk0);
+ masks[i] = msk0;
+ }
+
+ for (i = 0; i < 8; i++)
+ {
+ tmp.x = val.x;
+ tmp.s[masks[i]] = ins[i % 2]; /* Expected value for the matching res[i].  */
+ if (memcmp (&tmp, &res[i], sizeof (tmp)))
+ abort ();
+ }
+}
===================================================================
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+typedef long T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, long x)
+{
+ struct S s;
+ /* Return a copy of Y in which the long at index 1 is replaced by X.  */
+ s.d = y;
+ ((T *) &s.d)[1] = x; /* T is the may_alias typedef of long.  */
+ return s.d;
+}
+
+static void
+TEST (void)
+{
+  /* foo must return its vector argument with long element 1 replaced.  */
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+      unsigned long l[2];
+    } input, expected, actual;
+  unsigned long ins[4] = { 3, 4, 5, 6 };
+
+  input.i[0] = 0x35251505;
+  input.i[1] = 0x75655545;
+  input.i[2] = 0xB5A59585;
+  input.i[3] = 0xF5E5D5C5;
+
+  expected.x = input.x;
+  expected.l[1] = ins[3];
+  actual.x = foo (input.x, ins[3]);
+  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
+    abort ();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+typedef char T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, char x)
+{
+ struct S s;
+ /* Return a copy of Y in which the char at index 1 is replaced by X.  */
+ s.d = y;
+ ((T *) &s.d)[1] = x; /* T is the may_alias typedef of char.  */
+ return s.d;
+}
+
+static void
+TEST (void)
+{
+  /* foo must return its vector argument with char element 1 replaced.  */
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+      unsigned char c[16];
+    } input, expected, actual;
+  unsigned char ins[4] = { 3, 4, 5, 6 };
+
+  input.i[0] = 0x35251505;
+  input.i[1] = 0x75655545;
+  input.i[2] = 0xB5A59585;
+  input.i[3] = 0xF5E5D5C5;
+
+  expected.x = input.x;
+  expected.c[1] = ins[3];
+  actual.x = foo (input.x, ins[3]);
+  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
+    abort ();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -msse2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+#include <string.h>
+
+typedef short T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, short x)
+{
+ struct S s;
+ /* Return a copy of Y in which the short at index 1 is replaced by X.  */
+ s.d = y;
+ ((T *) &s.d)[1] = x; /* T is the may_alias typedef of short.  */
+ return s.d;
+}
+
+static void
+TEST (void)
+{
+  /* foo must return its vector argument with short element 1 replaced.  */
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+      unsigned short s[8];
+    } input, expected, actual;
+  unsigned short ins[4] = { 3, 4, 5, 6 };
+
+  input.i[0] = 0x35251505;
+  input.i[1] = 0x75655545;
+  input.i[2] = 0xB5A59585;
+  input.i[3] = 0xF5E5D5C5;
+
+  expected.x = input.x;
+  expected.s[1] = ins[3];
+  actual.x = foo (input.x, ins[3]);
+  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
+    abort ();
+}
===================================================================
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse2-pinsrw.c"
===================================================================
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include <smmintrin.h>
+#include <string.h>
+
+typedef int T __attribute__((may_alias));
+struct S { __m128i d; };
+
+__m128i
+__attribute__((noinline))
+foo (__m128i y, int x)
+{
+ struct S s;
+ /* Return a copy of Y in which the int at index 1 is replaced by X.  */
+ s.d = y;
+ ((T *) &s.d)[1] = x; /* T is the may_alias typedef of int.  */
+ return s.d;
+}
+
+static void
+TEST (void)
+{
+  /* foo must return its vector argument with int element 1 replaced.  */
+  union
+    {
+      __m128i x;
+      unsigned int i[4];
+    } input, expected, actual;
+  unsigned int ins[4] = { 3, 4, 5, 6 };
+
+  input.i[0] = 0x35251505;
+  input.i[1] = 0x75655545;
+  input.i[2] = 0xB5A59585;
+  input.i[3] = 0xF5E5D5C5;
+
+  expected.x = input.x;
+  expected.i[1] = ins[3];
+  actual.x = foo (input.x, ins[3]);
+  if (memcmp (&expected, &actual, sizeof (expected)) != 0)
+    abort ();
+}