@@ -18634,10 +18634,26 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
case E_V2DFmode:
case E_V4SFmode:
case E_V2DImode:
+ case E_V2SImode:
case E_V4SImode:
/* These are always directly implementable by expand_vec_perm_1. */
gcc_unreachable ();
+ case E_V4HImode:
+ if (d->testing_p)
+ break;
+ /* We need 2*log2(N)-1 operations to achieve odd/even
+ with interleave. */
+ t1 = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mmx_punpckhwd (t1, d->op0, d->op1));
+ emit_insn (gen_mmx_punpcklwd (d->target, d->op0, d->op1));
+ if (odd)
+ t2 = gen_mmx_punpckhwd (d->target, d->target, t1);
+ else
+ t2 = gen_mmx_punpcklwd (d->target, d->target, t1);
+ emit_insn (t2);
+ break;
+
case E_V8HImode:
if (TARGET_SSE4_1)
return expand_vec_perm_even_odd_pack (d);
@@ -18820,6 +18836,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
case E_V2DFmode:
case E_V2DImode:
case E_V4SFmode:
+ case E_V2SImode:
case E_V4SImode:
/* These are always implementable using standard shuffle patterns. */
gcc_unreachable ();
@@ -19312,6 +19329,11 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
if (d.testing_p && TARGET_SSSE3)
return true;
break;
+ case E_V2SImode:
+ case E_V4HImode:
+ if (!TARGET_MMX_WITH_SSE)
+ return false;
+ break;
case E_V2DImode:
case E_V2DFmode:
if (!TARGET_SSE)
@@ -19344,7 +19366,9 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
d.one_operand_p = (which != 3);
/* Implementable with shufps or pshufd. */
- if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
+ if (d.one_operand_p
+ && (d.vmode == V4SFmode
+ || d.vmode == V4SImode || d.vmode == V2SImode))
return true;
/* Otherwise we have to go through the motions and see if we can
@@ -1988,6 +1988,28 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI,TI")])
+(define_insn "*mmx_pshufd_1"
+ [(set (match_operand:V2SI 0 "register_operand" "=Yv")
+ (vec_select:V2SI
+ (match_operand:V2SI 1 "register_operand" "Yv")
+ (parallel [(match_operand 2 "const_0_to_1_operand")
+ (match_operand 3 "const_0_to_1_operand")])))]
+ "TARGET_MMX_WITH_SSE"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= 2 << 4;
+ mask |= 3 << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
(define_insn "mmx_pswapdv2si2"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_select:V2SI
new file mode 100644
@@ -0,0 +1,41 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef int S;
+typedef int V __attribute__((vector_size(8)));
+typedef int IV __attribute__((vector_size(8)));
+typedef union { S s[2]; V v; } U;
+
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+#define TEST(E0, E1) \
+ b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \
+ c.s[0] = i[0].s[E0]; \
+ c.s[1] = i[0].s[E1]; \
+ __asm__("" : : : "memory"); \
+ assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-2-2.inc"
+
+int main()
+{
+ check_isa ();
+
+ if (!sse_os_support ())
+ exit (0);
+
+ i[0].s[0] = 0;
+ i[0].s[1] = 1;
+ i[0].s[2] = 2;
+ i[0].s[3] = 3;
+
+ check();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef short S;
+typedef short V __attribute__((vector_size(8)));
+typedef short IV __attribute__((vector_size(8)));
+typedef union { S s[4]; V v; } U;
+
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+#define TEST(E0, E1, E2, E3) \
+ b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
+ c.s[0] = i[0].s[E0]; \
+ c.s[1] = i[0].s[E1]; \
+ c.s[2] = i[0].s[E2]; \
+ c.s[3] = i[0].s[E3]; \
+ __asm__("" : : : "memory"); \
+ assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-4-2.inc"
+
+int main()
+{
+ check_isa ();
+
+ if (!sse_os_support ())
+ exit (0);
+
+ i[0].s[0] = 0;
+ i[0].s[1] = 1;
+ i[0].s[2] = 2;
+ i[0].s[3] = 3;
+ i[0].s[4] = 4;
+ i[0].s[5] = 5;
+ i[0].s[6] = 6;
+ i[0].s[7] = 7;
+
+ check();
+ return 0;
+}