@@ -2092,6 +2092,55 @@ (define_insn "*<insn><mode>3"
(set_attr "type" "sseadd")
(set_attr "mode" "TI")])
+;; V2SI multiply in SSE registers; synthesized when SSE4.1 is unavailable.
+(define_expand "mulv2si3"
+  [(set (match_operand:V2SI 0 "register_operand")
+	(mult:V2SI (match_operand:V2SI 1 "register_operand")
+		   (match_operand:V2SI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!TARGET_SSE4_1)
+    {
+      /* View both V2SI inputs as V4SI through lowpart subregs.  */
+      rtx lhs = force_reg (V2SImode, operands[1]);
+      lhs = lowpart_subreg (V4SImode, lhs, V2SImode);
+      rtx rhs = force_reg (V2SImode, operands[2]);
+      rhs = lowpart_subreg (V4SImode, rhs, V2SImode);
+      /* Interleave each input with itself: elements move to lanes 0, 2.  */
+      rtx lhs_dup = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vec_interleave_lowv4si (lhs_dup, lhs, lhs));
+      rtx rhs_dup = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vec_interleave_lowv4si (rhs_dup, rhs, rhs));
+      /* Widening unsigned multiply of the even lanes into V2DI.  */
+      rtx wide = gen_reg_rtx (V2DImode);
+      emit_insn (gen_vec_widen_umult_even_v4si (wide, lhs_dup, rhs_dup));
+      /* Pick lanes 0 and 2, i.e. the low 32 bits of each 64-bit product.  */
+      rtx lo32 = gen_reg_rtx (V4SImode);
+      rtx wide_v4si = gen_lowpart (V4SImode, wide);
+      emit_insn (gen_sse2_pshufd_1 (lo32, wide_v4si, const0_rtx, const2_rtx,
+				    const0_rtx, const2_rtx));
+      emit_move_insn (operands[0], lowpart_subreg (V2SImode, lo32, V4SImode));
+      DONE;
+    }
+})
+;; SSE4.1 V2SI multiply: pmulld (noavx) or three-operand vpmulld (avx).
+(define_insn "*mulv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
+ (mult:V2SI
+ (match_operand:V2SI 1 "register_operand" "%0,0,v") ; tied to op 0 in alts 0-1
+ (match_operand:V2SI 2 "register_operand" "Yr,*x,v")))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "@
+ pmulld\t{%2, %0|%0, %2}
+ pmulld\t{%2, %0|%0, %2}
+ vpmulld\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,noavx,avx") ; one entry per constraint alternative
+ (set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector")
+ (set_attr "mode" "TI")])
+
(define_expand "mmx_mulv4hi3"
[(set (match_operand:V4HI 0 "register_operand")
(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#define N 2
+
+int a[N] = {-287807, 604344};
+int b[N] = {474362, 874120};
+int r[N];
+
+/* Expected products, reduced modulo 2^32.  */
+int rc[N] = {914249338, -11800128};
+
+/* Multiply A and B element-wise into R and abort on any mismatch with RC.
+   Both products exceed the range of int, so the multiply is done in
+   unsigned arithmetic to avoid undefined signed overflow.  */
+static void
+sse2_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    r[i] = (int) ((unsigned int) a[i] * (unsigned int) b[i]);
+
+  /* Check results.  */
+  for (i = 0; i < N; i++)
+    if (r[i] != rc[i])
+      abort ();
+}