@@ -429,6 +429,11 @@ scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
if (!HARD_REGISTER_P (DF_REF_REG (ref)))
analyze_register_chain (candidates, ref);
+
+ /* The operand of a VEC_SELECT is already a vector register, so the
+ uses of this insn need not be converted (nor convertible). */
+ if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT)
+ return;
+
for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
if (!DF_REF_REG_MEM_P (ref))
analyze_register_chain (candidates, ref);
@@ -629,6 +634,23 @@ general_scalar_chain::compute_convert_gain ()
}
break;
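+ /* Vector element extracts stay in the vector domain after
+ conversion; the move to a GPR becomes a vector move or is
+ folded into the shuffle. */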
+ case VEC_SELECT:
+ if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
+ {
+ /* movd (4 bytes) replaced with movdqa (4 bytes). */
+ if (!optimize_insn_for_size_p ())
+ igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move;
+ }
+ else
+ {
+ /* pshufd; movd replaced with pshufd. */
+ if (optimize_insn_for_size_p ())
+ igain += COSTS_N_BYTES (4);
+ else
+ igain += ix86_cost->sse_to_integer;
+ }
+ break;
+
default:
gcc_unreachable ();
}
@@ -1167,6 +1189,24 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
convert_op (&src, insn);
break;
+ case VEC_SELECT:
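+ /* Selecting element 0: the value is the lowpart of the vector
+ operand, so use the vector register directly as the source. */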
+ if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
+ src = XEXP (src, 0);
+ else if (smode == DImode)
+ {
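+ /* Selecting the upper DImode element: shift it into the lowpart
+ with a V1TImode logical right shift by 64 bits (psrldq $8). */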
+ rtx tmp = gen_lowpart (V1TImode, XEXP (src, 0));
+ dst = gen_lowpart (V1TImode, dst);
+ src = gen_rtx_LSHIFTRT (V1TImode, tmp, GEN_INT (64));
+ }
+ else
+ {
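+ /* Selecting a nonzero SImode element: broadcast it to all four
+ lanes with a pshufd, leaving the value in the lowpart of dst. */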
+ rtx tmp = XVECEXP (XEXP (src, 1), 0, 0);
+ rtvec vec = gen_rtvec (4, tmp, tmp, tmp, tmp);
+ rtx par = gen_rtx_PARALLEL (VOIDmode, vec);
+ src = gen_rtx_VEC_SELECT (vmode, XEXP (src, 0), par);
+ }
+ break;
+
default:
gcc_unreachable ();
}
@@ -1917,6 +1957,16 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
case CONST_INT:
return REG_P (dst);
+ case VEC_SELECT:
+ /* Excluding MEM_P (dst) avoids interfering with vpextr[dq]. */
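+ /* Only a select of a single constant element from a V2DImode or
+ V4SImode register is handled. */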
+ return REG_P (dst)
+ && REG_P (XEXP (src, 0))
+ && GET_MODE (XEXP (src, 0)) == (mode == DImode ? V2DImode
+ : V4SImode)
+ && GET_CODE (XEXP (src, 1)) == PARALLEL
+ && XVECLEN (XEXP (src, 1), 0) == 1
+ && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
+
default:
return false;
}
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mstv -mno-stackrealign" } */
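+/* The adds and the element extracts must all stay in the vector unit:
+ one movd per function for the scalar return value, paddd for each
+ addition, and no scalar addl. */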
+typedef unsigned int v4si __attribute__((vector_size(16)));
+
+unsigned int foo1 (v4si a, v4si b)
+{
+ a[0] += b[0];
+ return a[0] + a[1];
+}
+
+unsigned int foo2 (v4si a, v4si b)
+{
+ a[0] += b[0];
+ return a[0] + a[2];
+}
+
+unsigned int foo3 (v4si a, v4si b)
+{
+ a[0] += b[0];
+ return a[0] + a[3];
+}
+
+/* { dg-final { scan-assembler-times "\tmovd\t" 3 } } */
+/* { dg-final { scan-assembler-times "paddd" 6 } } */
+/* { dg-final { scan-assembler-not "addl" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mstv -mno-stackrealign" } */
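+/* The a[1] extract must stay in the vector domain as a psrldq, with
+ both additions done by paddq and no scalar addq. */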
+typedef unsigned long long v2di __attribute__((vector_size(16)));
+
+unsigned long long foo(v2di a, v2di b)
+{
+ a[0] += b[0];
+ return a[0] + a[1];
+}
+
+/* { dg-final { scan-assembler-not "\taddq\t" } } */
+/* { dg-final { scan-assembler-times "paddq" 2 } } */
+/* { dg-final { scan-assembler "psrldq" } } */