@@ -1124,8 +1124,9 @@ ix86_split_mmx_punpck (rtx operands[], bool high_p)
switch (mode)
{
- case E_V4QImode:
case E_V8QImode:
+ case E_V4QImode:
+ case E_V2QImode:
sse_mode = V16QImode;
double_sse_mode = V32QImode;
mask = gen_rtx_PARALLEL (VOIDmode,
@@ -5636,7 +5637,43 @@ ix86_expand_vec_perm (rtx operands[])
}
}
-/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
+/* Extend SRC into the next wider integer vector type and store the
+   result in DEST.  UNSIGNED_P is true if we should do zero extension,
+   else sign extension.  */
+
+void
+ix86_expand_sse_extend (rtx dest, rtx src, bool unsigned_p)
+{
+  machine_mode imode = GET_MODE (src);
+  rtx ops[3];
+
+  switch (imode)
+    {
+    case E_V8QImode:
+    case E_V4QImode:
+    case E_V2QImode:
+    case E_V4HImode:
+    case E_V2HImode:
+    case E_V2SImode:
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  ops[0] = gen_reg_rtx (imode);
+  ops[1] = force_reg (imode, src);
+  /* OPS[2] is zero, or the result of comparing 0 > OPS[1] when signed.  */
+  if (unsigned_p)
+    ops[2] = force_reg (imode, CONST0_RTX (imode));
+  else
+    ops[2] = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+                                  ops[1], pc_rtx, pc_rtx);
+
+  ix86_split_mmx_punpck (ops, false);
+  emit_move_insn (dest, lowpart_subreg (GET_MODE (dest), ops[0], imode));
+}
+
+/* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
true if we should do zero extension, else sign extension. HIGH_P is
true if we want the N/2 high elements, else the low elements. */
@@ -155,6 +155,7 @@ extern bool ix86_expand_mask_vec_cmp (rtx, enum rtx_code, rtx, rtx);
extern bool ix86_expand_int_vec_cmp (rtx[]);
extern bool ix86_expand_fp_vec_cmp (rtx[]);
extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx);
+extern void ix86_expand_sse_extend (rtx, rtx, bool);
extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
extern void ix86_expand_fp_spaceship (rtx, rtx, rtx);
extern bool ix86_expand_int_addcc (rtx[]);
@@ -3744,8 +3744,14 @@ (define_expand "<insn>v4qiv4hi2"
[(set (match_operand:V4HI 0 "register_operand")
(any_extend:V4HI
(match_operand:V4QI 1 "register_operand")))]
- "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "TARGET_MMX_WITH_SSE"
{
+ if (!TARGET_SSE4_1)
+ {
+ ix86_expand_sse_extend (operands[0], operands[1], <u_bool>);
+ DONE;
+ }
+
rtx op1 = force_reg (V4QImode, operands[1]);
op1 = lowpart_subreg (V8QImode, op1, V4QImode);
emit_insn (gen_sse4_1_<code>v4qiv4hi2 (operands[0], op1));
@@ -3770,8 +3776,14 @@ (define_expand "<insn>v2hiv2si2"
[(set (match_operand:V2SI 0 "register_operand")
(any_extend:V2SI
(match_operand:V2HI 1 "register_operand")))]
- "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "TARGET_MMX_WITH_SSE"
{
+ if (!TARGET_SSE4_1)
+ {
+ ix86_expand_sse_extend (operands[0], operands[1], <u_bool>);
+ DONE;
+ }
+
rtx op1 = force_reg (V2HImode, operands[1]);
op1 = lowpart_subreg (V4HImode, op1, V2HImode);
emit_insn (gen_sse4_1_<code>v2hiv2si2 (operands[0], op1));
@@ -3822,8 +3834,14 @@ (define_expand "<insn>v2qiv2hi2"
[(set (match_operand:V2HI 0 "register_operand")
(any_extend:V2HI
(match_operand:V2QI 1 "register_operand")))]
- "TARGET_SSE4_1"
+ "TARGET_SSE2"
{
+ if (!TARGET_SSE4_1)
+ {
+ ix86_expand_sse_extend (operands[0], operands[1], <u_bool>);
+ DONE;
+ }
+
rtx op1 = force_reg (V2QImode, operands[1]);
op1 = lowpart_subreg (V4QImode, op1, V2QImode);
emit_insn (gen_sse4_1_<code>v2qiv2hi2 (operands[0], op1));
@@ -22919,8 +22919,15 @@ (define_expand "<insn>v8qiv8hi2"
[(set (match_operand:V8HI 0 "register_operand")
(any_extend:V8HI
(match_operand:V8QI 1 "nonimmediate_operand")))]
- "TARGET_SSE4_1"
+ "TARGET_SSE4_1 || TARGET_MMX_WITH_SSE"
{
+ if (!TARGET_SSE4_1)
+ {
+ rtx op1 = force_reg (V8QImode, operands[1]);
+ ix86_expand_sse_extend (operands[0], op1, <u_bool>);
+ DONE;
+ }
+
if (!MEM_P (operands[1]))
{
rtx op1 = force_reg (V8QImode, operands[1]);
@@ -23229,8 +23236,15 @@ (define_expand "<insn>v4hiv4si2"
[(set (match_operand:V4SI 0 "register_operand")
(any_extend:V4SI
(match_operand:V4HI 1 "nonimmediate_operand")))]
- "TARGET_SSE4_1"
+ "TARGET_SSE4_1 || TARGET_MMX_WITH_SSE"
{
+ if (!TARGET_SSE4_1)
+ {
+ rtx op1 = force_reg (V4HImode, operands[1]);
+ ix86_expand_sse_extend (operands[0], op1, <u_bool>);
+ DONE;
+ }
+
if (!MEM_P (operands[1]))
{
rtx op1 = force_reg (V4HImode, operands[1]);
@@ -23828,8 +23842,15 @@ (define_expand "<insn>v2siv2di2"
[(set (match_operand:V2DI 0 "register_operand")
(any_extend:V2DI
(match_operand:V2SI 1 "nonimmediate_operand")))]
- "TARGET_SSE4_1"
+ "TARGET_SSE4_1 || TARGET_MMX_WITH_SSE"
{
+ if (!TARGET_SSE4_1)
+ {
+ rtx op1 = force_reg (V2SImode, operands[1]);
+ ix86_expand_sse_extend (operands[0], op1, <u_bool>);
+ DONE;
+ }
+
if (!MEM_P (operands[1]))
{
rtx op1 = force_reg (V2SImode, operands[1]);
new file mode 100644
@@ -0,0 +1,52 @@
+/* PR target/111023 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse2 -mno-sse4.1" } */
+
+typedef char v16qi __attribute__((vector_size (16)));
+typedef short v8hi __attribute__((vector_size (16)));
+typedef int v4si __attribute__((vector_size (16)));
+typedef long long v2di __attribute__((vector_size (16)));
+
+void
+v8hi_v8qi (v8hi *dst, v16qi src)
+{
+ short tem[8]; /* First eight elements of SRC, each converted to short.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8hi *) tem; /* Store the eight shorts through DST as one v8hi.  */
+}
+
+/* { dg-final { scan-assembler "pcmpgtb" } } */
+/* { dg-final { scan-assembler "punpcklbw" } } */
+
+void
+v4si_v4hi (v4si *dst, v8hi src)
+{
+ int tem[4]; /* First four elements of SRC, each converted to int.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4si *) tem; /* Store the four ints through DST as one v4si.  */
+}
+
+/* { dg-final { scan-assembler "pcmpgtw" } } */
+/* { dg-final { scan-assembler "punpcklwd" } } */
+
+void
+v2di_v2si (v2di *dst, v4si src)
+{
+ long long tem[2]; /* First two elements of SRC, each converted to long long.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ dst[0] = *(v2di *) tem; /* Store both values through DST as one v2di.  */
+}
+
+/* { dg-final { scan-assembler "pcmpgtd" } } */
+/* { dg-final { scan-assembler "punpckldq" } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* PR target/111023 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse2 -mno-sse4.1" } */
+
+typedef unsigned char v4qi __attribute__((vector_size (4)));
+typedef unsigned short v2hi __attribute__((vector_size (4)));
+
+void
+v2hi_v2qi (v2hi *dst, v4qi src)
+{
+ unsigned short tem[2]; /* First two elements of SRC, converted to unsigned short.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ dst[0] = *(v2hi *) tem; /* Store both values through DST as one v2hi.  */
+}
+
+/* { dg-final { scan-assembler "punpcklbw" } } */
new file mode 100644
@@ -0,0 +1,31 @@
+/* PR target/111023 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse2 -mno-sse4.1" } */
+
+typedef unsigned char v8qi __attribute__((vector_size (8)));
+typedef unsigned short v4hi __attribute__((vector_size (8)));
+typedef unsigned int v2si __attribute__((vector_size (8)));
+
+void
+v4hi_v4qi (v4hi *dst, v8qi src)
+{
+ unsigned short tem[4]; /* First four elements of SRC, converted to unsigned short.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4hi *) tem; /* Store the four values through DST as one v4hi.  */
+}
+
+/* { dg-final { scan-assembler "punpcklbw" } } */
+
+void
+v2si_v2hi (v2si *dst, v4hi src)
+{
+ unsigned int tem[2]; /* First two elements of SRC, converted to unsigned int.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ dst[0] = *(v2si *) tem; /* Store both values through DST as one v2si.  */
+}
+
+/* { dg-final { scan-assembler "punpcklwd" } } */
new file mode 100644
@@ -0,0 +1,49 @@
+/* PR target/111023 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse2 -mno-sse4.1" } */
+
+typedef unsigned char v16qi __attribute__((vector_size (16)));
+typedef unsigned short v8hi __attribute__((vector_size (16)));
+typedef unsigned int v4si __attribute__((vector_size (16)));
+typedef unsigned long long v2di __attribute__((vector_size (16)));
+
+void
+v8hi_v8qi (v8hi *dst, v16qi src)
+{
+ unsigned short tem[8]; /* First eight elements of SRC, converted to unsigned short.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8hi *) tem; /* Store the eight values through DST as one v8hi.  */
+}
+
+/* { dg-final { scan-assembler "punpcklbw" } } */
+
+void
+v4si_v4hi (v4si *dst, v8hi src)
+{
+ unsigned int tem[4]; /* First four elements of SRC, converted to unsigned int.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4si *) tem; /* Store the four values through DST as one v4si.  */
+}
+
+/* { dg-final { scan-assembler "punpcklwd" } } */
+
+void
+v2di_v2si (v2di *dst, v4si src)
+{
+ unsigned long long tem[2]; /* First two elements of SRC, converted to unsigned long long.  */
+ tem[0] = src[0];
+ tem[1] = src[1];
+ dst[0] = *(v2di *) tem; /* Store both values through DST as one v2di.  */
+}
+
+/* { dg-final { scan-assembler "punpckldq" } } */