@@ -1209,6 +1209,8 @@ timode_scalar_chain::compute_convert_gain ()
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
+ HOST_WIDE_INT op1val;
+ int scost, vcost;
int igain = 0;
switch (GET_CODE (src))
@@ -1245,9 +1247,157 @@ timode_scalar_chain::compute_convert_gain ()
case ASHIFT:
case LSHIFTRT:
- /* For logical shifts by constant multiples of 8. */
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (4)
- : COSTS_N_INSNS (1);
+ /* See ix86_expand_v1ti_shift. The shift count is operand 1 of SRC. */
+ op1val = INTVAL (XEXP (src, 1));
+ if (optimize_insn_for_size_p ())
+ {
+ if (op1val == 64 || op1val == 65)
+ scost = COSTS_N_BYTES (5);
+ else if (op1val >= 66)
+ scost = COSTS_N_BYTES (6);
+ else if (op1val == 1)
+ scost = COSTS_N_BYTES (8);
+ else
+ scost = COSTS_N_BYTES (9);
+
+ if ((op1val & 7) == 0)
+ vcost = COSTS_N_BYTES (5);
+ else if (op1val > 64)
+ vcost = COSTS_N_BYTES (10);
+ else
+ vcost = TARGET_AVX ? COSTS_N_BYTES (19) : COSTS_N_BYTES (23);
+ }
+ else
+ {
+ scost = COSTS_N_INSNS (2);
+ if ((op1val & 7) == 0)
+ vcost = COSTS_N_INSNS (1);
+ else if (op1val > 64)
+ vcost = COSTS_N_INSNS (2);
+ else
+ vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5);
+ }
+ igain = scost - vcost;
+ break;
+
+ case ASHIFTRT:
+ /* See ix86_expand_v1ti_ashiftrt. The shift count is operand 1 of SRC. */
+ op1val = INTVAL (XEXP (src, 1));
+ if (optimize_insn_for_size_p ())
+ {
+ if (op1val == 64 || op1val == 127)
+ scost = COSTS_N_BYTES (7);
+ else if (op1val == 1)
+ scost = COSTS_N_BYTES (8);
+ else if (op1val == 65)
+ scost = COSTS_N_BYTES (10);
+ else if (op1val >= 66)
+ scost = COSTS_N_BYTES (11);
+ else
+ scost = COSTS_N_BYTES (9);
+
+ if (op1val == 127)
+ vcost = COSTS_N_BYTES (10);
+ else if (op1val == 64)
+ vcost = COSTS_N_BYTES (14);
+ else if (op1val == 96)
+ vcost = COSTS_N_BYTES (18);
+ else if (op1val >= 111)
+ vcost = COSTS_N_BYTES (15);
+ else if (TARGET_AVX2 && op1val == 32)
+ vcost = COSTS_N_BYTES (16);
+ else if (TARGET_SSE4_1 && op1val == 32)
+ vcost = COSTS_N_BYTES (20);
+ else if (op1val >= 96)
+ vcost = COSTS_N_BYTES (23);
+ else if ((op1val & 7) == 0)
+ vcost = COSTS_N_BYTES (28);
+ else if (TARGET_AVX2 && op1val < 32)
+ vcost = COSTS_N_BYTES (30);
+ else if (op1val == 1 || op1val >= 64)
+ vcost = COSTS_N_BYTES (42);
+ else
+ vcost = COSTS_N_BYTES (47);
+ }
+ else
+ {
+ if (op1val >= 65 && op1val <= 126)
+ scost = COSTS_N_INSNS (3);
+ else
+ scost = COSTS_N_INSNS (2);
+
+ if (op1val == 127)
+ vcost = COSTS_N_INSNS (2);
+ else if (op1val == 64)
+ vcost = COSTS_N_INSNS (3);
+ else if (op1val == 96)
+ vcost = COSTS_N_INSNS (4);
+ else if (op1val >= 111)
+ vcost = COSTS_N_INSNS (3);
+ else if (TARGET_AVX2 && op1val == 32)
+ vcost = COSTS_N_INSNS (3);
+ else if (TARGET_SSE4_1 && op1val == 32)
+ vcost = COSTS_N_INSNS (4);
+ else if (op1val >= 96)
+ vcost = COSTS_N_INSNS (5);
+ else if ((op1val & 7) == 0)
+ vcost = COSTS_N_INSNS (6);
+ else if (TARGET_AVX2 && op1val < 32)
+ vcost = COSTS_N_INSNS (6);
+ else if (op1val == 1 || op1val >= 64)
+ vcost = COSTS_N_INSNS (9);
+ else
+ vcost = COSTS_N_INSNS (10);
+ }
+ igain = scost - vcost;
+ break;
+
+ case ROTATE:
+ case ROTATERT:
+ /* See ix86_expand_v1ti_rotate. The rotate count is operand 1 of SRC. */
+ op1val = INTVAL (XEXP (src, 1));
+ if (optimize_insn_for_size_p ())
+ {
+ scost = COSTS_N_BYTES (13);
+ if ((op1val & 31) == 0)
+ vcost = COSTS_N_BYTES (5);
+ else if ((op1val & 7) == 0)
+ vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18);
+ else if (op1val > 32 && op1val < 96)
+ vcost = COSTS_N_BYTES (24);
+ else
+ vcost = COSTS_N_BYTES (19);
+ }
+ else
+ {
+ scost = COSTS_N_INSNS (3);
+ if ((op1val & 31) == 0)
+ vcost = COSTS_N_INSNS (1);
+ else if ((op1val & 7) == 0)
+ vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4);
+ else if (op1val > 32 && op1val < 96)
+ vcost = COSTS_N_INSNS (5);
+ else
+ vcost = COSTS_N_INSNS (1);
+ }
+ igain = scost - vcost;
+ break;
+
+ case COMPARE:
+ if (XEXP (src, 1) == const0_rtx)
+ {
+ if (GET_CODE (XEXP (src, 0)) == AND)
+ /* and;and;or (9 bytes) vs. ptest (5 bytes). */
+ igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (4)
+ : COSTS_N_INSNS (2);
+ /* or (3 bytes) vs. ptest (5 bytes). */
+ else if (optimize_insn_for_size_p ())
+ igain = -COSTS_N_BYTES (2);
+ }
+ else if (XEXP (src, 1) == const1_rtx)
+ /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */
+ igain = optimize_insn_for_size_p () ? -COSTS_N_BYTES (6)
+ : -COSTS_N_INSNS (1);
break;
default:
@@ -1503,6 +1653,9 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
case ASHIFT:
case LSHIFTRT:
+ case ASHIFTRT:
+ case ROTATERT:
+ case ROTATE:
convert_op (&XEXP (src, 0), insn);
PUT_MODE (src, V1TImode);
break;
@@ -1861,11 +2014,13 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
case ASHIFT:
case LSHIFTRT:
- /* Handle logical shifts by integer constants between 0 and 120
- that are multiples of 8. */
+ case ASHIFTRT:
+ case ROTATERT:
+ case ROTATE:
+ /* Handle shifts/rotates by integer constants between 0 and 127. */
return REG_P (XEXP (src, 0))
&& CONST_INT_P (XEXP (src, 1))
- && (INTVAL (XEXP (src, 1)) & ~0x78) == 0;
+ && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
default:
return false;
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse4.1 -mstv -mno-stackrealign" } */
+
+unsigned __int128 a, b;
+void rot1() { a = (b >> 1) | (b << 127); }
+void rot4() { a = (b >> 4) | (b << 124); }
+void rot8() { a = (b >> 8) | (b << 120); }
+void rot32() { a = (b >> 32) | (b << 96); }
+void rot64() { a = (b >> 64) | (b << 64); }
+
+/* { dg-final { scan-assembler-not "shrdq" } } */
+/* { dg-final { scan-assembler "pshufd" } } */