@@ -582,6 +582,25 @@ general_scalar_chain::compute_convert_gain ()
igain -= vector_const_cost (XEXP (src, 0));
break;
+ case ROTATE:
+ case ROTATERT: /* Rotate by a constant: credit the scalar constant-shift cost scaled
+    by m (set outside this hunk), then charge sse_op for the vector sequence.  */
+ igain += m * ix86_cost->shift_const;
+ if (smode == DImode)
+ {
+ int bits = INTVAL (XEXP (src, 1));
+ if ((bits & 0x0f) == 0)
+ igain -= ix86_cost->sse_op; /* Multiple of 16: one op (convert_rotate uses a single pshufd or pshuflw for 16, 32 and 48).  */
+ else if ((bits & 0x07) == 0)
+ igain -= 2 * ix86_cost->sse_op; /* Other multiples of 8: pshufd plus a V1TI right shift.  */
+ else
+ igain -= 3 * ix86_cost->sse_op; /* Anything else: pshufd, V2DI right shift, pshufd.  */
+ }
+ else if (INTVAL (XEXP (src, 1)) == 16)
+ igain -= ix86_cost->sse_op; /* SImode by 16: a single pshuflw.  */
+ else
+ igain -= 2 * ix86_cost->sse_op; /* Other SImode counts: pshufd plus a V2DI right shift.  */
+ break;
+
case AND:
case IOR:
case XOR:
@@ -1154,6 +1173,95 @@ scalar_chain::convert_insn_common (rtx_insn *insn)
}
}
+/* Convert INSN, an SImode or DImode rotate by a constant, to vector
+   mode.  CODE is ROTATE or ROTATERT, OP0 is the value being rotated
+   and OP1 the CONST_INT bit count.  Any helper instructions are emitted
+   before INSN, and the rtx holding the rotated value is returned so the
+   caller can use it as the new SET_SRC.
+
+   Each pshufd/pshuflw immediate packs four 2-bit source indices, the
+   least significant pair choosing the source of result element 0; the
+   comments below list the indices in that order.  */
+
+rtx
+general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
+                                      rtx_insn *insn)
+{
+  int count = INTVAL (op1);
+
+  convert_op (&op0, insn);
+
+  /* A rotate by zero needs no instructions at all.  */
+  if (count == 0)
+    return op0;
+
+  if (smode != DImode)
+    {
+      /* SImode by 16, in either direction: swap the two low 16-bit
+         words, pshuflw indices {1,0,2,3}.  */
+      if (count == 16)
+        {
+          rtx swapped = gen_reg_rtx (V8HImode);
+          emit_insn_before (gen_sse2_pshuflw (swapped,
+                                              gen_lowpart (V8HImode, op0),
+                                              GEN_INT (225)),
+                            insn);
+          return gen_lowpart (V4SImode, swapped);
+        }
+
+      /* Express the operation as a right rotate.  */
+      if (code == ROTATE)
+        count = 32 - count;
+
+      /* pshufd indices {0,0,2,3} copy element 0 into element 1, so the
+         low quadword holds the value twice; a 64-bit logical right
+         shift then leaves the rotated value in the low 32 bits.  */
+      rtx doubled = gen_reg_rtx (V4SImode);
+      emit_insn_before (gen_sse2_pshufd (doubled, op0, GEN_INT (224)), insn);
+      rtx shifted = gen_reg_rtx (V2DImode);
+      emit_insn_before (gen_lshrv2di3 (shifted,
+                                       gen_lowpart (V2DImode, doubled),
+                                       GEN_INT (count)),
+                        insn);
+      return gen_lowpart (V4SImode, shifted);
+    }
+
+  /* DImode from here on, expressed as a right rotate.  */
+  if (code == ROTATE)
+    count = 64 - count;
+
+  if (count == 32)
+    {
+      /* Swap the two low 32-bit elements, pshufd indices {1,0,2,3}.  */
+      rtx swapped = gen_reg_rtx (V4SImode);
+      emit_insn_before (gen_sse2_pshufd (swapped,
+                                         gen_lowpart (V4SImode, op0),
+                                         GEN_INT (225)),
+                        insn);
+      return gen_lowpart (V2DImode, swapped);
+    }
+
+  if (count == 16 || count == 48)
+    {
+      /* Rotate the four low 16-bit words with one pshuflw: indices
+         {1,2,3,0} for 16 bits, {3,0,1,2} for 48 bits.  */
+      rtx rotated = gen_reg_rtx (V8HImode);
+      emit_insn_before (gen_sse2_pshuflw (rotated,
+                                          gen_lowpart (V8HImode, op0),
+                                          GEN_INT (count == 16 ? 57 : 147)),
+                        insn);
+      return gen_lowpart (V2DImode, rotated);
+    }
+
+  if ((count & 0x07) == 0)
+    {
+      /* Remaining whole-byte counts: pshufd indices {0,1,0,1} copy the
+         low quadword into the high one, then a 128-bit right shift
+         brings the wrapped-around bits in from the copy.  */
+      rtx doubled = gen_reg_rtx (V4SImode);
+      emit_insn_before (gen_sse2_pshufd (doubled,
+                                         gen_lowpart (V4SImode, op0),
+                                         GEN_INT (68)),
+                        insn);
+      rtx shifted = gen_reg_rtx (V1TImode);
+      emit_insn_before (gen_sse2_lshrv1ti3 (shifted,
+                                            gen_lowpart (V1TImode, doubled),
+                                            GEN_INT (count)),
+                        insn);
+      return gen_lowpart (V2DImode, shifted);
+    }
+
+  /* General case.  pshufd indices {0,1,1,0} put the value in the low
+     quadword and the value with its halves exchanged in the high
+     quadword.  Shifting both quadwords right by COUNT mod 32 leaves the
+     two 32-bit halves of the result in elements 0 and 2.  The final
+     pshufd gathers them: {0,2,0,2} when COUNT is below 32, {2,0,2,0}
+     when it is above.  */
+  rtx spread = gen_reg_rtx (V4SImode);
+  emit_insn_before (gen_sse2_pshufd (spread,
+                                     gen_lowpart (V4SImode, op0),
+                                     GEN_INT (20)),
+                    insn);
+  rtx shifted = gen_reg_rtx (V2DImode);
+  emit_insn_before (gen_lshrv2di3 (shifted,
+                                   gen_lowpart (V2DImode, spread),
+                                   GEN_INT (count & 31)),
+                    insn);
+  rtx gathered = gen_reg_rtx (V4SImode);
+  emit_insn_before (gen_sse2_pshufd (gathered,
+                                     gen_lowpart (V4SImode, shifted),
+                                     GEN_INT (count > 32 ? 34 : 136)),
+                    insn);
+  return gen_lowpart (V2DImode, gathered);
+}
+
/* Convert INSN to vector mode. */
void
@@ -1209,6 +1317,12 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
PUT_MODE (src, vmode);
break;
+ case ROTATE:
+ case ROTATERT: /* convert_rotate emits any needed insns before INSN and returns the replacement source.  */
+ src = convert_rotate (GET_CODE (src), XEXP (src, 0), XEXP (src, 1),
+ insn);
+ break;
+
case NEG:
src = XEXP (src, 0);
@@ -1982,6 +2096,8 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
case ASHIFT:
case LSHIFTRT:
+ case ROTATE:
+ case ROTATERT: /* Like the shifts above, only a CONST_INT count in [0, bitsize-1] is accepted.  */
if (!CONST_INT_P (XEXP (src, 1))
|| !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
return false;
@@ -189,6 +189,7 @@ class general_scalar_chain : public scalar_chain
void convert_insn (rtx_insn *insn) final override;
void convert_op (rtx *op, rtx_insn *insn) final override;
int vector_const_cost (rtx exp);
+ rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn); /* Vectorize a ROTATE/ROTATERT by a constant.  */
};
class timode_scalar_chain : public scalar_chain
new file mode 100644
@@ -0,0 +1,195 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+/* Scalar 64-bit DImode rotations.  Each rotN takes and returns its
+   operand by value and rotates it right by N bits, written as a pair
+   of shifts whose counts sum to 64.  */
+unsigned long long rot1(unsigned long long x) { return (x>>1) | (x<<63); }
+unsigned long long rot2(unsigned long long x) { return (x>>2) | (x<<62); }
+unsigned long long rot3(unsigned long long x) { return (x>>3) | (x<<61); }
+unsigned long long rot4(unsigned long long x) { return (x>>4) | (x<<60); }
+unsigned long long rot5(unsigned long long x) { return (x>>5) | (x<<59); }
+unsigned long long rot6(unsigned long long x) { return (x>>6) | (x<<58); }
+unsigned long long rot7(unsigned long long x) { return (x>>7) | (x<<57); }
+unsigned long long rot8(unsigned long long x) { return (x>>8) | (x<<56); }
+unsigned long long rot9(unsigned long long x) { return (x>>9) | (x<<55); }
+unsigned long long rot10(unsigned long long x) { return (x>>10) | (x<<54); }
+unsigned long long rot15(unsigned long long x) { return (x>>15) | (x<<49); }
+unsigned long long rot16(unsigned long long x) { return (x>>16) | (x<<48); }
+unsigned long long rot17(unsigned long long x) { return (x>>17) | (x<<47); }
+unsigned long long rot20(unsigned long long x) { return (x>>20) | (x<<44); }
+unsigned long long rot24(unsigned long long x) { return (x>>24) | (x<<40); }
+unsigned long long rot30(unsigned long long x) { return (x>>30) | (x<<34); }
+unsigned long long rot31(unsigned long long x) { return (x>>31) | (x<<33); }
+unsigned long long rot32(unsigned long long x) { return (x>>32) | (x<<32); }
+unsigned long long rot33(unsigned long long x) { return (x>>33) | (x<<31); }
+unsigned long long rot34(unsigned long long x) { return (x>>34) | (x<<30); }
+unsigned long long rot40(unsigned long long x) { return (x>>40) | (x<<24); }
+unsigned long long rot42(unsigned long long x) { return (x>>42) | (x<<22); }
+unsigned long long rot48(unsigned long long x) { return (x>>48) | (x<<16); }
+unsigned long long rot50(unsigned long long x) { return (x>>50) | (x<<14); }
+unsigned long long rot56(unsigned long long x) { return (x>>56) | (x<<8); }
+unsigned long long rot58(unsigned long long x) { return (x>>58) | (x<<6); }
+unsigned long long rot60(unsigned long long x) { return (x>>60) | (x<<4); }
+unsigned long long rot61(unsigned long long x) { return (x>>61) | (x<<3); }
+unsigned long long rot62(unsigned long long x) { return (x>>62) | (x<<2); }
+unsigned long long rot63(unsigned long long x) { return (x>>63) | (x<<1); }
+
+/* DImode mem-to-mem rotations.  These STV with -m32.  Each memN loads
+   *p, applies the matching rotN and stores the result back to *p.  */
+void mem1(unsigned long long *p) { *p = rot1(*p); }
+void mem2(unsigned long long *p) { *p = rot2(*p); }
+void mem3(unsigned long long *p) { *p = rot3(*p); }
+void mem4(unsigned long long *p) { *p = rot4(*p); }
+void mem5(unsigned long long *p) { *p = rot5(*p); }
+void mem6(unsigned long long *p) { *p = rot6(*p); }
+void mem7(unsigned long long *p) { *p = rot7(*p); }
+void mem8(unsigned long long *p) { *p = rot8(*p); }
+void mem9(unsigned long long *p) { *p = rot9(*p); }
+void mem10(unsigned long long *p) { *p = rot10(*p); }
+void mem15(unsigned long long *p) { *p = rot15(*p); }
+void mem16(unsigned long long *p) { *p = rot16(*p); }
+void mem17(unsigned long long *p) { *p = rot17(*p); }
+void mem20(unsigned long long *p) { *p = rot20(*p); }
+void mem24(unsigned long long *p) { *p = rot24(*p); }
+void mem30(unsigned long long *p) { *p = rot30(*p); }
+void mem31(unsigned long long *p) { *p = rot31(*p); }
+void mem32(unsigned long long *p) { *p = rot32(*p); }
+void mem33(unsigned long long *p) { *p = rot33(*p); }
+void mem34(unsigned long long *p) { *p = rot34(*p); }
+void mem40(unsigned long long *p) { *p = rot40(*p); }
+void mem42(unsigned long long *p) { *p = rot42(*p); }
+void mem48(unsigned long long *p) { *p = rot48(*p); }
+void mem50(unsigned long long *p) { *p = rot50(*p); }
+void mem56(unsigned long long *p) { *p = rot56(*p); }
+void mem58(unsigned long long *p) { *p = rot58(*p); }
+void mem60(unsigned long long *p) { *p = rot60(*p); }
+void mem61(unsigned long long *p) { *p = rot61(*p); }
+void mem62(unsigned long long *p) { *p = rot62(*p); }
+void mem63(unsigned long long *p) { *p = rot63(*p); }
+
+/* Check that rotN and memN give the same result.  rotN is the type of
+   the value-returning rotate functions above and memN the type of the
+   variants that update a value in place through a pointer.  */
+typedef unsigned long long (*rotN)(unsigned long long);
+typedef void (*memN)(unsigned long long*);
+
+/* Apply the value-returning rotate S and the in-place rotate V to the
+   same input X, and abort if the two results differ.  */
+void eval(rotN s, memN v, unsigned long long x)
+{
+  const unsigned long long expected = s(x);
+  unsigned long long actual = x;
+
+  v(&actual);
+  if (actual == expected)
+    return;
+  __builtin_abort ();
+}
+
+/* Compare S against V on zero, on each of the 64 single-bit values in
+   ascending order, and then on a handful of multi-bit patterns.  */
+void test(rotN s, memN v)
+{
+  static const unsigned long long mixed[] = {
+    0x0123456789abcdefull, 0x1111111111111111ull, 0x5555555555555555ull,
+    0x8888888888888888ull, 0xaaaaaaaaaaaaaaaaull, 0xcafebabecafebabeull,
+    0xdeadbeefdeadbeefull, 0xfedcba9876543210ull, 0xffffffffffffffffull
+  };
+  unsigned long long bit;
+  unsigned int i;
+
+  eval(s,v,0);
+
+  /* Walk a single set bit from bit 0 up to bit 63; shifting it out of
+     bit 63 yields zero, which ends the loop.  */
+  for (bit = 1; bit != 0; bit <<= 1)
+    eval(s,v,bit);
+
+  for (i = 0; i < sizeof (mixed) / sizeof (mixed[0]); i++)
+    eval(s,v,mixed[i]);
+}
+
+/* Run test on every rotN/memN pair defined above, in ascending order
+   of rotate count.  */
+int main()
+{
+  static const struct { rotN by_value; memN in_place; } pairs[] = {
+    { rot1, mem1 },   { rot2, mem2 },   { rot3, mem3 },
+    { rot4, mem4 },   { rot5, mem5 },   { rot6, mem6 },
+    { rot7, mem7 },   { rot8, mem8 },   { rot9, mem9 },
+    { rot10, mem10 }, { rot15, mem15 }, { rot16, mem16 },
+    { rot17, mem17 }, { rot20, mem20 }, { rot24, mem24 },
+    { rot30, mem30 }, { rot31, mem31 }, { rot32, mem32 },
+    { rot33, mem33 }, { rot34, mem34 }, { rot40, mem40 },
+    { rot42, mem42 }, { rot48, mem48 }, { rot50, mem50 },
+    { rot56, mem56 }, { rot58, mem58 }, { rot60, mem60 },
+    { rot61, mem61 }, { rot62, mem62 }, { rot63, mem63 }
+  };
+  unsigned int i;
+
+  for (i = 0; i < sizeof (pairs) / sizeof (pairs[0]); i++)
+    test(pairs[i].by_value, pairs[i].in_place);
+  return 0;
+}
+
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse2" } */
+
+unsigned long long a,b,c,d;
+
+/* Rotate X left by Y bits.  The return type must be 64 bits wide:
+   `unsigned long' is only 32 bits on ia32, which would truncate the
+   result and leave no full DImode rotate for this test to exercise.  */
+static unsigned long long rot(unsigned long long x, int y)
+{
+  /* Only called with y in 1..63.  */
+  return (x<<y) | (x>>(64-y));
+}
+
+void foo() /* Chain of XOR, rotate-left (by 32, 24, 16 and 63) and add steps on
+   the 64-bit globals a, b, c and d, each result assigned back to a global.  */
+{
+ d = rot(d ^ a,32);
+ c = c + d;
+ b = rot(b ^ c,24);
+ a = a + b;
+ d = rot(d ^ a,16);
+ c = c + d;
+ b = rot(b ^ c,63);
+}
+
+/* { dg-final { scan-assembler-not "shldl" } } */
+/* { dg-final { scan-assembler-not "%\[er\]sp" } } */