===================================================================
@@ -624,3 +624,7 @@
return REG_P (op) && REG_POINTER (op);
})
+;; True if this is the right-most vector element; for mux1 @brcst.
+(define_predicate "mux1_brcst_element"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)")))
===================================================================
@@ -1972,12 +1972,13 @@
return true;
}
-/* Emit an integral vector unpack operation. */
+/* The vectors LO and HI each contain N halves of a double-wide vector.
+ Reassemble either the first N/2 or the second N/2 elements. */
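+/* For example, with V8QI arguments on a little-endian target, !HIGHP
+   interleaves the first four elements of LO and HI into
+   { lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3 }; when HI holds the sign
+   mask of LO, that is the first four elements of LO widened to
+   HImode.  */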
void
-ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
+ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
- enum machine_mode mode = GET_MODE (operands[1]);
+ enum machine_mode mode = GET_MODE (lo);
rtx (*gen) (rtx, rtx, rtx);
rtx x;
@@ -1993,96 +1994,66 @@
gcc_unreachable ();
}
- /* Fill in x with the sign extension of each element in op1. */
- if (unsignedp)
- x = CONST0_RTX (mode);
+ x = gen_lowpart (mode, out);
+ if (TARGET_BIG_ENDIAN)
+ x = gen (x, hi, lo);
else
- {
- bool neg;
-
- x = gen_reg_rtx (mode);
-
- neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
- CONST0_RTX (mode));
- gcc_assert (!neg);
- }
-
- emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
+ x = gen (x, lo, hi);
+ emit_insn (x);
}
-/* Emit an integral vector widening sum operations. */
+/* Return a vector in which each element is the sign extension (all
+   zeros or all ones) of the corresponding element of VEC, or the
+   zero vector when UNSIGNEDP.  */
-void
-ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
+static rtx
+ia64_unpack_sign (rtx vec, bool unsignedp)
{
- rtx l, h, x, s;
- enum machine_mode wmode, mode;
- rtx (*unpack_l) (rtx, rtx, rtx);
- rtx (*unpack_h) (rtx, rtx, rtx);
- rtx (*plus) (rtx, rtx, rtx);
+ enum machine_mode mode = GET_MODE (vec);
+ rtx zero = CONST0_RTX (mode);
- wmode = GET_MODE (operands[0]);
- mode = GET_MODE (operands[1]);
-
- switch (mode)
- {
- case V8QImode:
- unpack_l = gen_vec_interleave_lowv8qi;
- unpack_h = gen_vec_interleave_highv8qi;
- plus = gen_addv4hi3;
- break;
- case V4HImode:
- unpack_l = gen_vec_interleave_lowv4hi;
- unpack_h = gen_vec_interleave_highv4hi;
- plus = gen_addv2si3;
- break;
- default:
- gcc_unreachable ();
- }
-
- /* Fill in x with the sign extension of each element in op1. */
if (unsignedp)
- x = CONST0_RTX (mode);
+ return zero;
else
{
+ rtx sign = gen_reg_rtx (mode);
bool neg;
- x = gen_reg_rtx (mode);
-
- neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
- CONST0_RTX (mode));
+ neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
gcc_assert (!neg);
+
+ return sign;
}
+}
- l = gen_reg_rtx (wmode);
- h = gen_reg_rtx (wmode);
- s = gen_reg_rtx (wmode);
+/* Emit an integral vector unpack operation. */
- emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
- emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
- emit_insn (plus (s, l, operands[2]));
- emit_insn (plus (operands[0], h, s));
+void
+ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
+{
+ rtx sign = ia64_unpack_sign (operands[1], unsignedp);
+ ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}
+/* Emit an integral vector widening sum operation.  */
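+/* For example, for a V8QI input and V4HI accumulator:
+   operands[0][i] = operands[2][i] + extend (operands[1][i])
+   + extend (operands[1][i + 4]).  */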
+
void
-ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
+ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
- rtx l = gen_reg_rtx (V4HImode);
- rtx h = gen_reg_rtx (V4HImode);
- rtx (*mulhigh)(rtx, rtx, rtx, rtx);
- rtx (*interl)(rtx, rtx, rtx);
+ enum machine_mode wmode;
+ rtx l, h, t, sign;
- emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ sign = ia64_unpack_sign (operands[1], unsignedp);
- /* For signed, pmpy2.r would appear to more closely match this operation.
- However, the vectorizer is more likely to use the LO and HI patterns
- in pairs. At which point, with this formulation, the first two insns
- of each can be CSEd. */
- mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2;
- emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
+ wmode = GET_MODE (operands[0]);
+ l = gen_reg_rtx (wmode);
+ h = gen_reg_rtx (wmode);
- interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
- emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
+ ia64_unpack_assemble (l, operands[1], sign, false);
+ ia64_unpack_assemble (h, operands[1], sign, true);
+
+ t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
+ t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
+ if (t != operands[0])
+ emit_move_insn (operands[0], t);
}
/* Emit a signed or unsigned V8QI dot product operation. */
@@ -2090,48 +2061,31 @@
void
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
{
- rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
+ rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
+ rtx p1, p2, p3, p4, s1, s2, s3;
- /* Fill in x1 and x2 with the sign extension of each element. */
- if (unsignedp)
- x1 = x2 = CONST0_RTX (V8QImode);
- else
- {
- bool neg;
+ op1 = operands[1];
+ op2 = operands[2];
+ sn1 = ia64_unpack_sign (op1, unsignedp);
+ sn2 = ia64_unpack_sign (op2, unsignedp);
- x1 = gen_reg_rtx (V8QImode);
- x2 = gen_reg_rtx (V8QImode);
-
- neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
- CONST0_RTX (V8QImode));
- gcc_assert (!neg);
- neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
- CONST0_RTX (V8QImode));
- gcc_assert (!neg);
- }
-
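+  /* Widen both inputs, multiply the even and odd 16-bit lanes
+     separately, then reduce the four V2SI partial products together
+     with the accumulator operands[3].  */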
l1 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode);
h1 = gen_reg_rtx (V4HImode);
h2 = gen_reg_rtx (V4HImode);
+ ia64_unpack_assemble (l1, op1, sn1, false);
+ ia64_unpack_assemble (l2, op2, sn2, false);
+ ia64_unpack_assemble (h1, op1, sn1, true);
+ ia64_unpack_assemble (h2, op2, sn2, true);
- emit_insn (gen_vec_interleave_lowv8qi
- (gen_lowpart (V8QImode, l1), operands[1], x1));
- emit_insn (gen_vec_interleave_lowv8qi
- (gen_lowpart (V8QImode, l2), operands[2], x2));
- emit_insn (gen_vec_interleave_highv8qi
- (gen_lowpart (V8QImode, h1), operands[1], x1));
- emit_insn (gen_vec_interleave_highv8qi
- (gen_lowpart (V8QImode, h2), operands[2], x2));
-
p1 = gen_reg_rtx (V2SImode);
p2 = gen_reg_rtx (V2SImode);
p3 = gen_reg_rtx (V2SImode);
p4 = gen_reg_rtx (V2SImode);
- emit_insn (gen_pmpy2_r (p1, l1, l2));
- emit_insn (gen_pmpy2_l (p2, l1, l2));
- emit_insn (gen_pmpy2_r (p3, h1, h2));
- emit_insn (gen_pmpy2_l (p4, h1, h2));
+ emit_insn (gen_pmpy2_even (p1, l1, l2));
+ emit_insn (gen_pmpy2_even (p2, h1, h2));
+ emit_insn (gen_pmpy2_odd (p3, l1, l2));
+ emit_insn (gen_pmpy2_odd (p4, h1, h2));
s1 = gen_reg_rtx (V2SImode);
s2 = gen_reg_rtx (V2SImode);
===================================================================
@@ -172,35 +172,11 @@
(match_operand:V8QI 2 "gr_register_operand" "r")))]
""
{
- rtx r1, l1, r2, l2, rm, lm;
-
- r1 = gen_reg_rtx (V4HImode);
- l1 = gen_reg_rtx (V4HImode);
- r2 = gen_reg_rtx (V4HImode);
- l2 = gen_reg_rtx (V4HImode);
-
- /* Zero-extend the QImode elements into two words of HImode elements
- by interleaving them with zero bytes. */
- emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
- operands[1], CONST0_RTX (V8QImode)));
- emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
- operands[2], CONST0_RTX (V8QImode)));
- emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
- operands[1], CONST0_RTX (V8QImode)));
- emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
- operands[2], CONST0_RTX (V8QImode)));
-
- /* Multiply. */
- rm = gen_reg_rtx (V4HImode);
- lm = gen_reg_rtx (V4HImode);
- emit_insn (gen_mulv4hi3 (rm, r1, r2));
- emit_insn (gen_mulv4hi3 (lm, l1, l2));
-
- /* Zap the high order bytes of the HImode elements by overwriting those
- in one part with the low order bytes of the other. */
- emit_insn (gen_mix1_r (operands[0],
- gen_lowpart (V8QImode, rm),
- gen_lowpart (V8QImode, lm)));
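+  /* Form the 16-bit products of the two halves with widening
+     multiplies, then pack-truncate back to V8QI, keeping the low
+     byte of each product.  */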
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_vec_widen_umult_lo_v8qi (l, operands[1], operands[2]));
+ emit_insn (gen_vec_widen_umult_hi_v8qi (h, operands[1], operands[2]));
+ emit_insn (gen_vec_pack_trunc_v4hi (operands[0], l, h));
DONE;
})
@@ -296,7 +272,7 @@
"pmpyshr2.u %0 = %1, %2, %3"
[(set_attr "itanium_class" "mmmul")])
-(define_insn "pmpy2_r"
+(define_insn "pmpy2_even"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(mult:V2SI
(vec_select:V2SI
@@ -308,10 +284,16 @@
(match_operand:V4HI 2 "gr_register_operand" "r"))
(parallel [(const_int 0) (const_int 2)]))))]
""
- "pmpy2.r %0 = %1, %2"
+{
+ /* Recall that vector elements are numbered in memory order. */
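+  /* On little-endian the even memory-order elements occupy the right
+     (least significant) halves of the register; on big-endian they
+     occupy the left halves, hence the .l/.r swap below.  */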
+ if (TARGET_BIG_ENDIAN)
+ return "%,pmpy2.l %0 = %1, %2";
+ else
+ return "%,pmpy2.r %0 = %1, %2";
+}
[(set_attr "itanium_class" "mmshf")])
-(define_insn "pmpy2_l"
+(define_insn "pmpy2_odd"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(mult:V2SI
(vec_select:V2SI
@@ -323,7 +305,13 @@
(match_operand:V4HI 2 "gr_register_operand" "r"))
(parallel [(const_int 1) (const_int 3)]))))]
""
- "pmpy2.l %0 = %1, %2"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pmpy2.r %0 = %1, %2";
+ else
+ return "%,pmpy2.l %0 = %1, %2";
+}
[(set_attr "itanium_class" "mmshf")])
(define_expand "vec_widen_smult_lo_v4hi"
@@ -332,7 +320,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
- ia64_expand_widen_mul_v4hi (operands, false, false);
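+  /* mulv4hi3 computes the low 16 bits of each 32-bit product, and
+     pmpyshr2 with a shift count of 16 the high 16 bits; assembling
+     the two yields the widened products of the low half.  */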
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, false);
DONE;
})
@@ -342,7 +334,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
- ia64_expand_widen_mul_v4hi (operands, false, true);
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, true);
DONE;
})
@@ -352,7 +348,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
- ia64_expand_widen_mul_v4hi (operands, true, false);
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, false);
DONE;
})
@@ -362,7 +362,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
- ia64_expand_widen_mul_v4hi (operands, true, true);
+ rtx l = gen_reg_rtx (V4HImode);
+ rtx h = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+ emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+ ia64_unpack_assemble (operands[0], l, h, true);
DONE;
})
@@ -405,15 +409,28 @@
emit_insn (gen_mulv4hi3 (t3, t0, op2h));
/* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW. */
- emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t4), t1, t2));
+ x = gen_lowpart (V4HImode, t4);
+ if (TARGET_BIG_ENDIAN)
+ x = gen_mix2_odd (x, t2, t1);
+ else
+ x = gen_mix2_even (x, t1, t2);
+ emit_insn (x);
/* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16. */
- emit_insn (gen_mix2_l (gen_lowpart (V4HImode, t5),
- CONST0_RTX (V4HImode), t3));
+ x = gen_lowpart (V4HImode, t5);
+ if (TARGET_BIG_ENDIAN)
+ x = gen_mix2_even (x, t3, CONST0_RTX (V4HImode));
+ else
+ x = gen_mix2_odd (x, CONST0_RTX (V4HImode), t3);
+ emit_insn (x);
/* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16. */
- emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t6),
- CONST0_RTX (V4HImode), t3));
+ x = gen_lowpart (V4HImode, t6);
+ if (TARGET_BIG_ENDIAN)
+ x = gen_mix2_odd (x, t3, CONST0_RTX (V4HImode));
+ else
+ x = gen_mix2_even (x, CONST0_RTX (V4HImode), t3);
+ emit_insn (x);
emit_insn (gen_addv2si3 (t7, t4, t5));
emit_insn (gen_addv2si3 (operands[0], t6, t7));
@@ -608,16 +625,36 @@
(match_operand:V2SI 3 "gr_register_operand" "")]
""
{
- rtx l, r, t;
+ rtx e, o, t;
- r = gen_reg_rtx (V2SImode);
+ e = gen_reg_rtx (V2SImode);
+ o = gen_reg_rtx (V2SImode);
+ t = gen_reg_rtx (V2SImode);
+
+ emit_insn (gen_pmpy2_even (e, operands[1], operands[2]));
+ emit_insn (gen_pmpy2_odd (o, operands[1], operands[2]));
+ emit_insn (gen_addv2si3 (t, e, operands[3]));
+ emit_insn (gen_addv2si3 (operands[0], t, o));
+ DONE;
+})
+
+(define_expand "udot_prodv4hi"
+ [(match_operand:V2SI 0 "gr_register_operand" "")
+ (match_operand:V4HI 1 "gr_register_operand" "")
+ (match_operand:V4HI 2 "gr_register_operand" "")
+ (match_operand:V2SI 3 "gr_register_operand" "")]
+ ""
+{
+ rtx l, h, t;
+
l = gen_reg_rtx (V2SImode);
+ h = gen_reg_rtx (V2SImode);
t = gen_reg_rtx (V2SImode);
- emit_insn (gen_pmpy2_r (r, operands[1], operands[2]));
- emit_insn (gen_pmpy2_l (l, operands[1], operands[2]));
- emit_insn (gen_addv2si3 (t, r, operands[3]));
- emit_insn (gen_addv2si3 (operands[0], t, l));
+ emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2]));
+ emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2]));
+ emit_insn (gen_addv2si3 (t, l, operands[3]));
+ emit_insn (gen_addv2si3 (operands[0], t, h));
DONE;
})
@@ -673,7 +710,13 @@
(ss_truncate:V4QI
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
""
- "pack2.sss %0 = %r1, %r2"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pack2.sss %0 = %r2, %r1";
+ else
+ return "%,pack2.sss %0 = %r1, %r2";
+}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_pack_usat_v4hi"
@@ -684,7 +727,13 @@
(us_truncate:V4QI
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
""
- "pack2.uss %0 = %r1, %r2"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pack2.uss %0 = %r2, %r1";
+ else
+ return "%,pack2.uss %0 = %r1, %r2";
+}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_pack_ssat_v2si"
@@ -695,7 +744,13 @@
(ss_truncate:V2HI
(match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))]
""
- "pack4.sss %0 = %r1, %r2"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,pack4.sss %0 = %r2, %r1";
+ else
+ return "%,pack4.sss %0 = %r1, %r2";
+}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_lowv8qi"
@@ -709,7 +764,13 @@
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
""
- "unpack1.l %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack1.h %0 = %r1, %r2";
+ else
+ return "%,unpack1.l %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_highv8qi"
@@ -723,57 +784,63 @@
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
""
- "unpack1.h %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack1.l %0 = %r1, %r2";
+ else
+ return "%,unpack1.h %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
-(define_insn "mix1_r"
+(define_insn "mix1_even"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
(match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 0)
- (const_int 8)
- (const_int 2)
- (const_int 10)
- (const_int 4)
- (const_int 12)
- (const_int 6)
- (const_int 14)])))]
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
""
- "mix1.r %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix1.l %0 = %r1, %r2";
+ else
+ return "%,mix1.r %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
-(define_insn "mix1_l"
+(define_insn "mix1_odd"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
(match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 1)
- (const_int 9)
- (const_int 3)
- (const_int 11)
- (const_int 5)
- (const_int 13)
- (const_int 7)
- (const_int 15)])))]
+ (parallel [(const_int 1) (const_int 9)
+ (const_int 3) (const_int 11)
+ (const_int 5) (const_int 13)
+ (const_int 7) (const_int 15)])))]
""
- "mix1.l %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix1.r %0 = %r1, %r2";
+ else
+ return "%,mix1.l %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
(define_insn "*mux1_rev"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
- (parallel [(const_int 7)
- (const_int 6)
- (const_int 5)
- (const_int 4)
- (const_int 3)
- (const_int 2)
- (const_int 1)
- (const_int 0)])))]
+ (parallel [(const_int 7) (const_int 6)
+ (const_int 5) (const_int 4)
+ (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
""
"mux1 %0 = %1, @rev"
[(set_attr "itanium_class" "mmshf")])
@@ -782,14 +849,10 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 2)
- (const_int 6)
- (const_int 1)
- (const_int 5)
- (const_int 3)
- (const_int 7)])))]
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)
+ (const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
""
"mux1 %0 = %1, @mix"
[(set_attr "itanium_class" "mmshf")])
@@ -798,14 +861,10 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 1)
- (const_int 5)
- (const_int 2)
- (const_int 6)
- (const_int 3)
- (const_int 7)])))]
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)
+ (const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
""
"mux1 %0 = %1, @shuf"
[(set_attr "itanium_class" "mmshf")])
@@ -814,14 +873,10 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
- (parallel [(const_int 0)
- (const_int 2)
- (const_int 4)
- (const_int 6)
- (const_int 1)
- (const_int 3)
- (const_int 5)
- (const_int 7)])))]
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))]
""
"mux1 %0 = %1, @alt"
[(set_attr "itanium_class" "mmshf")])
@@ -830,14 +885,14 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
- (parallel [(const_int 0)
- (const_int 0)
- (const_int 0)
- (const_int 0)
- (const_int 0)
- (const_int 0)
- (const_int 0)
- (const_int 0)])))]
+ (parallel [(match_operand 2 "mux1_brcst_element" "")
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)
+ (match_dup 2)])))]
""
"mux1 %0 = %1, @brcst"
[(set_attr "itanium_class" "mmshf")])
@@ -857,7 +912,7 @@
""
{
rtx temp = gen_reg_rtx (V8QImode);
- emit_insn (gen_mix1_r (temp, operands[1], operands[2]));
+ emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
emit_insn (gen_mux1_alt (operands[0], temp));
DONE;
})
@@ -869,7 +924,7 @@
""
{
rtx temp = gen_reg_rtx (V8QImode);
- emit_insn (gen_mix1_l (temp, operands[1], operands[2]));
+ emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
emit_insn (gen_mux1_alt (operands[0], temp));
DONE;
})
@@ -880,12 +935,16 @@
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 1)
- (const_int 5)])))]
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
""
- "unpack2.l %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack2.h %0 = %r1, %r2";
+ else
+ return "%,unpack2.l %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_highv4hi"
@@ -894,40 +953,52 @@
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 2)
- (const_int 6)
- (const_int 3)
- (const_int 7)])))]
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
""
- "unpack2.h %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack2.l %0 = %r1, %r2";
+ else
+ return "%,unpack2.h %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
-(define_insn "mix2_r"
+(define_insn "mix2_even"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 2)
- (const_int 6)])))]
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
""
- "mix2.r %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix2.l %0 = %r1, %r0";
+ else
+ return "%,mix2.r %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
-(define_insn "mix2_l"
+(define_insn "mix2_odd"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 1)
- (const_int 5)
- (const_int 3)
- (const_int 7)])))]
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
""
- "mix2.l %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,mix2.r %0 = %r1, %r0";
+ else
+ return "%,mix2.l %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
(define_insn "*mux2"
@@ -940,11 +1011,21 @@
(match_operand 5 "const_int_2bit_operand" "")])))]
""
{
- int mask;
- mask = INTVAL (operands[2]);
- mask |= INTVAL (operands[3]) << 2;
- mask |= INTVAL (operands[4]) << 4;
- mask |= INTVAL (operands[5]) << 6;
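+  /* The operand indices are in memory order.  For example, the
+     identity selection (0,1,2,3) produces mask 0xe4 for either
+     endianness: little-endian packs the indices directly, while
+     big-endian complements each index (3 - x) and reverses the
+     two-bit slots.  */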
+ int mask = 0;
+ if (TARGET_BIG_ENDIAN)
+ {
+ mask |= (3 - INTVAL (operands[2])) << 6;
+ mask |= (3 - INTVAL (operands[3])) << 4;
+ mask |= (3 - INTVAL (operands[4])) << 2;
+ mask |= 3 - INTVAL (operands[5]);
+ }
+ else
+ {
+ mask |= INTVAL (operands[2]);
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ }
operands[2] = GEN_INT (mask);
return "%,mux2 %0 = %1, %2";
}
@@ -954,10 +1035,8 @@
[(set (match_operand:V4HI 0 "gr_register_operand" "")
(vec_select:V4HI
(match_operand:V4HI 1 "gr_register_operand" "")
- (parallel [(const_int 0)
- (const_int 2)
- (const_int 1)
- (const_int 3)])))]
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 1) (const_int 3)])))]
"")
(define_expand "vec_extract_evenv4hi"
@@ -967,7 +1046,7 @@
""
{
rtx temp = gen_reg_rtx (V4HImode);
- emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
+ emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
DONE;
})
@@ -979,7 +1058,7 @@
""
{
rtx temp = gen_reg_rtx (V4HImode);
- emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
+ emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
DONE;
})
@@ -992,30 +1071,38 @@
"mux2 %0 = %1, 0"
[(set_attr "itanium_class" "mmshf")])
-;; Note that mix4.r performs the exact same operation.
(define_insn "vec_interleave_lowv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
(match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 0)
- (const_int 2)])))]
+ (parallel [(const_int 0) (const_int 2)])))]
""
- "unpack4.l %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack4.h %0 = %r1, %r2";
+ else
+ return "%,unpack4.l %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
-;; Note that mix4.l performs the exact same operation.
(define_insn "vec_interleave_highv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
(match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
- (parallel [(const_int 1)
- (const_int 3)])))]
+ (parallel [(const_int 1) (const_int 3)])))]
""
- "unpack4.h %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack4.l %0 = %r1, %r2";
+ else
+ return "%,unpack4.h %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
(define_expand "vec_extract_evenv2si"
@@ -1061,10 +1148,7 @@
if (!gr_reg_or_0_operand (op2, SImode))
op2 = force_reg (SImode, op2);
- if (TARGET_BIG_ENDIAN)
- x = gen_rtx_VEC_CONCAT (V2SImode, op2, op1);
- else
- x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
+ x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
DONE;
})
@@ -1075,7 +1159,13 @@
(match_operand:SI 1 "gr_reg_or_0_operand" "rO")
(match_operand:SI 2 "gr_reg_or_0_operand" "rO")))]
""
- "unpack4.l %0 = %r2, %r1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,unpack4.l %0 = %r1, %r2";
+ else
+ return "%,unpack4.l %0 = %r2, %r1";
+}
[(set_attr "itanium_class" "mmshf")])
;; Missing operations
@@ -1333,10 +1423,7 @@
if (!fr_reg_or_fp01_operand (op2, SFmode))
op2 = force_reg (SFmode, op2);
- if (TARGET_BIG_ENDIAN)
- emit_insn (gen_fpack (operands[0], op2, op1));
- else
- emit_insn (gen_fpack (operands[0], op1, op2));
+ emit_insn (gen_fpack (operands[0], op1, op2));
DONE;
})
@@ -1346,7 +1433,13 @@
(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
(match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
""
- "fpack %0 = %F2, %F1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fpack %0 = %F1, %F2";
+ else
+ return "%,fpack %0 = %F2, %F1";
+}
[(set_attr "itanium_class" "fmisc")])
(define_insn "fswap"
@@ -1357,7 +1450,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 1) (const_int 2)])))]
""
- "fswap %0 = %F1, %F2"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fswap %0 = %F2, %F1";
+ else
+ return "%,fswap %0 = %F1, %F2";
+}
[(set_attr "itanium_class" "fmisc")])
(define_insn "vec_interleave_highv2sf"
@@ -1368,7 +1467,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 1) (const_int 3)])))]
""
- "fmix.l %0 = %F2, %F1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fmix.r %0 = %F1, %F2";
+ else
+ return "%,fmix.l %0 = %F2, %F1";
+}
[(set_attr "itanium_class" "fmisc")])
(define_insn "vec_interleave_lowv2sf"
@@ -1379,7 +1484,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 0) (const_int 2)])))]
""
- "fmix.r %0 = %F2, %F1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fmix.l %0 = %F1, %F2";
+ else
+ return "%,fmix.r %0 = %F2, %F1";
+}
[(set_attr "itanium_class" "fmisc")])
(define_insn "fmix_lr"
@@ -1390,7 +1501,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 0) (const_int 3)])))]
""
- "fmix.lr %0 = %F2, %F1"
+{
+ /* Recall that vector elements are numbered in memory order. */
+ if (TARGET_BIG_ENDIAN)
+ return "%,fmix.lr %0 = %F1, %F0";
+ else
+ return "%,fmix.lr %0 = %F2, %F1";
+}
[(set_attr "itanium_class" "fmisc")])
(define_expand "vec_extract_evenv2sf"
@@ -1415,23 +1532,24 @@
DONE;
})
-
(define_expand "vec_setv2sf"
- [(match_operand:V2SF 0 "fr_register_operand" "")
- (match_operand:SF 1 "fr_register_operand" "")
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
(match_operand 2 "const_int_operand" "")]
""
{
+ rtx op0 = operands[0];
rtx tmp = gen_reg_rtx (V2SFmode);
+
emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
switch (INTVAL (operands[2]))
{
case 0:
- emit_insn (gen_fmix_lr (operands[0], tmp, operands[0]));
+ emit_insn (gen_fmix_lr (op0, tmp, op0));
break;
case 1:
- emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[0], tmp));
+ emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
break;
default:
gcc_unreachable ();
@@ -1458,8 +1576,8 @@
})
(define_insn_and_split "*vec_extractv2sf_0_be"
- [(set (match_operand:SF 0 "register_operand" "=r,f")
- (unspec:SF [(match_operand:V2SF 1 "register_operand" "rf,r")
+ [(set (match_operand:SF 0 "register_operand" "=rf,r")
+ (unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "m,r")
(const_int 0)]
UNSPEC_VECT_EXTR))]
"TARGET_BIG_ENDIAN"
@@ -1467,31 +1585,44 @@
"reload_completed"
[(set (match_dup 0) (match_dup 1))]
{
- if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
- operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0]));
+ if (MEM_P (operands[1]))
+ operands[1] = adjust_address (operands[1], SFmode, 0);
else
- operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+ {
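+      /* In a general register, element 0 occupies the high 32 bits
+         on big-endian; shift it down.  */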
+      emit_insn (gen_lshrdi3 (gen_rtx_REG (DImode, REGNO (operands[0])),
+                              gen_rtx_REG (DImode, REGNO (operands[1])),
+                              GEN_INT (32)));
+ DONE;
+ }
})
-(define_insn_and_split "*vec_extractv2sf_1"
+(define_insn_and_split "*vec_extractv2sf_1_le"
[(set (match_operand:SF 0 "register_operand" "=r")
(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
(const_int 1)]
UNSPEC_VECT_EXTR))]
- ""
+ "!TARGET_BIG_ENDIAN"
"#"
- "reload_completed"
+ "&& reload_completed"
[(const_int 0)]
{
operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
- if (TARGET_BIG_ENDIAN)
- emit_move_insn (operands[0], operands[1]);
- else
- emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+ emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
DONE;
})
+(define_insn_and_split "*vec_extractv2sf_1_be"
+ [(set (match_operand:SF 0 "register_operand" "=rf")
+ (unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
+ (const_int 1)]
+ UNSPEC_VECT_EXTR))]
+ "TARGET_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+})
+
(define_expand "vec_extractv2sf"
[(set (match_operand:SF 0 "register_operand" "")
(unspec:SF [(match_operand:V2SF 1 "register_operand" "")
@@ -1542,9 +1673,12 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
- rtx op1 = gen_lowpart(V8QImode, operands[1]);
- rtx op2 = gen_lowpart(V8QImode, operands[2]);
- emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
+ rtx op1 = gen_lowpart (V8QImode, operands[1]);
+ rtx op2 = gen_lowpart (V8QImode, operands[2]);
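+  /* Truncation keeps the low byte of each 16-bit element; in memory
+     order that is the even byte on little-endian and the odd byte
+     on big-endian.  */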
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
+ else
+ emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
DONE;
})
@@ -1554,9 +1688,12 @@
(match_operand:V2SI 2 "gr_register_operand" "")]
""
{
- rtx op1 = gen_lowpart(V4HImode, operands[1]);
- rtx op2 = gen_lowpart(V4HImode, operands[2]);
- emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
+ rtx op1 = gen_lowpart (V4HImode, operands[1]);
+ rtx op2 = gen_lowpart (V4HImode, operands[2]);
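+  /* Likewise, the low half of each 32-bit element is at an odd
+     16-bit memory index on big-endian.  */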
+ if (TARGET_BIG_ENDIAN)
+ emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
+ else
+ emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
DONE;
})
===================================================================
@@ -39,6 +39,7 @@
extern void ia64_expand_compare (rtx *, rtx *, rtx *);
extern void ia64_expand_vecint_cmov (rtx[]);
extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
+extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
extern void ia64_expand_unpack (rtx [], bool, bool);
extern void ia64_expand_widen_sum (rtx[], bool);
-extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool);