@@ -25,6 +25,7 @@
UNSPEC_LOONGSON_PCMPGT
UNSPEC_LOONGSON_PEXTR
UNSPEC_LOONGSON_PINSRH
+ UNSPEC_LOONGSON_VINIT
UNSPEC_LOONGSON_PMADD
UNSPEC_LOONGSON_PMOVMSK
UNSPEC_LOONGSON_PMULHU
@@ -83,6 +84,9 @@
;; but with twice as many elements.
(define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")])
+;; Given a vector type T, the mode of T's elements (its inner mode).
+(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
+
;; The Loongson instruction suffixes corresponding to the conversions
;; specified by V_half_width.
(define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")])
@@ -119,6 +123,28 @@
DONE;
})
+;; Helper for vec_init. Move the DImode input (a register or zero) into the
+;; output with dmtc1 so that element 0 is set; other elements are undefined.
+(define_insn "loongson_vec_init1_<mode>"
+ [(set (match_operand:VHB 0 "register_operand" "=f")
+ (unspec:VHB [(truncate:<V_inner>
+ (match_operand:DI 1 "reg_or_0_operand" "Jd"))]
+ UNSPEC_LOONGSON_VINIT))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "dmtc1\t%z1,%0"
+ [(set_attr "move_type" "mtc")
+ (set_attr "mode" "DI")])
+
+;; Helper for vec_initv2si: concatenate two SImode registers with punpcklwd.
+(define_insn "*vec_concatv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=f")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpcklwd\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
;; Instruction patterns for SIMD instructions.
;; Pack with signed saturation.
@@ -259,13 +259,11 @@
(match_operand:V2SF 1 "")]
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
{
- rtx op0 = force_reg (SFmode, XVECEXP (operands[1], 0, 0));
- rtx op1 = force_reg (SFmode, XVECEXP (operands[1], 0, 1));
- emit_insn (gen_vec_initv2sf_internal (operands[0], op0, op1));
+ mips_expand_vector_init (operands[0], operands[1]);
DONE;
})
-(define_insn "vec_initv2sf_internal"
+(define_insn "vec_concatv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=f")
(vec_concat:V2SF
(match_operand:SF 1 "register_operand" "f")
@@ -315,7 +313,7 @@
/* We don't have an insert instruction, so we duplicate the float, and
then use a PUL instruction. */
rtx temp = gen_reg_rtx (V2SFmode);
- emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1]));
+ emit_insn (gen_vec_concatv2sf (temp, operands[1], operands[1]));
operands[1] = temp;
operands[3] = GEN_INT (1 - INTVAL (operands[2]) + 2);
})
@@ -328,11 +326,9 @@
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
{
if (BYTES_BIG_ENDIAN)
- emit_insn (gen_vec_initv2sf_internal (operands[0], operands[1],
- operands[2]));
+ emit_insn (gen_vec_concatv2sf (operands[0], operands[1], operands[2]));
else
- emit_insn (gen_vec_initv2sf_internal (operands[0], operands[2],
- operands[1]));
+ emit_insn (gen_vec_concatv2sf (operands[0], operands[2], operands[1]));
DONE;
})
@@ -15932,30 +15932,6 @@ mips_conditional_register_usage (void)
}
}
-/* Initialize vector TARGET to VALS. */
-
-void
-mips_expand_vector_init (rtx target, rtx vals)
-{
- enum machine_mode mode;
- enum machine_mode inner;
- unsigned int i, n_elts;
- rtx mem;
-
- mode = GET_MODE (target);
- inner = GET_MODE_INNER (mode);
- n_elts = GET_MODE_NUNITS (mode);
-
- gcc_assert (VECTOR_MODE_P (mode));
-
- mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
- for (i = 0; i < n_elts; i++)
- emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
- XVECEXP (vals, 0, i));
-
- emit_move_insn (target, mem);
-}
-
/* When generating MIPS16 code, we want to allocate $24 (T_REG) before
other registers for instructions for which it is possible. This
encourages the compiler to use CMP in cases where an XOR would
@@ -16475,6 +16451,48 @@ mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d)
return true;
}
+/* Expand a one-operand V8QI broadcast on Loongson. Return true if D matches. */
+
+static bool
+mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d)
+{
+ unsigned i, elt;
+ rtx t0, t1;
+
+ if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS))
+ return false;
+ /* Note that we've already matched V2SI via punpck and V4HI via pshufh. */
+ if (d->vmode != V8QImode)
+ return false;
+ if (!d->one_vector_p)
+ return false;
+
+ elt = d->perm[0];
+ for (i = 1; i < 8; ++i) /* Every selector must name the same element. */
+ if (d->perm[i] != elt)
+ return false;
+
+ if (d->testing_p) /* Matched; emit nothing when only testing. */
+ return true;
+
+ /* Interleaving op0 with itself makes two copies of the desired byte adjacent. */
+ t0 = gen_reg_rtx (V8QImode);
+ if (elt < 4)
+ emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0));
+ else
+ emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0));
+
+ /* Shuffle that one HImode element (the byte pair) into all locations. */
+ elt &= 3; /* HImode lane index within the interleaved half. */
+ elt *= 0x55; /* 0x55 = 01010101b: repeat the index in four 2-bit fields. */
+ t1 = gen_reg_rtx (V4HImode);
+ emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0),
+ force_reg (SImode, GEN_INT (elt))));
+
+ emit_move_insn (d->target, gen_lowpart (V8QImode, t1));
+ return true;
+}
+
static bool
mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
@@ -16506,6 +16524,8 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
if (mips_expand_vpc_loongson_pshufh (d))
return true;
+ if (mips_expand_vpc_loongson_bcast (d))
+ return true;
return false;
}
@@ -16656,6 +16676,164 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
}
+
+/* Subroutine of mips_expand_vector_init: is X a CONST_INT or CONST_DOUBLE? */
+
+static inline bool
+mips_constant_elt_p (rtx x)
+{
+ return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE;
+}
+
+/* Subroutine of mips_expand_vector_init: broadcast ELT to all of TARGET. */
+
+static void
+mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt)
+{
+ struct expand_vec_perm_d d;
+ rtx t1;
+ bool ok;
+
+ if (elt != const0_rtx) /* Anything but zero must live in a register. */
+ elt = force_reg (GET_MODE_INNER (vmode), elt);
+ if (REG_P (elt)) /* The vec_init1 patterns take a DImode input. */
+ elt = gen_lowpart (DImode, elt);
+
+ t1 = gen_reg_rtx (vmode); /* T1 receives ELT in element 0 only. */
+ switch (vmode)
+ {
+ case V8QImode:
+ emit_insn (gen_loongson_vec_init1_v8qi (t1, elt));
+ break;
+ case V4HImode:
+ emit_insn (gen_loongson_vec_init1_v4hi (t1, elt));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ memset (&d, 0, sizeof(d)); /* All-zero d.perm selects element 0 everywhere. */
+ d.target = target;
+ d.op0 = t1;
+ d.op1 = t1;
+ d.vmode = vmode;
+ d.nelt = GET_MODE_NUNITS (vmode);
+ d.one_vector_p = true;
+
+ ok = mips_expand_vec_perm_const_1 (&d);
+ gcc_assert (ok);
+}
+
+/* A subroutine of mips_expand_vector_init, replacing all of the non-constant
+ elements of VALS with zeros, copy the constant vector to TARGET. */
+
+static void
+mips_expand_vi_constant (enum machine_mode vmode, unsigned nelt,
+ rtx target, rtx vals)
+{
+ rtvec vec = shallow_copy_rtvec (XVEC (vals, 0)); /* Edit a copy, not VALS. */
+ unsigned i;
+
+ for (i = 0; i < nelt; ++i)
+ {
+ if (!mips_constant_elt_p (RTVEC_ELT (vec, i)))
+ RTVEC_ELT (vec, i) = const0_rtx;
+ }
+
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec));
+}
+
+
+/* A subroutine of mips_expand_vector_init, expand via pinsrh. */
+
+static void
+mips_expand_vi_loongson_one_pinsrh (rtx target, rtx vals, unsigned one_var)
+{
+ mips_expand_vi_constant (V4HImode, 4, target, vals); /* Constants first. */
+
+ emit_insn (gen_vec_setv4hi (target, target, XVECEXP (vals, 0, one_var),
+ GEN_INT (one_var))); /* Then insert element ONE_VAR of VALS. */
+}
+
+/* A subroutine of mips_expand_vector_init, expand anything via memory. */
+
+static void
+mips_expand_vi_general (enum machine_mode vmode, enum machine_mode imode,
+ unsigned nelt, unsigned nvar, rtx target, rtx vals)
+{
+ rtx mem = assign_stack_temp (vmode, GET_MODE_SIZE (vmode), 0);
+ unsigned int i, isize = GET_MODE_SIZE (imode);
+
+ if (nvar < nelt) /* Some elements are constant: store them all at once. */
+ mips_expand_vi_constant (vmode, nelt, mem, vals);
+
+ for (i = 0; i < nelt; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (!mips_constant_elt_p (x)) /* Store each variable element in its slot. */
+ emit_move_insn (adjust_address (mem, imode, i * isize), x);
+ }
+
+ emit_move_insn (target, mem); /* Load the completed vector. */
+}
+
+/* Expand the initialization of vector TARGET from the elements of VALS. */
+
+void
+mips_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode vmode = GET_MODE (target);
+ enum machine_mode imode = GET_MODE_INNER (vmode);
+ unsigned i, nelt = GET_MODE_NUNITS (vmode);
+ unsigned nvar = 0, one_var = -1u; /* one_var: index of the last variable elt. */
+ bool all_same = true;
+ rtx x;
+
+ for (i = 0; i < nelt; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!mips_constant_elt_p (x))
+ nvar++, one_var = i;
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) /* Differs from elt 0. */
+ all_same = false;
+ }
+
+ /* Load constants from the pool, or whatever's handy. */
+ if (nvar == 0)
+ {
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)));
+ return;
+ }
+
+ /* For two-part initialization, always use CONCAT. */
+ if (nelt == 2)
+ {
+ rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0));
+ rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1));
+ x = gen_rtx_VEC_CONCAT (vmode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return;
+ }
+
+ /* Loongson is the only cpu with vectors with more elements. */
+ gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS);
+
+ /* If all values are identical, broadcast the value. */
+ if (all_same)
+ {
+ mips_expand_vi_broadcast (vmode, target, XVECEXP (vals, 0, 0));
+ return;
+ }
+
+ /* If only one element of a V4HImode vector is variable, use PINSRH. */
+ if (nvar == 1 && vmode == V4HImode)
+ {
+ mips_expand_vi_loongson_one_pinsrh (target, vals, one_var);
+ return;
+ }
+
+ mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals);
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP