@@ -125,9 +125,6 @@ (define_mode_iterator IMSA_WH [V4SI V8HI])
;; Only floating-point modes.
(define_mode_iterator FMSA [V2DF V4SF])
-;; Only used for immediate set shuffle elements instruction.
-(define_mode_iterator MSA_WHB_W [V4SI V8HI V16QI V4SF])
-
;; The attribute gives the integer vector mode with same size.
(define_mode_attr VIMODE
[(V2DF "V2DI")
@@ -2520,21 +2517,29 @@ (define_insn "msa_sat_u_<msafmt>"
(set_attr "mode" "<MODE>")])
(define_insn "msa_shf_<msafmt_f>"
- [(set (match_operand:MSA_WHB_W 0 "register_operand" "=f")
- (vec_select:MSA_WHB_W
- (match_operand:MSA_WHB_W 1 "register_operand" "f")
+  [(set (match_operand:MSA 0 "register_operand" "=f")
+	(vec_select:MSA
+	  (match_operand:MSA 1 "register_operand" "f")
(match_operand 2 "par_const_vector_shf_set_operand" "")))]
"ISA_HAS_MSA"
{
- HOST_WIDE_INT val = 0;
- unsigned int i;
-
- /* We convert the selection to an immediate. */
- for (i = 0; i < 4; i++)
- val |= INTVAL (XVECEXP (operands[2], 0, i)) << (2 * i);
-
- operands[2] = GEN_INT (val);
- return "shf.<msafmt>\t%w0,%w1,%X2";
+  /* mips_msa_shf_i8 returns (IMM | (INSN << 8)): the low byte is the
+     8-bit shuffle immediate, the next byte selects the data format
+     (1 = SHF.B, 2 = SHF.H, 4 = SHF.W, 0 = unsupported mode).  */
+  HOST_WIDE_INT rval = mips_msa_shf_i8 (operands);
+
+  /* An immediate of 0b11100100 (0xe4) keeps every element in place.
+     Such a select is normally simplified away before output, but
+     mips_const_vector_shuffle_set_p accepts the identity selector, so
+     emit the (redundant but correct) shuffle instead of aborting the
+     compilation.  */
+  operands[2] = GEN_INT (rval & 0xff);
+
+  switch (rval & 0xff00)
+    {
+    case 0x400:
+      return "shf.w\t%w0,%w1,%X2";
+    case 0x200:
+      return "shf.h\t%w0,%w1,%X2";
+    case 0x100:
+      return "shf.b\t%w0,%w1,%X2";
+    default:
+      /* INSN == 0: the operand mode is not handled by
+	 mips_msa_shf_i8.  */
+      gcc_unreachable ();
+    }
}
[(set_attr "type" "simd_shf")
(set_attr "mode" "<MODE>")])
@@ -387,6 +387,7 @@ extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code);
extern void mips_register_frame_header_opt (void);
extern void mips_expand_vec_cond_expr (machine_mode, machine_mode, rtx *, bool);
extern void mips_expand_vec_cmp_expr (rtx *);
+extern HOST_WIDE_INT mips_msa_shf_i8 (rtx *);
extern void mips_emit_speculation_barrier_function (void);
@@ -2079,6 +2079,72 @@ mips_const_vector_shuffle_set_p (rtx op, machine_mode mode)
int nsets = nunits / 4;
int set = 0;
int i, j;
+ int val[4];
+ bool ok;
+
+ /* We support swapping 2 Doubleword part with shf.w. */
+ if (ISA_HAS_MSA && (mode == V2DFmode || mode == V2DImode))
+ {
+ if (!IN_RANGE (INTVAL (XVECEXP (op, 0, 0)), 0, 1)
+ || !IN_RANGE (INTVAL (XVECEXP (op, 0, 1)), 0, 1))
+ return false;
+ }
+
+ if (ISA_HAS_MSA && mode == V16QImode)
+ {
+ /* We can use shf.w if the elements are in-order inner 32bit. */
+ ok = true;
+ for (j = 0; j < 4; j++)
+ {
+ val[0] = INTVAL (XVECEXP (op, 0, j * 4));
+ val[1] = INTVAL (XVECEXP (op, 0, j * 4 + 1));
+ val[2] = INTVAL (XVECEXP (op, 0, j * 4 + 2));
+ val[3] = INTVAL (XVECEXP (op, 0, j * 4 + 3));
+ if (val[0] != val[1] - 1
+ || val[1] != val[2] - 1
+ || val[2] != val[3] - 1)
+ ok = false;
+ if (val[0] != 0 && val[0] != 4 && val[0] != 8 && val[0] != 12)
+ ok = false;
+ }
+ if (ok)
+ return ok;
+
+ /* We can use shf.h if the elements are in order inner 16bit. */
+ ok = true;
+ for (j = 0; j < 4; j++)
+ {
+ val[0] = INTVAL (XVECEXP (op, 0, j * 2));
+ val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1));
+ val[2] = INTVAL (XVECEXP (op, 0, j * 2 + 8));
+ val[3] = INTVAL (XVECEXP (op, 0, j * 2 + 1 + 8));
+ if (val[0] != val[1] - 1 || val[2] != val[3] - 1)
+ ok = false;
+ if (val[0] != val[2] - 8 || val[1] != val[3] - 8)
+ ok = false;
+ if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6)
+ ok = false;
+ }
+ if (ok)
+ return ok;
+ }
+
+ if (ISA_HAS_MSA && mode == V8HImode)
+ {
+ /* We can use shf.w if the elements are in-order inner 32bit. */
+ ok = true;
+ for (j = 0; j < 4; j++)
+ {
+ val[0] = INTVAL (XVECEXP (op, 0, j * 2));
+ val[1] = INTVAL (XVECEXP (op, 0, j * 2 + 1));
+ if (val[0] != val[1] - 1)
+ ok = false;
+ if (val[0] != 0 && val[0] != 2 && val[0] != 4 && val[0] != 6)
+ ok = false;
+ }
+ if (ok)
+ return ok;
+ }
/* Check if we have the same 4-element sets. */
for (j = 0; j < nsets; j++, set = 4 * j)
@@ -22304,6 +22370,89 @@ mips_msa_vec_parallel_const_half (machine_mode mode, bool high_p)
return gen_rtx_PARALLEL (VOIDmode, v);
}
+/* Construct and return the i8 immediate of SHF.df for the selector in
+   OPERANDS[2], together with the data format that must be used with it.
+   OPERANDS[0] supplies the vector mode.  The selector is expected to
+   have been accepted by mips_const_vector_shuffle_set_p.
+
+   Return (IMM | (INSN << 8)): IMM is in the range [0, 0xFF] and INSN is
+   0 (error: unhandled mode), 1 (SHF.B), 2 (SHF.H) or 4 (SHF.W).
+
+   A wider data format is chosen only when the WHOLE selector moves
+   aligned, complete wider elements.  Checking just the first few
+   indices for adjacency is not enough: a byte selector such as
+   { 1, 2, 0, 1, 5, 6, 4, 5, ... } has adjacent pairs but does not move
+   whole halfwords and must stay a SHF.B.  */
+
+HOST_WIDE_INT
+mips_msa_shf_i8 (rtx *operands)
+{
+  HOST_WIDE_INT rval = 0, val[16];
+  unsigned int i;
+  machine_mode mode = GET_MODE (operands[0]);
+  int which_op = 0;
+
+  if (mode == V2DImode || mode == V2DFmode)
+    {
+      /* Doublewords are moved with shf.w: doubleword 0 is words 0 and 1,
+	 doubleword 1 is words 2 and 3.  */
+      for (i = 0; i < 2; i++)
+	{
+	  HOST_WIDE_INT dword = INTVAL (XVECEXP (operands[2], 0, i));
+	  HOST_WIDE_INT first_word = dword == 0 ? 0 : 2;
+
+	  val[2 * i] = first_word;
+	  val[2 * i + 1] = first_word + 1;
+	}
+      which_op = 4;
+    }
+  else if (mode == V16QImode)
+    {
+      bool word_p = true;
+      bool half_p = true;
+
+      for (i = 0; i < 16; i++)
+	val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+
+      /* shf.w is usable when every group of four bytes is one aligned
+	 word of the source.  */
+      for (i = 0; i < 16; i += 4)
+	if ((val[i] & 3) != 0
+	    || val[i] < 0 || val[i] > 12
+	    || val[i + 1] != val[i] + 1
+	    || val[i + 2] != val[i] + 2
+	    || val[i + 3] != val[i] + 3)
+	  word_p = false;
+
+      /* shf.h is usable when every pair of bytes in the low half is one
+	 aligned halfword taken from the low half, and the high half
+	 repeats the same selection shifted by eight bytes.  */
+      for (i = 0; i < 8; i += 2)
+	if ((val[i] & 1) != 0
+	    || val[i] < 0 || val[i] > 6
+	    || val[i + 1] != val[i] + 1
+	    || val[i + 8] != val[i] + 8
+	    || val[i + 9] != val[i] + 9)
+	  half_p = false;
+
+      if (word_p)
+	{
+	  which_op = 4;
+	  val[0] = val[0] / 4;
+	  val[1] = val[4] / 4;
+	  val[2] = val[8] / 4;
+	  val[3] = val[12] / 4;
+	}
+      else if (half_p)
+	{
+	  which_op = 2;
+	  val[0] = val[0] / 2;
+	  val[1] = val[2] / 2;
+	  val[2] = val[4] / 2;
+	  val[3] = val[6] / 2;
+	}
+      else
+	/* Plain byte shuffle: val[0..3] already hold the indices.  */
+	which_op = 1;
+    }
+  else if (mode == V8HImode)
+    {
+      bool word_p = true;
+
+      for (i = 0; i < 8; i++)
+	val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+
+      /* shf.w is usable when every pair of halfwords is one aligned
+	 word of the source.  */
+      for (i = 0; i < 8; i += 2)
+	if ((val[i] & 1) != 0
+	    || val[i] < 0 || val[i] > 6
+	    || val[i + 1] != val[i] + 1)
+	  word_p = false;
+
+      if (word_p)
+	{
+	  which_op = 4;
+	  val[0] = val[0] / 2;
+	  val[1] = val[2] / 2;
+	  val[2] = val[4] / 2;
+	  val[3] = val[6] / 2;
+	}
+      else
+	/* Plain halfword shuffle: val[0..3] already hold the indices.  */
+	which_op = 2;
+    }
+  else if (mode == V4SImode || mode == V4SFmode)
+    {
+      for (i = 0; i < 4; i++)
+	val[i] = INTVAL (XVECEXP (operands[2], 0, i));
+      which_op = 4;
+    }
+  else
+    /* Unhandled mode: report the error (INSN == 0) without reading the
+       uninitialized VAL array.  */
+    return 0;
+
+  /* We convert the selection to an immediate: two bits per element,
+     element 0 in the least significant bits.  */
+  for (i = 0; i < 4; i++)
+    rval |= (val[i] & 3) << (2 * i);
+
+  rval |= (which_op << 8);
+  return rval;
+}
+
/* A subroutine of mips_expand_vec_init, match constant vector elements. */
static inline bool