@@ -39,6 +39,8 @@
UNSPEC_LOONGSON_PUNPCKL
UNSPEC_LOONGSON_PADDD
UNSPEC_LOONGSON_PSUBD
+ UNSPEC_LOONGSON_DSLL
+ UNSPEC_LOONGSON_DSRL
])
;; Mode iterators and attributes.
@@ -58,6 +60,9 @@
;; 64-bit vectors of words and halfwords.
(define_mode_iterator VWH [V2SI V4HI])
+;; 64-bit vectors of words and bytes.
+(define_mode_iterator VWB [V2SI V8QI])
+
;; 64-bit vectors of words, halfwords and bytes.
(define_mode_iterator VWHB [V2SI V4HI V8QI])
@@ -404,39 +409,61 @@
})
;; Maximum of signed halfwords.
-(define_insn "smax<mode>3"
- [(set (match_operand:VH 0 "register_operand" "=f")
- (smax:VH (match_operand:VH 1 "register_operand" "f")
- (match_operand:VH 2 "register_operand" "f")))]
+;; V4HI only: the signed halfword maximum is a single "pmaxsh" on three
+;; "f"-constraint register operands.
+(define_insn "smaxv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=f")
+ (smax:V4HI (match_operand:V4HI 1 "register_operand" "f")
+ (match_operand:V4HI 2 "register_operand" "f")))]
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
- "pmaxs<V_suffix>\t%0,%1,%2"
+ "pmaxsh\t%0,%1,%2"
[(set_attr "type" "fadd")])
+;; Signed maximum for the VWB modes (V2SI and V8QI).  The pattern carries
+;; no RTL template of its own: the preparation code passes the three
+;; operands to mips_expand_vec_minmax together with the pcmpgt generator
+;; for the mode, with false as the final (min_p) argument, and then
+;; finishes with DONE.
+(define_expand "smax<mode>3"
+ [(match_operand:VWB 0 "register_operand" "")
+ (match_operand:VWB 1 "register_operand" "")
+ (match_operand:VWB 2 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vec_minmax (operands[0], operands[1], operands[2],
+ gen_loongson_pcmpgt<V_suffix>, false);
+ DONE;
+})
+
;; Maximum of unsigned bytes.
-(define_insn "umax<mode>3"
- [(set (match_operand:VB 0 "register_operand" "=f")
- (umax:VB (match_operand:VB 1 "register_operand" "f")
- (match_operand:VB 2 "register_operand" "f")))]
+;; V8QI only: the unsigned byte maximum is a single "pmaxub" on three
+;; "f"-constraint register operands.
+(define_insn "umaxv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=f")
+ (umax:V8QI (match_operand:V8QI 1 "register_operand" "f")
+ (match_operand:V8QI 2 "register_operand" "f")))]
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
- "pmaxu<V_suffix>\t%0,%1,%2"
+ "pmaxub\t%0,%1,%2"
[(set_attr "type" "fadd")])
;; Minimum of signed halfwords.
-(define_insn "smin<mode>3"
- [(set (match_operand:VH 0 "register_operand" "=f")
- (smin:VH (match_operand:VH 1 "register_operand" "f")
- (match_operand:VH 2 "register_operand" "f")))]
+;; V4HI only: the signed halfword minimum is a single "pminsh" on three
+;; "f"-constraint register operands.
+(define_insn "sminv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=f")
+ (smin:V4HI (match_operand:V4HI 1 "register_operand" "f")
+ (match_operand:V4HI 2 "register_operand" "f")))]
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
- "pmins<V_suffix>\t%0,%1,%2"
+ "pminsh\t%0,%1,%2"
[(set_attr "type" "fadd")])
+;; Signed minimum for the VWB modes (V2SI and V8QI).  The pattern carries
+;; no RTL template of its own: the preparation code passes the three
+;; operands to mips_expand_vec_minmax together with the pcmpgt generator
+;; for the mode, with true as the final (min_p) argument, and then
+;; finishes with DONE.
+(define_expand "smin<mode>3"
+ [(match_operand:VWB 0 "register_operand" "")
+ (match_operand:VWB 1 "register_operand" "")
+ (match_operand:VWB 2 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vec_minmax (operands[0], operands[1], operands[2],
+ gen_loongson_pcmpgt<V_suffix>, true);
+ DONE;
+})
+
;; Minimum of unsigned bytes.
-(define_insn "umin<mode>3"
- [(set (match_operand:VB 0 "register_operand" "=f")
- (umin:VB (match_operand:VB 1 "register_operand" "f")
- (match_operand:VB 2 "register_operand" "f")))]
+;; V8QI only: the unsigned byte minimum is a single "pminub" on three
+;; "f"-constraint register operands.
+(define_insn "uminv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=f")
+ (umin:V8QI (match_operand:V8QI 1 "register_operand" "f")
+ (match_operand:V8QI 2 "register_operand" "f")))]
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
- "pminu<V_suffix>\t%0,%1,%2"
+ "pminub\t%0,%1,%2"
[(set_attr "type" "fadd")])
;; Move byte mask.
@@ -506,6 +533,14 @@
"biadd\t%0,%1"
[(set_attr "type" "fabs")])
+;; reduc_uplus for V8QI: a single "biadd" whose V8QI result is modelled
+;; as UNSPEC_LOONGSON_BIADD applied to operand 1.
+(define_insn "reduc_uplus_v8qi"
+ [(set (match_operand:V8QI 0 "register_operand" "=f")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "f")]
+ UNSPEC_LOONGSON_BIADD))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "biadd\t%0,%1"
+ [(set_attr "type" "fabs")])
+
;; Sum of absolute differences.
(define_insn "loongson_psadbh"
[(set (match_operand:<V_stretch_half> 0 "register_operand" "=f")
@@ -620,6 +655,20 @@
"punpckhhw\t%0,%1,%2"
[(set_attr "type" "fdiv")])
+;; "punpckhhw" described on V8QI operands.  In terms of the V16QI
+;; concatenation of operands 1 and 2, the result is byte pairs 4-5 and
+;; 6-7 of operand 1 interleaved pairwise with the same pairs of operand 2
+;; (concatenation indices 12-13 and 14-15).
+(define_insn "loongson_punpckhhw_qi"
+ [(set (match_operand:V8QI 0 "register_operand" "=f")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "f")
+ (match_operand:V8QI 2 "register_operand" "f"))
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 12) (const_int 13)
+ (const_int 6) (const_int 7)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpckhhw\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
(define_insn "loongson_punpckhwd"
[(set (match_operand:V2SI 0 "register_operand" "=f")
(vec_select:V2SI
@@ -631,6 +680,32 @@
"punpckhwd\t%0,%1,%2"
[(set_attr "type" "fcvt")])
+;; "punpckhwd" described on V8QI operands: the result is bytes 4-7 of
+;; operand 1 followed by bytes 4-7 of operand 2 (indices 12-15 of the
+;; V16QI concatenation).
+(define_insn "loongson_punpckhwd_qi"
+ [(set (match_operand:V8QI 0 "register_operand" "=f")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "f")
+ (match_operand:V8QI 2 "register_operand" "f"))
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpckhwd\t%0,%1,%2"
+ [(set_attr "type" "fcvt")])
+
+;; "punpckhwd" described on V4HI operands: the result is elements 2-3 of
+;; operand 1 followed by elements 2-3 of operand 2 (indices 6-7 of the
+;; V8HI concatenation).
+(define_insn "loongson_punpckhwd_hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=f")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "f")
+ (match_operand:V4HI 2 "register_operand" "f"))
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpckhwd\t%0,%1,%2"
+ [(set_attr "type" "fcvt")])
+
;; Unpack low data.
(define_insn "loongson_punpcklbh"
[(set (match_operand:V8QI 0 "register_operand" "=f")
@@ -658,6 +733,20 @@
"punpcklhw\t%0,%1,%2"
[(set_attr "type" "fdiv")])
+;; "punpcklhw" described on V8QI operands.  In terms of the V16QI
+;; concatenation of operands 1 and 2, the result is byte pairs 0-1 and
+;; 2-3 of operand 1 interleaved pairwise with the same pairs of operand 2
+;; (concatenation indices 8-9 and 10-11).  The leading "*" in the name
+;; follows the machine-description convention for patterns that are only
+;; matched and get no gen_ function.
+(define_insn "*loongson_punpcklhw_qi"
+ [(set (match_operand:V8QI 0 "register_operand" "=f")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "f")
+ (match_operand:V8QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 8) (const_int 9)
+ (const_int 2) (const_int 3)
+ (const_int 10) (const_int 11)])))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpcklhw\t%0,%1,%2"
+ [(set_attr "type" "fdiv")])
+
(define_insn "loongson_punpcklwd"
[(set (match_operand:V2SI 0 "register_operand" "=f")
(vec_select:V2SI
@@ -669,6 +758,32 @@
"punpcklwd\t%0,%1,%2"
[(set_attr "type" "fcvt")])
+;; "punpcklwd" described on V8QI operands: the result is bytes 0-3 of
+;; operand 1 followed by bytes 0-3 of operand 2 (indices 8-11 of the
+;; V16QI concatenation).  The leading "*" in the name follows the
+;; machine-description convention for patterns that are only matched and
+;; get no gen_ function.
+(define_insn "*loongson_punpcklwd_qi"
+ [(set (match_operand:V8QI 0 "register_operand" "=f")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "f")
+ (match_operand:V8QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)])))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpcklwd\t%0,%1,%2"
+ [(set_attr "type" "fcvt")])
+
+;; "punpcklwd" described on V4HI operands: the result is elements 0-1 of
+;; operand 1 followed by elements 0-1 of operand 2 (indices 4-5 of the
+;; V8HI concatenation).  The leading "*" in the name follows the
+;; machine-description convention for patterns that are only matched and
+;; get no gen_ function.
+(define_insn "*loongson_punpcklwd_hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=f")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "f")
+ (match_operand:V4HI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 4) (const_int 5)])))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "punpcklwd\t%0,%1,%2"
+ [(set_attr "type" "fcvt")])
+
(define_expand "vec_perm_const<mode>"
[(match_operand:VWHB 0 "register_operand" "")
(match_operand:VWHB 1 "register_operand" "")
@@ -718,6 +833,81 @@
DONE;
})
+;; Whole vector shifts, used for reduction epilogues.
+;; Left shift: operand 1 is the vector, operand 2 an SImode shift count,
+;; both in "f"-constraint registers.  The operation is represented as
+;; UNSPEC_LOONGSON_DSLL and output as a single "dsll".
+(define_insn "vec_shl_<mode>"
+ [(set (match_operand:VWHBDI 0 "register_operand" "=f")
+ (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_DSLL))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "dsll\t%0,%1,%2"
+ [(set_attr "type" "fcvt")])
+
+;; Whole vector right shift: operand 1 is the vector, operand 2 an SImode
+;; shift count, both in "f"-constraint registers.  The operation is
+;; represented as UNSPEC_LOONGSON_DSRL and output as a single "dsrl".
+(define_insn "vec_shr_<mode>"
+ [(set (match_operand:VWHBDI 0 "register_operand" "=f")
+ (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")]
+ UNSPEC_LOONGSON_DSRL))]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+ "dsrl\t%0,%1,%2"
+ [(set_attr "type" "fcvt")])
+
+;; Sum reduction for the VWH modes (V2SI and V4HI), expanded by
+;; mips_expand_vec_reduc with the add<mode>3 generator as the combining
+;; operation.
+(define_expand "reduc_uplus_<mode>"
+ [(match_operand:VWH 0 "register_operand" "")
+ (match_operand:VWH 1 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vec_reduc (operands[0], operands[1], gen_add<mode>3);
+ DONE;
+})
+
+; ??? Given that we're not describing a widening reduction, we should
+; not have separate optabs for signed and unsigned.
+;; The signed sum reduction for the VWHB modes (V2SI, V4HI and V8QI)
+;; simply emits the reduc_uplus pattern of the same mode.
+(define_expand "reduc_splus_<mode>"
+ [(match_operand:VWHB 0 "register_operand" "")
+ (match_operand:VWHB 1 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ emit_insn (gen_reduc_uplus_<mode>(operands[0], operands[1]));
+ DONE;
+})
+
+;; Signed maximum reduction for the VWHB modes (V2SI, V4HI and V8QI),
+;; expanded by mips_expand_vec_reduc with the smax<mode>3 generator as
+;; the combining operation.
+(define_expand "reduc_smax_<mode>"
+ [(match_operand:VWHB 0 "register_operand" "")
+ (match_operand:VWHB 1 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vec_reduc (operands[0], operands[1], gen_smax<mode>3);
+ DONE;
+})
+
+;; Signed minimum reduction for the VWHB modes (V2SI, V4HI and V8QI),
+;; expanded by mips_expand_vec_reduc with the smin<mode>3 generator as
+;; the combining operation.
+(define_expand "reduc_smin_<mode>"
+ [(match_operand:VWHB 0 "register_operand" "")
+ (match_operand:VWHB 1 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vec_reduc (operands[0], operands[1], gen_smin<mode>3);
+ DONE;
+})
+
+;; Unsigned maximum reduction for the VB modes (presumably just V8QI;
+;; the iterator is defined outside this hunk), expanded by
+;; mips_expand_vec_reduc with the umax<mode>3 generator as the combining
+;; operation.
+(define_expand "reduc_umax_<mode>"
+ [(match_operand:VB 0 "register_operand" "")
+ (match_operand:VB 1 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vec_reduc (operands[0], operands[1], gen_umax<mode>3);
+ DONE;
+})
+
+;; Unsigned minimum reduction for the VB modes (presumably just V8QI;
+;; the iterator is defined outside this hunk), expanded by
+;; mips_expand_vec_reduc with the umin<mode>3 generator as the combining
+;; operation.
+(define_expand "reduc_umin_<mode>"
+ [(match_operand:VB 0 "register_operand" "")
+ (match_operand:VB 1 "register_operand" "")]
+ "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
+{
+ mips_expand_vec_reduc (operands[0], operands[1], gen_umin<mode>3);
+ DONE;
+})
+
;; Integer division and modulus. For integer multiplication, see mips.md.
(define_insn "<u>div<mode>3"
@@ -330,6 +330,9 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
extern void mips_expand_vector_init (rtx, rtx);
extern bool mips_expand_vec_perm_const (rtx op[4]);
extern void mips_expand_vec_unpack (rtx op[2], bool, bool);
+extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx));
+extern void mips_expand_vec_minmax (rtx, rtx, rtx,
+ rtx (*) (rtx, rtx, rtx), bool);
extern bool mips_eh_uses (unsigned int);
extern bool mips_epilogue_uses (unsigned int);
@@ -16834,6 +16834,83 @@ mips_expand_vector_init (rtx target, rtx vals)
mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals);
}
+
+/* Expand a vector reduction.  TARGET receives the result of the last
+   GEN insn emitted.  IN is the vector being reduced; it is only read,
+   never written.  GEN emits the element-wise binary operation
+   (destination, source 1, source 2) used to fold the vector onto itself.
+
+   Each step builds FOLD, a permuted or shifted copy of the current
+   partial result LAST, and combines the two with GEN.  Every
+   intermediate value lives in a fresh pseudo so that the caller's input
+   register is not clobbered.  */
+
+void
+mips_expand_vec_reduc (rtx target, rtx in, rtx (*gen)(rtx, rtx, rtx))
+{
+  enum machine_mode vmode = GET_MODE (in);
+  unsigned char perm2[2];
+  rtx last, next, fold, x;
+  bool ok;
+
+  last = in;
+  fold = gen_reg_rtx (vmode);
+  switch (vmode)
+    {
+    case V2SFmode:
+      /* Use PUL/PLU to produce { L, H } op { H, L }.
+         By reversing the pair order, rather than using a pure interleave
+         high, we don't produce erroneous exceptional conditions.  */
+      perm2[0] = 1;
+      perm2[1] = 2;
+      ok = expand_vselect_vconcat (fold, last, last, perm2, 2);
+      gcc_assert (ok);
+      break;
+
+    case V2SImode:
+      /* Use interleave to produce { H, L } op { H, H }.  */
+      emit_insn (gen_loongson_punpckhwd (fold, last, last));
+      break;
+
+    case V4HImode:
+      /* Perform the first reduction with interleave,
+         and subsequent reductions with shifts.  */
+      emit_insn (gen_loongson_punpckhwd_hi (fold, last, last));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (16));
+      emit_insn (gen_vec_shr_v4hi (fold, last, x));
+      break;
+
+    case V8QImode:
+      /* As for V4HImode, but with one more shift step: fold by 32 bits
+         with the interleave, then by 16 and by 8 with shifts.  */
+      emit_insn (gen_loongson_punpckhwd_qi (fold, last, last));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (16));
+      emit_insn (gen_vec_shr_v8qi (fold, last, x));
+
+      next = gen_reg_rtx (vmode);
+      emit_insn (gen (next, last, fold));
+      last = next;
+
+      fold = gen_reg_rtx (vmode);
+      x = force_reg (SImode, GEN_INT (8));
+      emit_insn (gen_vec_shr_v8qi (fold, last, x));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (target, last, fold));
+}
+
+/* Expand a vector minimum (MIN_P true) or maximum (MIN_P false) of OP0
+   and OP1 into TARGET.  CMP emits the comparison insn; its result is
+   used as a bit mask (presumably all-ones in elements where OP0 > OP1
+   and zero elsewhere -- confirm against the pcmpgt patterns).  The
+   answer is then assembled as (mask & A) | (~mask & B), where A and B
+   are the operands wanted when the comparison holds and when it does
+   not.  */
+
+void
+mips_expand_vec_minmax (rtx target, rtx op0, rtx op1,
+                        rtx (*cmp) (rtx, rtx, rtx), bool min_p)
+{
+  enum machine_mode vmode = GET_MODE (target);
+  rtx mask = gen_reg_rtx (vmode);
+  rtx part_gt = gen_reg_rtx (vmode);
+  rtx part_le = gen_reg_rtx (vmode);
+  rtx pick_gt, pick_le, inv;
+
+  /* Where op0 > op1 the maximum is op0 and the minimum is op1;
+     elsewhere the roles are swapped.  */
+  if (min_p)
+    {
+      pick_gt = op1;
+      pick_le = op0;
+    }
+  else
+    {
+      pick_gt = op0;
+      pick_le = op1;
+    }
+
+  /* mask = op0 > op1 */
+  emit_insn (cmp (mask, op0, op1));
+
+  /* part_gt = mask & pick_gt */
+  emit_insn (gen_rtx_SET (VOIDmode, part_gt,
+                          gen_rtx_AND (vmode, mask, pick_gt)));
+
+  /* part_le = ~mask & pick_le */
+  inv = gen_rtx_NOT (vmode, mask);
+  emit_insn (gen_rtx_SET (VOIDmode, part_le,
+                          gen_rtx_AND (vmode, inv, pick_le)));
+
+  /* target = part_gt | part_le */
+  emit_insn (gen_rtx_SET (VOIDmode, target,
+                          gen_rtx_IOR (vmode, part_gt, part_le)));
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP