===================================================================
@@ -1115,12 +1115,13 @@
; vector registers. Make an attempt at removing unnecessary moves, though
; we're really at the mercy of the register allocator.
-(define_insn "move_lo_quad_v4si"
- [(set (match_operand:V4SI 0 "s_register_operand" "+w")
- (vec_concat:V4SI
- (match_operand:V2SI 1 "s_register_operand" "w")
- (vec_select:V2SI (match_dup 0)
- (parallel [(const_int 2) (const_int 3)]))))]
+(define_insn "neon_move_lo_quad_<mode>"
+ [(set (match_operand:ANY128 0 "s_register_operand" "+w")
+ (vec_concat:ANY128
+ (match_operand:<V_HALF> 1 "s_register_operand" "w")
+ (vec_select:<V_HALF>
+ (match_dup 0)
+ (match_operand:ANY128 2 "vect_par_constant_high" ""))))]
"TARGET_NEON"
{
int dest = REGNO (operands[0]);
@@ -1134,67 +1135,62 @@
[(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "move_lo_quad_v4sf"
- [(set (match_operand:V4SF 0 "s_register_operand" "+w")
- (vec_concat:V4SF
- (match_operand:V2SF 1 "s_register_operand" "w")
- (vec_select:V2SF (match_dup 0)
- (parallel [(const_int 2) (const_int 3)]))))]
+;; Replace the high half of quad register operand 0 with the half-width
+;; value in operand 1; the low half of operand 0 is kept.  Operand 2 is the
+;; PARALLEL of low-half lane indices built by the move_hi_quad_<mode>
+;; expander.  The preserved low lanes come first in the vec_concat and the
+;; new value second, so the RTL agrees with the write to %f0 below.
+(define_insn "neon_move_hi_quad_<mode>"
+ [(set (match_operand:ANY128 0 "s_register_operand" "+w")
+ (vec_concat:ANY128
+ (vec_select:<V_HALF>
+ (match_dup 0)
+ (match_operand:ANY128 2 "vect_par_constant_low" ""))
+ (match_operand:<V_HALF> 1 "s_register_operand" "w")))]
"TARGET_NEON"
{
int dest = REGNO (operands[0]);
int src = REGNO (operands[1]);
- if (dest != src)
- return "vmov\t%e0, %P1";
+ /* The move is redundant only when the source already is the high D
+ register of the destination.  Equal register numbers mean the source is
+ the LOW half, which still has to be copied.  NOTE(review): the offset of
+ 2 assumes a D register spans two VFP register numbers - confirm against
+ the register layout in arm.h.  */
+ if (src != dest + 2)
+ return "vmov\t%f0, %P1";
else
return "";
}
[(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "move_lo_quad_v8hi"
- [(set (match_operand:V8HI 0 "s_register_operand" "+w")
- (vec_concat:V8HI
- (match_operand:V4HI 1 "s_register_operand" "w")
- (vec_select:V4HI (match_dup 0)
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)]))))]
- "TARGET_NEON"
+;; Expander taking a 128-bit register (operand 0) and a half-width value
+;; (operand 1).  It builds the PARALLEL of lane indices 0 .. nunits/2 - 1
+;; and passes it as the third operand to neon_move_hi_quad_<mode>, the insn
+;; that outputs "vmov %f0, %P1".
+(define_expand "move_hi_quad_<mode>"
+ [(match_operand:ANY128 0 "s_register_operand" "")
+ (match_operand:<V_HALF> 1 "s_register_operand" "")]
+ "TARGET_NEON"
{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
- if (dest != src)
- return "vmov\t%e0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
+ /* Lane indices of the low half: 0, 1, ..., nunits/2 - 1.  */
+ for (i=0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT (i);
-(define_insn "move_lo_quad_v16qi"
- [(set (match_operand:V16QI 0 "s_register_operand" "+w")
- (vec_concat:V16QI
- (match_operand:V8QI 1 "s_register_operand" "w")
- (vec_select:V8QI (match_dup 0)
- (parallel [(const_int 8) (const_int 9)
- (const_int 10) (const_int 11)
- (const_int 12) (const_int 13)
- (const_int 14) (const_int 15)]))))]
- "TARGET_NEON"
+ /* The insn's operand 2 is declared in the full vector mode, so the
+ PARALLEL is given <MODE>mode to match it.  */
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+ emit_insn (gen_neon_move_hi_quad_<mode> (operands[0], operands[1], t1));
+
+ DONE;
+})
+
+;; Expander taking a 128-bit register (operand 0) and a half-width value
+;; (operand 1).  It builds the PARALLEL of lane indices
+;; nunits/2 .. nunits - 1 and passes it as the third operand to
+;; neon_move_lo_quad_<mode>.
+(define_expand "move_lo_quad_<mode>"
+ [(match_operand:ANY128 0 "s_register_operand" "")
+ (match_operand:<V_HALF> 1 "s_register_operand" "")]
+ "TARGET_NEON"
{
- int dest = REGNO (operands[0]);
- int src = REGNO (operands[1]);
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
- if (dest != src)
- return "vmov\t%e0, %P1";
- else
- return "";
-}
- [(set_attr "neon_type" "neon_bp_simple")]
-)
+ /* Lane indices of the high half: nunits/2, ..., nunits - 1.  */
+ for (i=0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
+ /* The insn's operand 2 is declared in the full vector mode, so the
+ PARALLEL is given <MODE>mode to match it.  */
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+ emit_insn (gen_neon_move_lo_quad_<mode> (operands[0], operands[1], t1));
+
+ DONE;
+})
+
;; Reduction operations
(define_expand "reduc_splus_<mode>"
@@ -5179,3 +5175,38 @@
}
)
+
+;; Truncate the elements of the two inputs (operands 1 and 2) and
+;; concatenate the results into operand 0: operand 1 supplies the first
+;; half (written through %e0) and operand 2 the second half (written
+;; through %f0).  Operand 0 is earlyclobber because its first half is
+;; written before operand 2 is read.  The template emits two instructions,
+;; so the length is stated explicitly instead of relying on the default.
+(define_insn "vec_pack_trunc_<mode>"
+ [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
+       (vec_concat:<V_narrow_pack>
+         (truncate:<V_narrow>
+           (match_operand:VN 1 "register_operand" "w"))
+         (truncate:<V_narrow>
+           (match_operand:VN 2 "register_operand" "w"))))]
+ "TARGET_NEON"
+ "vmovn.i<V_sz_elem>\t%e0, %q1\n\tvmovn.i<V_sz_elem>\t%f0, %q2"
+ [(set_attr "neon_type" "neon_shift_1")
+  (set_attr "length" "8")]
+)
+
+;; For the non-quad case: truncate every element of operand 1 into the
+;; narrower-element result in operand 0 with a single vmovn.  Operand 1
+;; carries the "w" constraint so that it is allocated a NEON register like
+;; the destination, and the destination is printed with %P, the same
+;; modifier this file uses for other half-width (<V_HALF>) operands.
+(define_insn "neon_vec_pack_trunc_<mode>"
+ [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
+       (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
+ "TARGET_NEON"
+ "vmovn.i<V_sz_elem>\t%P0, %q1"
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+;; Pack two vectors (operands 1 and 2) into operand 0.  The inputs are
+;; placed in the low and high halves of a temporary of the double-width
+;; mode, and that temporary is then narrowed element-wise by
+;; neon_vec_pack_trunc_<V_double>.
+(define_expand "vec_pack_trunc_<mode>"
+ [(match_operand:<V_narrow_pack> 0 "register_operand" "")
+  (match_operand:VSHFT 1 "register_operand" "")
+  (match_operand:VSHFT 2 "register_operand" "")]
+ "TARGET_NEON"
+{
+  rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
+
+  /* Assemble the double-width vector: operand 1 low, operand 2 high.  */
+  emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
+  emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
+  /* Narrow each element of the assembled vector into the result.  */
+  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
+  DONE;
+})
===================================================================
@@ -28,6 +28,8 @@
;; registers.
(define_mode_iterator ANY64 [DI DF V8QI V4HI V2SI V2SF])
+;; A list of modes that are exactly 128 bits in size.
+(define_mode_iterator ANY128 [V2DI V2DF V16QI V8HI V4SI V4SF])
+
;; A list of integer modes that are up to one word long
(define_mode_iterator QHSI [QI HI SI])
@@ -227,9 +229,13 @@
;; Narrower modes with the same number of elements.
(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")])
+;; Narrower modes with double the number of elements: each element is half
+;; as wide, so the total size is unchanged (e.g. V4SI -> V8HI).
+(define_mode_attr V_narrow_pack [(V4SI "V8HI") (V8HI "V16QI") (V2DI "V4SI")
+ (V4HI "V8QI") (V2SI "V4HI") (DI "V2SI")])
+
;; Modes with half the number of equal-sized elements.
+;; The two-element 64-bit vectors halve to scalars: V2DI -> DI, V2DF -> DF.
(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
- (V4SI "V2SI") (V4SF "V2SF")
+ (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF")
(V2DI "DI")])
;; Same, but lower-case.
@@ -239,7 +245,7 @@
;; Modes with twice the number of equal-sized elements.
+;; The scalars DI and DF double to the two-element vectors V2DI and V2DF.
(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
- (V2SI "V4SI") (V2SF "V4SF")
+ (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF")
(DI "V2DI")])
;; Same, but lower-case.
@@ -362,7 +368,8 @@
(V4HI "4") (V8HI "8")
(V2SI "2") (V4SI "4")
(V2SF "2") (V4SF "4")
- (DI "1") (V2DI "2")])
+ (DI "1") (V2DI "2")
+ (DF "1") (V2DF "2")])
;; Same as V_widen, but lower-case.
(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")])