@@ -3981,14 +3981,16 @@ __extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b);
+ return (uint8x16_t)
+ __builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b);
+ return (uint8x16_t)
+ __builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b);
}
__extension__ extern __inline uint8x16_t
@@ -4520,14 +4522,14 @@ __extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v16qi (__a, __b);
}
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v16qi (__a, __b);
}
__extension__ extern __inline int8x16_t
@@ -4821,14 +4823,16 @@ __extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b);
+ return (uint16x8_t)
+ __builtin_mve_vcaddq_rot90v8hi ((int16x8_t)__a, (int16x8_t)__b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv8hi (__a, __b);
+ return (uint16x8_t)
+ __builtin_mve_vcaddq_rot270v8hi ((int16x8_t)__a, (int16x8_t)__b);
}
__extension__ extern __inline uint16x8_t
@@ -5360,14 +5364,14 @@ __extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v8hi (__a, __b);
}
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v8hi (__a, __b);
}
__extension__ extern __inline int16x8_t
@@ -5661,14 +5665,16 @@ __extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv4si (__a, __b);
+ return (uint32x4_t)
+ __builtin_mve_vcaddq_rot90v4si ((int32x4_t)__a, (int32x4_t)__b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv4si (__a, __b);
+ return (uint32x4_t)
+ __builtin_mve_vcaddq_rot270v4si ((int32x4_t)__a, (int32x4_t)__b);
}
__extension__ extern __inline uint32x4_t
@@ -6200,14 +6206,14 @@ __extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot90v4si (__a, __b);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot270v4si (__a, __b);
}
__extension__ extern __inline int32x4_t
@@ -17370,14 +17376,14 @@ __extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b);
+ return __builtin_mve_vcaddq_rot90v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b);
+ return __builtin_mve_vcaddq_rot270v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
@@ -17622,14 +17628,14 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b);
+ return __builtin_mve_vcaddq_rot90v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b);
+ return __builtin_mve_vcaddq_rot270v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
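
(A minimal usage sketch of the rewritten wrappers above, not part of the
patch itself: the unsigned variants now cast their operands to the signed
vector types and back, which is a pure bit-pattern reinterpretation, so the
observable result of each intrinsic is unchanged.  Assumes an MVE-enabled
toolchain, e.g. arm-none-eabi-gcc -O2 -march=armv8.1-m.main+mve
-mfloat-abi=hard; the function name is illustrative.)

#include <arm_mve.h>

/* Lanes are paired as (real, imaginary); with rotation #90 the result is
   out.re = a.re - b.im and out.im = a.im + b.re for each pair.  */
uint8x16_t
rot90_example (uint8x16_t a, uint8x16_t b)
{
  return vcaddq_rot90_u8 (a, b);
}
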
@@ -125,8 +125,6 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si)
@@ -202,8 +200,6 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si)
-VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si)
-VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si)
@@ -268,8 +264,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf)
@@ -892,3 +886,7 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si)
+
+/* Optabs without any suffixes.  */
+VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf)
+VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf)
@@ -310,7 +310,7 @@ (define_constraint "Dz"
"@internal
In ARM/Thumb-2 state a vector of constant zeros."
(and (match_code "const_vector")
- (match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
+ (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))
(define_constraint "Da"
"@internal
@@ -1182,6 +1182,9 @@ (define_int_attr rot [(UNSPEC_VCADD90 "90")
(UNSPEC_VCMLA180 "180")
(UNSPEC_VCMLA270 "270")])
+(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90")
+ (UNSPEC_VCADD270 "_rot270")])
+
(define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8")
(UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8")
(UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8")
@@ -1232,10 +1235,8 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
(VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
(VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
(VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
- (VADDVQ_P_S "s") (VADDVQ_P_U "u")
- (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
- (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
- (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
+ (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBRSRQ_N_S "s")
+ (VBRSRQ_N_U "u") (VCMPEQQ_S "s") (VCMPEQQ_U "u")
(VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
(VCMPNEQ_N_U "u")
(VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
@@ -1500,8 +1501,6 @@ (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
-(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
-(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
@@ -962,34 +962,28 @@ (define_insn "mve_vbrsrq_n_<supf><mode>"
])
;;
-;; [vcaddq_rot270_s, vcaddq_rot270_u])
+;; [vcaddq_rot90_u, vcaddq_rot90_s, vcaddq_rot270_u, vcaddq_rot270_s])
;;
-(define_insn "mve_vcaddq_rot270_<supf><mode>"
+(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")]
- VCADDQ_ROT270))
+ VCADD))
]
"TARGET_HAVE_MVE"
- "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #270"
+ "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
-;;
-;; [vcaddq_rot90_u, vcaddq_rot90_s])
-;;
-(define_insn "mve_vcaddq_rot90_<supf><mode>"
- [
- (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
- (match_operand:MVE_2 2 "s_register_operand" "w")]
- VCADDQ_ROT90))
- ]
- "TARGET_HAVE_MVE"
- "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #90"
- [(set_attr "type" "mve_move")
-])
+;; Auto-vectorizer pattern for int vcadd.
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:MVE_2 0 "register_operand")
+ (unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand")
+ (match_operand:MVE_2 2 "register_operand")]
+ VCADD))]
+ "TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN"
+)
;;
;; [vcmpcsq_n_u])
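
(A usage sketch for the cadd<rot><mode>3 expander above, i.e. the
cadd90<mode>3/cadd270<mode>3 optab names the auto-vectorizer looks for:
assuming -O3 -march=armv8.1-m.main+mve on a little-endian target, per the
!BYTES_BIG_ENDIAN guard, a loop of this shape can be recognized as a
complex addition and emitted as a single vcadd.i32 per vector.  The
function name and flags are illustrative only.)

/* c = a + rot90(b) over (real, imaginary) pairs of int lanes:
   c.re = a.re - b.im;  c.im = a.im + b.re.  */
void
int_cadd_rot90 (int *restrict c, const int *restrict a,
		const int *restrict b, int n)
{
  for (int i = 0; i < n; i += 2)
    {
      c[i]     = a[i]     - b[i + 1];
      c[i + 1] = a[i + 1] + b[i];
    }
}
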
@@ -2102,32 +2096,17 @@ (define_insn "mve_vbicq_n_<supf><mode>"
])
;;
-;; [vcaddq_rot270_f])
-;;
-(define_insn "mve_vcaddq_rot270_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCADDQ_ROT270_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcaddq_rot90_f])
+;; [vcaddq_rot90_f, vcaddq_rot270_f])
;;
-(define_insn "mve_vcaddq_rot90_f<mode>"
+(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")]
- VCADDQ_ROT90_F))
+ VCADD))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90"
+ "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
@@ -598,8 +598,6 @@ (define_c_enum "unspec" [
VADDVAQ_S
VADDVQ_P_S
VBRSRQ_N_S
- VCADDQ_ROT270_S
- VCADDQ_ROT90_S
VCMPEQQ_S
VCMPEQQ_N_S
VCMPNEQ_N_S
@@ -641,8 +639,6 @@ (define_c_enum "unspec" [
VADDVAQ_U
VADDVQ_P_U
VBRSRQ_N_U
- VCADDQ_ROT270_U
- VCADDQ_ROT90_U
VCMPEQQ_U
VCMPEQQ_N_U
VCMPNEQ_N_U
@@ -709,8 +705,6 @@ (define_c_enum "unspec" [
VABDQ_M_U
VABDQ_F
VADDQ_N_F
- VCADDQ_ROT270_F
- VCADDQ_ROT90_F
VCMPEQQ_F
VCMPEQQ_N_F
VCMPGEQ_F
@@ -205,3 +205,13 @@ (define_expand "neg<mode>2"
(neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
"ARM_HAVE_<MODE>_ARITH"
)
+
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF [(match_operand:VF 1 "register_operand")
+ (match_operand:VF 2 "register_operand")]
+ VCADD))]
+ "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN"
+)
+
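
(Likewise for floating point, a sketch of a loop the vec-common.md expander
above lets the vectorizer convert, assuming a little-endian target and
either TARGET_COMPLEX, the Armv8.3-A complex-arithmetic NEON instructions,
or MVE with the FP extension, e.g. -O3 -march=armv8.1-m.main+mve.fp
-mfloat-abi=hard.  Names and flags are illustrative.)

/* c = a + rot270(b) over (real, imaginary) pairs of float lanes:
   c.re = a.re + b.im;  c.im = a.im - b.re.  */
void
float_cadd_rot270 (float *restrict c, const float *restrict a,
		   const float *restrict b, int n)
{
  for (int i = 0; i < n; i += 2)
    {
      c[i]     = a[i]     + b[i + 1];
      c[i + 1] = a[i + 1] - b[i];
    }
}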