@@ -3983,14 +3983,14 @@ __extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v16qi (__a, __b);
}
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v16qi (__a, __b);
}
__extension__ extern __inline uint8x16_t
@@ -4522,14 +4522,14 @@ __extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v16qi (__a, __b);
}
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v16qi (__a, __b);
}
__extension__ extern __inline int8x16_t
@@ -4823,14 +4823,14 @@ __extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v8hi (__a, __b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v8hi (__a, __b);
}
__extension__ extern __inline uint16x8_t
@@ -5362,14 +5362,14 @@ __extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot90v8hi (__a, __b);
}
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b);
+ return __builtin_mve_vcaddq_rot270v8hi (__a, __b);
}
__extension__ extern __inline int16x8_t
@@ -5663,14 +5663,14 @@ __extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_uv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot90v4si (__a, __b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_uv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot270v4si (__a, __b);
}
__extension__ extern __inline uint32x4_t
@@ -6202,14 +6202,14 @@ __extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_sv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot90v4si (__a, __b);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_sv4si (__a, __b);
+ return __builtin_mve_vcaddq_rot270v4si (__a, __b);
}
__extension__ extern __inline int32x4_t
@@ -17380,42 +17380,42 @@ __extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_rot90_fv8hf (__a, __b);
+ return __builtin_mve_vcmulq_rot90v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_rot270_fv8hf (__a, __b);
+ return __builtin_mve_vcmulq_rot270v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_rot180_fv8hf (__a, __b);
+ return __builtin_mve_vcmulq_rot180v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcmulq_fv8hf (__a, __b);
+ return __builtin_mve_vcmulqv8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b);
+ return __builtin_mve_vcaddq_rot90v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
{
- return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b);
+ return __builtin_mve_vcaddq_rot270v8hf (__a, __b);
}
__extension__ extern __inline float16x8_t
@@ -17632,42 +17632,42 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot90_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_rot90_fv4sf (__a, __b);
+ return __builtin_mve_vcmulq_rot90v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot270_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_rot270_fv4sf (__a, __b);
+ return __builtin_mve_vcmulq_rot270v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_rot180_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_rot180_fv4sf (__a, __b);
+ return __builtin_mve_vcmulq_rot180v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmulq_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcmulq_fv4sf (__a, __b);
+ return __builtin_mve_vcmulqv4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b);
+ return __builtin_mve_vcaddq_rot90v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b);
+ return __builtin_mve_vcaddq_rot270v4sf (__a, __b);
}
__extension__ extern __inline float32x4_t
@@ -17822,28 +17822,28 @@ __extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaqv8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot180_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_rot180_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot180v8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot270_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_rot270_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot270v8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
- return __builtin_mve_vcmlaq_rot90_fv8hf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c);
}
__extension__ extern __inline float16x8_t
@@ -18130,28 +18130,28 @@ __extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaqv4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot180_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_rot180_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot180v4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot270_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_rot270_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot270v4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
- return __builtin_mve_vcmlaq_rot90_fv4sf (__a, __b, __c);
+ return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c);
}
__extension__ extern __inline float32x4_t
@@ -125,8 +125,6 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si)
@@ -202,8 +200,6 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si)
-VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si)
-VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si)
VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si)
@@ -264,12 +260,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vmaxnmq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vmaxnmavq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vmaxnmaq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, veorq_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf)
-VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf)
VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf)
@@ -472,10 +462,6 @@ VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmsq_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmasq_n_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_n_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_NONE, vfmaq_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180_f, v8hf, v4sf)
-VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_f, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrntq_n_s, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_IMM, vshrnbq_n_s, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_IMM, vrshrntq_n_s, v8hi, v4si)
@@ -904,3 +890,15 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si)
VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si)
+
+/* optabs without any suffixes. */
+VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf)
+VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180, v8hf, v4sf)
+VAR2 (BINOP_NONE_NONE_NONE, vcmulq, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot90, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot270, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq_rot180, v8hf, v4sf)
+VAR2 (TERNOP_NONE_NONE_NONE_NONE, vcmlaq, v8hf, v4sf)
@@ -310,7 +310,7 @@ (define_constraint "Dz"
"@internal
In ARM/Thumb-2 state a vector of constant zeros."
(and (match_code "const_vector")
- (match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
+ (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)")))
(define_constraint "Da"
"@internal
@@ -1168,6 +1168,21 @@ (define_int_attr rotsplit2 [(UNSPEC_VCMLA "90")
(UNSPEC_VCMLS "180")
(UNSPEC_VCMLS180 "180")])
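+;; Descriptive comment (assumption based on how these attributes are used by
+;; the cmul expander below): the rotation suffixes of the two MVE instructions
+;; that a complex multiply / multiply-accumulate splits into; an empty string
+;; means no rotation suffix.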
+(define_int_attr mve_rotsplit1 [(UNSPEC_VCMLA "")
+ (UNSPEC_VCMLA180 "")
+ (UNSPEC_VCMUL "")
+ (UNSPEC_VCMUL180 "")
+ (UNSPEC_VCMLS "_rot270")
+ (UNSPEC_VCMLS180 "_rot90")])
+
+(define_int_attr mve_rotsplit2 [(UNSPEC_VCMLA "_rot90")
+ (UNSPEC_VCMLA180 "_rot270")
+ (UNSPEC_VCMUL "_rot90")
+ (UNSPEC_VCMUL180 "_rot270")
+ (UNSPEC_VCMLS "_rot180")
+ (UNSPEC_VCMLS180 "_rot180")])
+
+
(define_int_attr fcmac1 [(UNSPEC_VCMLA "a") (UNSPEC_VCMLA180 "a")
(UNSPEC_VCMLS "s") (UNSPEC_VCMLS180 "s")])
@@ -42,7 +42,7 @@ (define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F
VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U VADDLVQ_P_S
VADDLVQ_P_U VCMPNEQ_U VCMPNEQ_S VSHLQ_S VSHLQ_U VABDQ_S
VADDQ_N_S VADDVAQ_S VADDVQ_P_S VANDQ_S VBICQ_S
- VBRSRQ_N_S VCADDQ_ROT270_S VCADDQ_ROT90_S VCMPEQQ_S
+ VBRSRQ_N_S VCMPEQQ_S
VCMPEQQ_N_S VCMPNEQ_N_S VEORQ_S VHADDQ_S VHADDQ_N_S
VHSUBQ_S VHSUBQ_N_S VMAXQ_S VMAXVQ_S VMINQ_S VMINVQ_S
VMLADAVQ_S VMULHQ_S VMULLBQ_INT_S VMULLTQ_INT_S VMULQ_S
@@ -51,7 +51,7 @@ (define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F
VQSUBQ_N_S VRHADDQ_S VRMULHQ_S VRSHLQ_S VRSHLQ_N_S
VRSHRQ_N_S VSHLQ_N_S VSHLQ_R_S VSUBQ_S VSUBQ_N_S
VABDQ_U VADDQ_N_U VADDVAQ_U VADDVQ_P_U VANDQ_U VBICQ_U
- VBRSRQ_N_U VCADDQ_ROT270_U VCADDQ_ROT90_U VCMPEQQ_U
+ VBRSRQ_N_U VCMPEQQ_U
VCMPEQQ_N_U VCMPNEQ_N_U VEORQ_U VHADDQ_U VHADDQ_N_U
VHSUBQ_U VHSUBQ_N_U VMAXQ_U VMAXVQ_U VMINQ_U VMINVQ_U
VMLADAVQ_U VMULHQ_U VMULLBQ_INT_U VMULLTQ_INT_U VMULQ_U
@@ -66,10 +66,9 @@ (define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F
VQDMULHQ_S VQRDMULHQ_N_S VQRDMULHQ_S VQSHLUQ_N_S
VCMPCSQ_N_U VCMPCSQ_U VCMPHIQ_N_U VCMPHIQ_U VABDQ_M_S
VABDQ_M_U VABDQ_F VADDQ_N_F VANDQ_F VBICQ_F
- VCADDQ_ROT270_F VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F
+ VCMPEQQ_F VCMPEQQ_N_F
VCMPGEQ_F VCMPGEQ_N_F VCMPGTQ_F VCMPGTQ_N_F VCMPLEQ_F
VCMPLEQ_N_F VCMPLTQ_F VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F
- VCMULQ_F VCMULQ_ROT180_F VCMULQ_ROT270_F VCMULQ_ROT90_F
VEORQ_F VMAXNMAQ_F VMAXNMAVQ_F VMAXNMQ_F VMAXNMVQ_F
VMINNMAQ_F VMINNMAVQ_F VMINNMQ_F VMINNMVQ_F VMULQ_F
VMULQ_N_F VORNQ_F VORRQ_F VSUBQ_F VADDLVAQ_U
@@ -112,18 +111,18 @@ (define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F
VMLSDAVAXQ_S VMLSDAVAQ_S VMLADAVAXQ_S
VCMPGEQ_M_F VCMPGTQ_M_N_F VMLSLDAVQ_P_S VRMLALDAVHAXQ_S
VMLSLDAVXQ_P_S VFMAQ_F VMLSLDAVAQ_S VQSHRUNBQ_N_S
- VQRSHRUNTQ_N_S VCMLAQ_F VMINNMAQ_M_F VFMASQ_N_F
+ VQRSHRUNTQ_N_S VMINNMAQ_M_F VFMASQ_N_F
VDUPQ_M_N_F VCMPGTQ_M_F VCMPLTQ_M_F VRMLSLDAVHQ_P_S
VQSHRUNTQ_N_S VABSQ_M_F VMAXNMAVQ_P_F VFMAQ_N_F
VRMLSLDAVHXQ_P_S VREV32Q_M_F VRMLSLDAVHAQ_S
VRMLSLDAVHAXQ_S VCMPLTQ_M_N_F VCMPNEQ_M_F VRNDAQ_M_F
VRNDPQ_M_F VADDLVAQ_P_S VQMOVUNBQ_M_S VCMPLEQ_M_F
- VCMLAQ_ROT180_F VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F
- VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F VCMLAQ_ROT90_F
+ VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F
+ VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F
VQMOVUNTQ_M_S VREV64Q_M_F VNEGQ_M_F VRNDMQ_M_F
VCMPLEQ_M_N_F VCMPGEQ_M_N_F VRNDNQ_M_F VMINNMAVQ_P_F
VCMPNEQ_M_N_F VRMLALDAVHQ_P_S VRMLALDAVHXQ_P_S
- VCMPEQQ_M_N_F VCMLAQ_ROT270_F VMAXNMAQ_M_F VRNDQ_M_F
+ VCMPEQQ_M_N_F VMAXNMAQ_M_F VRNDQ_M_F
VMLALDAVQ_P_U VMLALDAVQ_P_S VQMOVNBQ_M_S VQMOVNBQ_M_U
VMOVLTQ_M_U VMOVLTQ_M_S VMOVNBQ_M_U VMOVNBQ_M_S
VRSHRNTQ_N_U VRSHRNTQ_N_S VORRQ_M_N_S VORRQ_M_N_U
@@ -240,9 +239,8 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
(VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
(VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
(VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
- (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
- (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
- (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
+ (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
+ (VCMPEQQ_S "s") (VCMPEQQ_U "u")
(VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s")
(VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u")
(VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
@@ -421,6 +419,19 @@ (define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32")
(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
(V8HF "=w") (V4SF "=&w")])
+(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90")
+ (UNSPEC_VCADD270 "_rot270")
+ (UNSPEC_VCMLA "")
+ (UNSPEC_VCMLA90 "_rot90")
+ (UNSPEC_VCMLA180 "_rot180")
+ (UNSPEC_VCMLA270 "_rot270")
+ (UNSPEC_VCMUL "")
+ (UNSPEC_VCMUL90 "_rot90")
+ (UNSPEC_VCMUL180 "_rot180")
+ (UNSPEC_VCMUL270 "_rot270")])
+
+(define_int_iterator VCMUL [UNSPEC_VCMUL UNSPEC_VCMUL90 UNSPEC_VCMUL180 UNSPEC_VCMUL270])
+
(define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U])
(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S])
(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
@@ -454,8 +465,6 @@ (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
(define_int_iterator VBICQ [VBICQ_S VBICQ_U])
(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
-(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
-(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S])
(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S])
(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U])
(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S])
@@ -1585,34 +1594,28 @@ (define_insn "mve_vbrsrq_n_<supf><mode>"
])
;;
-;; [vcaddq_rot270_s, vcaddq_rot270_u])
+;; [vcaddq_rot90, vcaddq_rot270])
;;
-(define_insn "mve_vcaddq_rot270_<supf><mode>"
+(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
(match_operand:MVE_2 2 "s_register_operand" "w")]
- VCADDQ_ROT270))
+ VCADD))
]
"TARGET_HAVE_MVE"
- "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #270"
+ "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
-;;
-;; [vcaddq_rot90_u, vcaddq_rot90_s])
-;;
-(define_insn "mve_vcaddq_rot90_<supf><mode>"
- [
- (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
- (match_operand:MVE_2 2 "s_register_operand" "w")]
- VCADDQ_ROT90))
- ]
+;; Auto vectorizer pattern for int vcadd
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:MVE_2 0 "register_operand")
+ (unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand")
+ (match_operand:MVE_2 2 "register_operand")]
+ VCADD))]
"TARGET_HAVE_MVE"
- "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #90"
- [(set_attr "type" "mve_move")
-])
+)
;;
;; [vcmpcsq_n_u])
@@ -2665,32 +2668,17 @@ (define_insn "mve_vbicq_n_<supf><mode>"
])
;;
-;; [vcaddq_rot270_f])
+;; [vcaddq_rot90, vcaddq_rot270])
;;
-(define_insn "mve_vcaddq_rot270_f<mode>"
+(define_insn "mve_vcaddq<mve_rot><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")]
- VCADDQ_ROT270_F))
+ VCADD))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcaddq_rot90_f])
-;;
-(define_insn "mve_vcaddq_rot90_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCADDQ_ROT90_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90"
+ "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
@@ -2875,62 +2863,17 @@ (define_insn "mve_vcmpneq_n_f<mode>"
])
;;
-;; [vcmulq_f])
-;;
-(define_insn "mve_vcmulq_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #0"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmulq_rot180_f])
-;;
-(define_insn "mve_vcmulq_rot180_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_ROT180_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #180"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmulq_rot270_f])
-;;
-(define_insn "mve_vcmulq_rot270_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_ROT270_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #270"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmulq_rot90_f])
+;; [vcmulq, vcmulq_rot90, vcmulq_rot180, vcmulq_rot270])
;;
-(define_insn "mve_vcmulq_rot90_f<mode>"
+(define_insn "mve_vcmulq<mve_rot><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
(match_operand:MVE_0 2 "s_register_operand" "w")]
- VCMULQ_ROT90_F))
+ VCMUL))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #90"
+ "vcmul.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>"
[(set_attr "type" "mve_move")
])
@@ -4692,66 +4635,20 @@ (define_insn "mve_vaddlvaq_p_<supf>v4si"
[(set_attr "type" "mve_move")
(set_attr "length""8")])
;;
-;; [vcmlaq_f])
-;;
-(define_insn "mve_vcmlaq_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #0"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmlaq_rot180_f])
-;;
-(define_insn "mve_vcmlaq_rot180_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_ROT180_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #180"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmlaq_rot270_f])
-;;
-(define_insn "mve_vcmlaq_rot270_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_ROT270_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #270"
- [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vcmlaq_rot90_f])
+;; [vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270])
;;
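+;; The Dz alternative takes a zero accumulator and is emitted as vcmul, so a
+;; plain complex multiply can reuse this pattern.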
-(define_insn "mve_vcmlaq_rot90_f<mode>"
+(define_insn "mve_vcmlaq<mve_rot><mode>"
[
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")]
- VCMLAQ_ROT90_F))
+ (set (match_operand:MVE_0 0 "s_register_operand" "=w,w")
+ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0,Dz")
+ (match_operand:MVE_0 2 "s_register_operand" "w,w")
+ (match_operand:MVE_0 3 "s_register_operand" "w,w")]
+ VCMLA))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #90"
+ "@
+ vcmla.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>
+ vcmul.f%#<V_sz_elem> %q0, %q2, %q3, #<rot>"
[(set_attr "type" "mve_move")
])
@@ -3174,14 +3174,6 @@ (define_insn "neon_vcadd<rot><mode>"
[(set_attr "type" "neon_fcadd")]
)
-(define_expand "cadd<rot><mode>3"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF [(match_operand:VF 1 "register_operand")
- (match_operand:VF 2 "register_operand")]
- VCADD))]
- "TARGET_COMPLEX"
-)
-
(define_insn "neon_vcmla<rot><mode>"
[(set (match_operand:VF 0 "register_operand" "=w")
(plus:VF (match_operand:VF 1 "register_operand" "0")
@@ -3238,32 +3230,13 @@ (define_insn "neon_vcmlaq_lane<rot><mode>"
[(set_attr "type" "neon_fcmla")]
)
-
-;; The complex mla/mls operations always need to expand to two instructions.
-;; The first operation does half the computation and the second does the
-;; remainder. Because of this, expand early.
-(define_expand "cml<fcmac1><rot_op><mode>4"
- [(set (match_operand:VF 0 "register_operand")
- (plus:VF (match_operand:VF 1 "register_operand")
- (unspec:VF [(match_operand:VF 2 "register_operand")
- (match_operand:VF 3 "register_operand")]
- VCMLA_OP)))]
- "TARGET_COMPLEX"
-{
- emit_insn (gen_neon_vcmla<rotsplit1><mode> (operands[0], operands[1],
- operands[2], operands[3]));
- emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], operands[0],
- operands[2], operands[3]));
- DONE;
-})
-
;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder. Because of this, expand early.
(define_expand "cmul<rot_op><mode>3"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF [(match_operand:VF 1 "register_operand")
- (match_operand:VF 2 "register_operand")]
+ [(set (match_operand:VDF 0 "register_operand")
+ (unspec:VDF [(match_operand:VDF 1 "register_operand")
+ (match_operand:VDF 2 "register_operand")]
VCMUL_OP))]
"TARGET_COMPLEX"
{
@@ -3276,6 +3249,7 @@ (define_expand "cmul<rot_op><mode>3"
DONE;
})
+
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "neon_<sup>dot<vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
@@ -511,7 +511,9 @@ (define_c_enum "unspec" [
UNSPEC_VCMLA180
UNSPEC_VCMLA270
UNSPEC_VCMUL
+ UNSPEC_VCMUL90
UNSPEC_VCMUL180
+ UNSPEC_VCMUL270
UNSPEC_VCMLS
UNSPEC_VCMLS180
UNSPEC_MATMUL_S
@@ -191,3 +191,70 @@ (define_expand "vec_set<mode>"
GEN_INT (elem), operands[0]));
DONE;
})
+
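+;; Auto vectorizer pattern for float vcadd; with no preparation statements it
+;; expands directly to the Neon or MVE vcadd instruction patterns.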
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF [(match_operand:VF 1 "register_operand")
+ (match_operand:VF 2 "register_operand")]
+ VCADD))]
+ "TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_NEON_<MODE>_ARITH)"
+)
+
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cmul<rot_op><mode>3"
+ [(set (match_operand:VQ_HSF 0 "register_operand")
+ (unspec:VQ_HSF [(match_operand:VQ_HSF 1 "register_operand")
+ (match_operand:VQ_HSF 2 "register_operand")]
+ VCMUL_OP))]
+ "TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT)"
+{
+ if (TARGET_COMPLEX)
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (tmp, CONST0_RTX (<MODE>mode));
+ emit_insn (gen_neon_vcmla<rotsplit1><mode> (operands[0], tmp,
+ operands[1], operands[2]));
+ emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], operands[0],
+ operands[1], operands[2]));
+ }
+ else
+ {
+ emit_insn (gen_mve_vcmulq<mve_rotsplit1><mode> (operands[0], operands[1],
+ operands[2]));
+ emit_insn (gen_mve_vcmulq<mve_rotsplit2><mode> (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE;
+})
+
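+;; Expander for a single vcmla step; the cml<fcmac1> pattern below emits two
+;; of these to form the full complex multiply-accumulate.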
+(define_expand "arm_vcmla<rot><mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (plus:VF (match_operand:VF 1 "register_operand")
+ (unspec:VF [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "register_operand")]
+ VCMLA)))]
+ "TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_NEON_<MODE>_ARITH)"
+)
+
+;; The complex mla/mls operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder. Because of this, expand early.
+(define_expand "cml<fcmac1><rot_op><mode>4"
+ [(set (match_operand:VF 0 "register_operand")
+ (plus:VF (match_operand:VF 1 "register_operand")
+ (unspec:VF [(match_operand:VF 2 "register_operand")
+ (match_operand:VF 3 "register_operand")]
+ VCMLA_OP)))]
+ "TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
+ && ARM_HAVE_NEON_<MODE>_ARITH)"
+{
+ emit_insn (gen_arm_vcmla<rotsplit1><mode> (operands[0], operands[1],
+ operands[2], operands[3]));
+ emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], operands[0],
+ operands[2], operands[3]));
+ DONE;
+})