Message ID | 20240904132650.2720446-10-christophe.lyon@linaro.org |
---|---|
State | New |
Headers | show |
Series | arm: [MVE intrinsics] Re-implement more intrinsics | expand |
On 04/09/2024 14:26, Christophe Lyon wrote: > Implement vcvtbq_f16_f32, vcvttq_f16_f32, vcvtbq_f32_f16 and > vcvttq_f32_f16 using the new MVE builtins framework. > > 2024-07-11 Christophe Lyon <christophe.lyon@linaro.org> > > gcc/ > * config/arm/arm-mve-builtins-base.cc (class vcvtxq_impl): New. > (vcvtbq, vcvttq): New. > * config/arm/arm-mve-builtins-base.def (vcvtbq, vcvttq): New. > * config/arm/arm-mve-builtins-base.h (vcvtbq, vcvttq): New. > * config/arm/arm-mve-builtins.cc (cvt_f16_f32, cvt_f32_f16): New > types. > (function_instance::has_inactive_argument): Support vcvtbq and > vcvttq. > * config/arm/arm_mve.h (vcvttq_f32): Delete. > (vcvtbq_f32): Delete. > (vcvtbq_m): Delete. > (vcvttq_m): Delete. > (vcvttq_f32_f16): Delete. > (vcvtbq_f32_f16): Delete. > (vcvttq_f16_f32): Delete. > (vcvtbq_f16_f32): Delete. > (vcvtbq_m_f16_f32): Delete. > (vcvtbq_m_f32_f16): Delete. > (vcvttq_m_f16_f32): Delete. > (vcvttq_m_f32_f16): Delete. > (vcvtbq_x_f32_f16): Delete. > (vcvttq_x_f32_f16): Delete. > (__arm_vcvttq_f32_f16): Delete. > (__arm_vcvtbq_f32_f16): Delete. > (__arm_vcvttq_f16_f32): Delete. > (__arm_vcvtbq_f16_f32): Delete. > (__arm_vcvtbq_m_f16_f32): Delete. > (__arm_vcvtbq_m_f32_f16): Delete. > (__arm_vcvttq_m_f16_f32): Delete. > (__arm_vcvttq_m_f32_f16): Delete. > (__arm_vcvtbq_x_f32_f16): Delete. > (__arm_vcvttq_x_f32_f16): Delete. > (__arm_vcvttq_f32): Delete. > (__arm_vcvtbq_f32): Delete. > (__arm_vcvtbq_m): Delete. > (__arm_vcvttq_m): Delete. OK. R. 
> --- > gcc/config/arm/arm-mve-builtins-base.cc | 56 +++++++++ > gcc/config/arm/arm-mve-builtins-base.def | 4 + > gcc/config/arm/arm-mve-builtins-base.h | 2 + > gcc/config/arm/arm-mve-builtins.cc | 12 ++ > gcc/config/arm/arm_mve.h | 146 ----------------------- > 5 files changed, 74 insertions(+), 146 deletions(-) > > diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc > index a780d686eb1..760378c91b1 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.cc > +++ b/gcc/config/arm/arm-mve-builtins-base.cc > @@ -251,6 +251,60 @@ public: > } > }; > > + /* Implements vcvt[bt]q_f32_f16 and vcvt[bt]q_f16_f32 > + intrinsics. */ > +class vcvtxq_impl : public function_base > +{ > +public: > + CONSTEXPR vcvtxq_impl (int unspec_f16_f32, int unspec_for_m_f16_f32, > + int unspec_f32_f16, int unspec_for_m_f32_f16) > + : m_unspec_f16_f32 (unspec_f16_f32), > + m_unspec_for_m_f16_f32 (unspec_for_m_f16_f32), > + m_unspec_f32_f16 (unspec_f32_f16), > + m_unspec_for_m_f32_f16 (unspec_for_m_f32_f16) > + {} > + > + /* The unspec code associated with vcvt[bt]q. */ > + int m_unspec_f16_f32; > + int m_unspec_for_m_f16_f32; > + int m_unspec_f32_f16; > + int m_unspec_for_m_f32_f16; > + > + rtx > + expand (function_expander &e) const override > + { > + insn_code code; > + switch (e.pred) > + { > + case PRED_none: > + /* No predicate. */ > + if (e.type_suffix (0).element_bits == 16) > + code = code_for_mve_q_f16_f32v8hf (m_unspec_f16_f32); > + else > + code = code_for_mve_q_f32_f16v4sf (m_unspec_f32_f16); > + return e.use_exact_insn (code); > + > + case PRED_m: > + case PRED_x: > + /* "m" or "x" predicate. 
*/ > + if (e.type_suffix (0).element_bits == 16) > + code = code_for_mve_q_m_f16_f32v8hf (m_unspec_for_m_f16_f32); > + else > + code = code_for_mve_q_m_f32_f16v4sf (m_unspec_for_m_f32_f16); > + > + if (e.pred == PRED_m) > + return e.use_cond_insn (code, 0); > + else > + return e.use_pred_x_insn (code); > + > + default: > + gcc_unreachable (); > + } > + > + gcc_unreachable (); > + } > +}; > + > } /* end anonymous namespace */ > > namespace arm_mve { > @@ -452,6 +506,8 @@ FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNK > FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN)) > FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ) > FUNCTION (vcvtq, vcvtq_impl,) > +FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, VCVTBQ_F32_F16, VCVTBQ_M_F32_F16)) > +FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, VCVTTQ_F32_F16, VCVTTQ_M_F32_F16)) > FUNCTION_ONLY_N (vdupq, VDUPQ) > FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ) > FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F)) > diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def > index 671f86b5096..85211d2adc2 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.def > +++ b/gcc/config/arm/arm-mve-builtins-base.def > @@ -179,7 +179,11 @@ DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none) > DEF_MVE_FUNCTION (vcmulq_rot270, binary, all_float, mx_or_none) > DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none) > DEF_MVE_FUNCTION (vcreateq, create, all_float, none) > +DEF_MVE_FUNCTION (vcvtbq, vcvt_f16_f32, cvt_f16_f32, mx_or_none) > +DEF_MVE_FUNCTION (vcvtbq, vcvt_f32_f16, cvt_f32_f16, mx_or_none) > DEF_MVE_FUNCTION (vcvtq, vcvt, cvt, mx_or_none) > +DEF_MVE_FUNCTION (vcvttq, vcvt_f16_f32, cvt_f16_f32, mx_or_none) > +DEF_MVE_FUNCTION (vcvttq, 
vcvt_f32_f16, cvt_f32_f16, mx_or_none) > DEF_MVE_FUNCTION (vdupq, unary_n, all_float, mx_or_none) > DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none) > DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none) > diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h > index dee73d9c457..7b2107d9a0a 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.h > +++ b/gcc/config/arm/arm-mve-builtins-base.h > @@ -54,7 +54,9 @@ extern const function_base *const vcmulq_rot180; > extern const function_base *const vcmulq_rot270; > extern const function_base *const vcmulq_rot90; > extern const function_base *const vcreateq; > +extern const function_base *const vcvtbq; > extern const function_base *const vcvtq; > +extern const function_base *const vcvttq; > extern const function_base *const vdupq; > extern const function_base *const veorq; > extern const function_base *const vfmaq; > diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc > index 3c5b54dade1..4c554a47d85 100644 > --- a/gcc/config/arm/arm-mve-builtins.cc > +++ b/gcc/config/arm/arm-mve-builtins.cc > @@ -219,6 +219,14 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { > D (u16, f16), \ > D (u32, f32) > > +/* vcvt[bt]q_f16_f32. */ > +#define TYPES_cvt_f16_f32(S, D) \ > + D (f16, f32) > + > +/* vcvt[bt]q_f32_f16. 
*/ > +#define TYPES_cvt_f32_f16(S, D) \ > + D (f32, f16) > + > #define TYPES_reinterpret_signed1(D, A) \ > D (A, s8), D (A, s16), D (A, s32), D (A, s64) > > @@ -299,6 +307,8 @@ DEF_MVE_TYPES_ARRAY (poly_8_16); > DEF_MVE_TYPES_ARRAY (signed_16_32); > DEF_MVE_TYPES_ARRAY (signed_32); > DEF_MVE_TYPES_ARRAY (cvt); > +DEF_MVE_TYPES_ARRAY (cvt_f16_f32); > +DEF_MVE_TYPES_ARRAY (cvt_f32_f16); > DEF_MVE_TYPES_ARRAY (reinterpret_integer); > DEF_MVE_TYPES_ARRAY (reinterpret_float); > > @@ -730,6 +740,8 @@ function_instance::has_inactive_argument () const > || base == functions::vcmpltq > || base == functions::vcmpcsq > || base == functions::vcmphiq > + || (base == functions::vcvtbq && type_suffix (0).element_bits == 16) > + || (base == functions::vcvttq && type_suffix (0).element_bits == 16) > || base == functions::vfmaq > || base == functions::vfmasq > || base == functions::vfmsq > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h > index 07897f510f5..5c35e08d754 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -137,11 +137,7 @@ > #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx) > #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx) > #define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p) > -#define vcvttq_f32(__a) __arm_vcvttq_f32(__a) > -#define vcvtbq_f32(__a) __arm_vcvtbq_f32(__a) > #define vcvtaq_m(__inactive, __a, __p) __arm_vcvtaq_m(__inactive, __a, __p) > -#define vcvtbq_m(__a, __b, __p) __arm_vcvtbq_m(__a, __b, __p) > -#define vcvttq_m(__a, __b, __p) __arm_vcvttq_m(__a, __b, __p) > #define vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p) > #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p) > #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p) > @@ -155,8 +151,6 @@ > #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value) > #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value) > 
#define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value) > -#define vcvttq_f32_f16(__a) __arm_vcvttq_f32_f16(__a) > -#define vcvtbq_f32_f16(__a) __arm_vcvtbq_f32_f16(__a) > #define vcvtaq_s16_f16(__a) __arm_vcvtaq_s16_f16(__a) > #define vcvtaq_s32_f32(__a) __arm_vcvtaq_s32_f32(__a) > #define vcvtnq_s16_f16(__a) __arm_vcvtnq_s16_f16(__a) > @@ -202,8 +196,6 @@ > #define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p) > #define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p) > #define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p) > -#define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b) > -#define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b) > #define vbicq_m_n_s16(__a, __imm, __p) __arm_vbicq_m_n_s16(__a, __imm, __p) > #define vbicq_m_n_s32(__a, __imm, __p) __arm_vbicq_m_n_s32(__a, __imm, __p) > #define vbicq_m_n_u16(__a, __imm, __p) __arm_vbicq_m_n_u16(__a, __imm, __p) > @@ -218,10 +210,6 @@ > #define vshlcq_u16(__a, __b, __imm) __arm_vshlcq_u16(__a, __b, __imm) > #define vshlcq_s32(__a, __b, __imm) __arm_vshlcq_s32(__a, __b, __imm) > #define vshlcq_u32(__a, __b, __imm) __arm_vshlcq_u32(__a, __b, __imm) > -#define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p) > -#define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p) > -#define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p) > -#define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p) > #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p) > #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p) > #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p) > @@ -560,8 +548,6 @@ > #define vcvtmq_x_s32_f32(__a, __p) __arm_vcvtmq_x_s32_f32(__a, __p) > #define vcvtmq_x_u16_f16(__a, __p) __arm_vcvtmq_x_u16_f16(__a, __p) > #define vcvtmq_x_u32_f32(__a, __p) 
__arm_vcvtmq_x_u32_f32(__a, __p) > -#define vcvtbq_x_f32_f16(__a, __p) __arm_vcvtbq_x_f32_f16(__a, __p) > -#define vcvttq_x_f32_f16(__a, __p) __arm_vcvttq_x_f32_f16(__a, __p) > #define vbicq_x_f16(__a, __b, __p) __arm_vbicq_x_f16(__a, __b, __p) > #define vbicq_x_f32(__a, __b, __p) __arm_vbicq_x_f32(__a, __b, __p) > #define vornq_x_f16(__a, __b, __p) __arm_vornq_x_f16(__a, __b, __p) > @@ -3704,20 +3690,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value) > __builtin_mve_vst4qv4sf (__addr, __rv.__o); > } > > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvttq_f32_f16 (float16x8_t __a) > -{ > - return __builtin_mve_vcvttq_f32_f16v4sf (__a); > -} > - > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_f32_f16 (float16x8_t __a) > -{ > - return __builtin_mve_vcvtbq_f32_f16v4sf (__a); > -} > - > __extension__ extern __inline uint16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vcvtpq_u16_f16 (float16x8_t __a) > @@ -3858,20 +3830,6 @@ __arm_vbicq_f32 (float32x4_t __a, float32x4_t __b) > return __builtin_mve_vbicq_fv4sf (__a, __b); > } > > -__extension__ extern __inline float16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b) > -{ > - return __builtin_mve_vcvttq_f16_f32v8hf (__a, __b); > -} > - > -__extension__ extern __inline float16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_f16_f32 (float16x8_t __a, float32x4_t __b) > -{ > - return __builtin_mve_vcvtbq_f16_f32v8hf (__a, __b); > -} > - > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vcvtaq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) > @@ -3901,34 +3859,6 @@ __arm_vcvtaq_m_u32_f32 (uint32x4_t 
__inactive, float32x4_t __a, mve_pred16_t __p > } > > > -__extension__ extern __inline float16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) > -{ > - return __builtin_mve_vcvtbq_m_f16_f32v8hf (__a, __b, __p); > -} > - > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) > -{ > - return __builtin_mve_vcvtbq_m_f32_f16v4sf (__inactive, __a, __p); > -} > - > -__extension__ extern __inline float16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvttq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) > -{ > - return __builtin_mve_vcvttq_m_f16_f32v8hf (__a, __b, __p); > -} > - > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) > -{ > - return __builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p); > -} > - > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) > @@ -4383,20 +4313,6 @@ __arm_vcvtmq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p) > return __builtin_mve_vcvtmq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p); > } > > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p) > -{ > - return __builtin_mve_vcvtbq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p); > -} > - > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvttq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p) > -{ > 
- return __builtin_mve_vcvttq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p); > -} > - > __extension__ extern __inline float16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vbicq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) > @@ -6827,20 +6743,6 @@ __arm_vst4q (float32_t * __addr, float32x4x4_t __value) > __arm_vst4q_f32 (__addr, __value); > } > > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvttq_f32 (float16x8_t __a) > -{ > - return __arm_vcvttq_f32_f16 (__a); > -} > - > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_f32 (float16x8_t __a) > -{ > - return __arm_vcvtbq_f32_f16 (__a); > -} > - > __extension__ extern __inline float16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vornq (float16x8_t __a, float16x8_t __b) > @@ -6897,34 +6799,6 @@ __arm_vcvtaq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p) > return __arm_vcvtaq_m_u32_f32 (__inactive, __a, __p); > } > > -__extension__ extern __inline float16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) > -{ > - return __arm_vcvtbq_m_f16_f32 (__a, __b, __p); > -} > - > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvtbq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) > -{ > - return __arm_vcvtbq_m_f32_f16 (__inactive, __a, __p); > -} > - > -__extension__ extern __inline float16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vcvttq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) > -{ > - return __arm_vcvttq_m_f16_f32 (__a, __b, __p); > -} > - > -__extension__ extern __inline float32x4_t > -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) > -__arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) > -{ > - return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p); > -} > - > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) > @@ -7654,14 +7528,6 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \ > int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));}) > > -#define __arm_vcvtbq_f32(p0) ({ __typeof(p0) __p0 = (p0); \ > - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ > - int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvtbq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));}) > - > -#define __arm_vcvttq_f32(p0) ({ __typeof(p0) __p0 = (p0); \ > - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ > - int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvttq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));}) > - > #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ > @@ -7714,18 +7580,6 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtaq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ > int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtaq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) > > -#define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > - __typeof(p1) __p1 = (p1); \ > - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ > - int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvtbq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ > - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvtbq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) > - > -#define __arm_vcvttq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > - __typeof(p1) __p1 = (p1); \ > - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ > - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvttq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ > - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvttq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) > - > #define __arm_vcvtmq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index a780d686eb1..760378c91b1 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -251,6 +251,60 @@ public: } }; + /* Implements vcvt[bt]q_f32_f16 and vcvt[bt]q_f16_f32 + intrinsics. */ +class vcvtxq_impl : public function_base +{ +public: + CONSTEXPR vcvtxq_impl (int unspec_f16_f32, int unspec_for_m_f16_f32, + int unspec_f32_f16, int unspec_for_m_f32_f16) + : m_unspec_f16_f32 (unspec_f16_f32), + m_unspec_for_m_f16_f32 (unspec_for_m_f16_f32), + m_unspec_f32_f16 (unspec_f32_f16), + m_unspec_for_m_f32_f16 (unspec_for_m_f32_f16) + {} + + /* The unspec code associated with vcvt[bt]q. */ + int m_unspec_f16_f32; + int m_unspec_for_m_f16_f32; + int m_unspec_f32_f16; + int m_unspec_for_m_f32_f16; + + rtx + expand (function_expander &e) const override + { + insn_code code; + switch (e.pred) + { + case PRED_none: + /* No predicate. */ + if (e.type_suffix (0).element_bits == 16) + code = code_for_mve_q_f16_f32v8hf (m_unspec_f16_f32); + else + code = code_for_mve_q_f32_f16v4sf (m_unspec_f32_f16); + return e.use_exact_insn (code); + + case PRED_m: + case PRED_x: + /* "m" or "x" predicate. 
*/ + if (e.type_suffix (0).element_bits == 16) + code = code_for_mve_q_m_f16_f32v8hf (m_unspec_for_m_f16_f32); + else + code = code_for_mve_q_m_f32_f16v4sf (m_unspec_for_m_f32_f16); + + if (e.pred == PRED_m) + return e.use_cond_insn (code, 0); + else + return e.use_pred_x_insn (code); + + default: + gcc_unreachable (); + } + + gcc_unreachable (); + } +}; + } /* end anonymous namespace */ namespace arm_mve { @@ -452,6 +506,8 @@ FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNK FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN)) FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ) FUNCTION (vcvtq, vcvtq_impl,) +FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, VCVTBQ_F32_F16, VCVTBQ_M_F32_F16)) +FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, VCVTTQ_F32_F16, VCVTTQ_M_F32_F16)) FUNCTION_ONLY_N (vdupq, VDUPQ) FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ) FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F)) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index 671f86b5096..85211d2adc2 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -179,7 +179,11 @@ DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vcmulq_rot270, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vcreateq, create, all_float, none) +DEF_MVE_FUNCTION (vcvtbq, vcvt_f16_f32, cvt_f16_f32, mx_or_none) +DEF_MVE_FUNCTION (vcvtbq, vcvt_f32_f16, cvt_f32_f16, mx_or_none) DEF_MVE_FUNCTION (vcvtq, vcvt, cvt, mx_or_none) +DEF_MVE_FUNCTION (vcvttq, vcvt_f16_f32, cvt_f16_f32, mx_or_none) +DEF_MVE_FUNCTION (vcvttq, vcvt_f32_f16, cvt_f32_f16, mx_or_none) DEF_MVE_FUNCTION (vdupq, unary_n, all_float, mx_or_none) 
DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none) diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index dee73d9c457..7b2107d9a0a 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -54,7 +54,9 @@ extern const function_base *const vcmulq_rot180; extern const function_base *const vcmulq_rot270; extern const function_base *const vcmulq_rot90; extern const function_base *const vcreateq; +extern const function_base *const vcvtbq; extern const function_base *const vcvtq; +extern const function_base *const vcvttq; extern const function_base *const vdupq; extern const function_base *const veorq; extern const function_base *const vfmaq; diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index 3c5b54dade1..4c554a47d85 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -219,6 +219,14 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { D (u16, f16), \ D (u32, f32) +/* vcvt[bt]q_f16_f32. */ +#define TYPES_cvt_f16_f32(S, D) \ + D (f16, f32) + +/* vcvt[bt]q_f32_f16. 
*/ +#define TYPES_cvt_f32_f16(S, D) \ + D (f32, f16) + #define TYPES_reinterpret_signed1(D, A) \ D (A, s8), D (A, s16), D (A, s32), D (A, s64) @@ -299,6 +307,8 @@ DEF_MVE_TYPES_ARRAY (poly_8_16); DEF_MVE_TYPES_ARRAY (signed_16_32); DEF_MVE_TYPES_ARRAY (signed_32); DEF_MVE_TYPES_ARRAY (cvt); +DEF_MVE_TYPES_ARRAY (cvt_f16_f32); +DEF_MVE_TYPES_ARRAY (cvt_f32_f16); DEF_MVE_TYPES_ARRAY (reinterpret_integer); DEF_MVE_TYPES_ARRAY (reinterpret_float); @@ -730,6 +740,8 @@ function_instance::has_inactive_argument () const || base == functions::vcmpltq || base == functions::vcmpcsq || base == functions::vcmphiq + || (base == functions::vcvtbq && type_suffix (0).element_bits == 16) + || (base == functions::vcvttq && type_suffix (0).element_bits == 16) || base == functions::vfmaq || base == functions::vfmasq || base == functions::vfmsq diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 07897f510f5..5c35e08d754 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -137,11 +137,7 @@ #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx) #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx) #define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p) -#define vcvttq_f32(__a) __arm_vcvttq_f32(__a) -#define vcvtbq_f32(__a) __arm_vcvtbq_f32(__a) #define vcvtaq_m(__inactive, __a, __p) __arm_vcvtaq_m(__inactive, __a, __p) -#define vcvtbq_m(__a, __b, __p) __arm_vcvtbq_m(__a, __b, __p) -#define vcvttq_m(__a, __b, __p) __arm_vcvttq_m(__a, __b, __p) #define vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p) #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p) #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p) @@ -155,8 +151,6 @@ #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value) #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value) #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value) -#define 
vcvttq_f32_f16(__a) __arm_vcvttq_f32_f16(__a) -#define vcvtbq_f32_f16(__a) __arm_vcvtbq_f32_f16(__a) #define vcvtaq_s16_f16(__a) __arm_vcvtaq_s16_f16(__a) #define vcvtaq_s32_f32(__a) __arm_vcvtaq_s32_f32(__a) #define vcvtnq_s16_f16(__a) __arm_vcvtnq_s16_f16(__a) @@ -202,8 +196,6 @@ #define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p) #define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p) #define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p) -#define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b) -#define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b) #define vbicq_m_n_s16(__a, __imm, __p) __arm_vbicq_m_n_s16(__a, __imm, __p) #define vbicq_m_n_s32(__a, __imm, __p) __arm_vbicq_m_n_s32(__a, __imm, __p) #define vbicq_m_n_u16(__a, __imm, __p) __arm_vbicq_m_n_u16(__a, __imm, __p) @@ -218,10 +210,6 @@ #define vshlcq_u16(__a, __b, __imm) __arm_vshlcq_u16(__a, __b, __imm) #define vshlcq_s32(__a, __b, __imm) __arm_vshlcq_s32(__a, __b, __imm) #define vshlcq_u32(__a, __b, __imm) __arm_vshlcq_u32(__a, __b, __imm) -#define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p) -#define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p) -#define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p) -#define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p) #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p) #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p) #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p) @@ -560,8 +548,6 @@ #define vcvtmq_x_s32_f32(__a, __p) __arm_vcvtmq_x_s32_f32(__a, __p) #define vcvtmq_x_u16_f16(__a, __p) __arm_vcvtmq_x_u16_f16(__a, __p) #define vcvtmq_x_u32_f32(__a, __p) __arm_vcvtmq_x_u32_f32(__a, __p) -#define vcvtbq_x_f32_f16(__a, __p) __arm_vcvtbq_x_f32_f16(__a, __p) -#define vcvttq_x_f32_f16(__a, __p) 
__arm_vcvttq_x_f32_f16(__a, __p) #define vbicq_x_f16(__a, __b, __p) __arm_vbicq_x_f16(__a, __b, __p) #define vbicq_x_f32(__a, __b, __p) __arm_vbicq_x_f32(__a, __b, __p) #define vornq_x_f16(__a, __b, __p) __arm_vornq_x_f16(__a, __b, __p) @@ -3704,20 +3690,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value) __builtin_mve_vst4qv4sf (__addr, __rv.__o); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_f32_f16 (float16x8_t __a) -{ - return __builtin_mve_vcvttq_f32_f16v4sf (__a); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_f32_f16 (float16x8_t __a) -{ - return __builtin_mve_vcvtbq_f32_f16v4sf (__a); -} - __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtpq_u16_f16 (float16x8_t __a) @@ -3858,20 +3830,6 @@ __arm_vbicq_f32 (float32x4_t __a, float32x4_t __b) return __builtin_mve_vbicq_fv4sf (__a, __b); } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b) -{ - return __builtin_mve_vcvttq_f16_f32v8hf (__a, __b); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_f16_f32 (float16x8_t __a, float32x4_t __b) -{ - return __builtin_mve_vcvtbq_f16_f32v8hf (__a, __b); -} - __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtaq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) @@ -3901,34 +3859,6 @@ __arm_vcvtaq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) 
-{ - return __builtin_mve_vcvtbq_m_f16_f32v8hf (__a, __b, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) -{ - return __builtin_mve_vcvtbq_m_f32_f16v4sf (__inactive, __a, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __builtin_mve_vcvttq_m_f16_f32v8hf (__a, __b, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) -{ - return __builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p); -} - __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) @@ -4383,20 +4313,6 @@ __arm_vcvtmq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p) return __builtin_mve_vcvtmq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p) -{ - return __builtin_mve_vcvtbq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p) -{ - return __builtin_mve_vcvttq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vbicq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p) @@ -6827,20 +6743,6 @@ __arm_vst4q (float32_t * __addr, 
float32x4x4_t __value) __arm_vst4q_f32 (__addr, __value); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_f32 (float16x8_t __a) -{ - return __arm_vcvttq_f32_f16 (__a); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_f32 (float16x8_t __a) -{ - return __arm_vcvtbq_f32_f16 (__a); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vornq (float16x8_t __a, float16x8_t __b) @@ -6897,34 +6799,6 @@ __arm_vcvtaq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p) return __arm_vcvtaq_m_u32_f32 (__inactive, __a, __p); } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __arm_vcvtbq_m_f16_f32 (__a, __b, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvtbq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) -{ - return __arm_vcvtbq_m_f32_f16 (__inactive, __a, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p) -{ - return __arm_vcvttq_m_f16_f32 (__a, __b, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) -{ - return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p); -} - __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) @@ -7654,14 +7528,6 @@ extern void *__ARM_undef; int 
(*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \ int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));}) -#define __arm_vcvtbq_f32(p0) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvtbq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));}) - -#define __arm_vcvttq_f32(p0) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvttq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));}) - #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ @@ -7714,18 +7580,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtaq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtaq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) -#define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvtbq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvtbq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - -#define __arm_vcvttq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, 
\ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvttq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvttq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - #define __arm_vcvtmq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \