diff mbox series

[v2,09/36] arm: [MVE intrinsics] rework vcvtbq_f16_f32 vcvttq_f16_f32 vcvtbq_f32_f16 vcvttq_f32_f16

Message ID 20240904132650.2720446-10-christophe.lyon@linaro.org
State New
Headers show
Series arm: [MVE intrinsics] Re-implement more intrinsics | expand

Commit Message

Christophe Lyon Sept. 4, 2024, 1:26 p.m. UTC
Implement vcvtbq_f16_f32, vcvttq_f16_f32, vcvtbq_f32_f16 and
vcvttq_f32_f16 using the new MVE builtins framework.

2024-07-11 Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (class vcvtxq_impl): New.
	(vcvtbq, vcvttq): New.
	* config/arm/arm-mve-builtins-base.def (vcvtbq, vcvttq): New.
	* config/arm/arm-mve-builtins-base.h (vcvtbq, vcvttq): New.
	* config/arm/arm-mve-builtins.cc (cvt_f16_f32, cvt_f32_f16): New
	types.
	(function_instance::has_inactive_argument): Support vcvtbq and
	vcvttq.
	* config/arm/arm_mve.h (vcvttq_f32): Delete.
	(vcvtbq_f32): Delete.
	(vcvtbq_m): Delete.
	(vcvttq_m): Delete.
	(vcvttq_f32_f16): Delete.
	(vcvtbq_f32_f16): Delete.
	(vcvttq_f16_f32): Delete.
	(vcvtbq_f16_f32): Delete.
	(vcvtbq_m_f16_f32): Delete.
	(vcvtbq_m_f32_f16): Delete.
	(vcvttq_m_f16_f32): Delete.
	(vcvttq_m_f32_f16): Delete.
	(vcvtbq_x_f32_f16): Delete.
	(vcvttq_x_f32_f16): Delete.
	(__arm_vcvttq_f32_f16): Delete.
	(__arm_vcvtbq_f32_f16): Delete.
	(__arm_vcvttq_f16_f32): Delete.
	(__arm_vcvtbq_f16_f32): Delete.
	(__arm_vcvtbq_m_f16_f32): Delete.
	(__arm_vcvtbq_m_f32_f16): Delete.
	(__arm_vcvttq_m_f16_f32): Delete.
	(__arm_vcvttq_m_f32_f16): Delete.
	(__arm_vcvtbq_x_f32_f16): Delete.
	(__arm_vcvttq_x_f32_f16): Delete.
	(__arm_vcvttq_f32): Delete.
	(__arm_vcvtbq_f32): Delete.
	(__arm_vcvtbq_m): Delete.
	(__arm_vcvttq_m): Delete.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  56 +++++++++
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm-mve-builtins.cc       |  12 ++
 gcc/config/arm/arm_mve.h                 | 146 -----------------------
 5 files changed, 74 insertions(+), 146 deletions(-)

Comments

Richard Earnshaw (lists) Oct. 14, 2024, 5:13 p.m. UTC | #1
On 04/09/2024 14:26, Christophe Lyon wrote:
> Implement vcvtbq_f16_f32, vcvttq_f16_f32, vcvtbq_f32_f16 and
> vcvttq_f32_f16 using the new MVE builtins framework.
> 
> 2024-07-11 Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (class vcvtxq_impl): New.
> 	(vcvtbq, vcvttq): New.
> 	* config/arm/arm-mve-builtins-base.def (vcvtbq, vcvttq): New.
> 	* config/arm/arm-mve-builtins-base.h (vcvtbq, vcvttq): New.
> 	* config/arm/arm-mve-builtins.cc (cvt_f16_f32, cvt_f32_f16): New
> 	types.
> 	(function_instance::has_inactive_argument): Support vcvtbq and
> 	vcvttq.
> 	* config/arm/arm_mve.h (vcvttq_f32): Delete.
> 	(vcvtbq_f32): Delete.
> 	(vcvtbq_m): Delete.
> 	(vcvttq_m): Delete.
> 	(vcvttq_f32_f16): Delete.
> 	(vcvtbq_f32_f16): Delete.
> 	(vcvttq_f16_f32): Delete.
> 	(vcvtbq_f16_f32): Delete.
> 	(vcvtbq_m_f16_f32): Delete.
> 	(vcvtbq_m_f32_f16): Delete.
> 	(vcvttq_m_f16_f32): Delete.
> 	(vcvttq_m_f32_f16): Delete.
> 	(vcvtbq_x_f32_f16): Delete.
> 	(vcvttq_x_f32_f16): Delete.
> 	(__arm_vcvttq_f32_f16): Delete.
> 	(__arm_vcvtbq_f32_f16): Delete.
> 	(__arm_vcvttq_f16_f32): Delete.
> 	(__arm_vcvtbq_f16_f32): Delete.
> 	(__arm_vcvtbq_m_f16_f32): Delete.
> 	(__arm_vcvtbq_m_f32_f16): Delete.
> 	(__arm_vcvttq_m_f16_f32): Delete.
> 	(__arm_vcvttq_m_f32_f16): Delete.
> 	(__arm_vcvtbq_x_f32_f16): Delete.
> 	(__arm_vcvttq_x_f32_f16): Delete.
> 	(__arm_vcvttq_f32): Delete.
> 	(__arm_vcvtbq_f32): Delete.
> 	(__arm_vcvtbq_m): Delete.
> 	(__arm_vcvttq_m): Delete.

OK.

R.

> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |  56 +++++++++
>  gcc/config/arm/arm-mve-builtins-base.def |   4 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   2 +
>  gcc/config/arm/arm-mve-builtins.cc       |  12 ++
>  gcc/config/arm/arm_mve.h                 | 146 -----------------------
>  5 files changed, 74 insertions(+), 146 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index a780d686eb1..760378c91b1 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -251,6 +251,60 @@ public:
>    }
>  };
>  
> +  /* Implements vcvt[bt]q_f32_f16 and vcvt[bt]q_f16_f32
> +     intrinsics.  */
> +class vcvtxq_impl : public function_base
> +{
> +public:
> +  CONSTEXPR vcvtxq_impl (int unspec_f16_f32, int unspec_for_m_f16_f32,
> +			 int unspec_f32_f16, int unspec_for_m_f32_f16)
> +    : m_unspec_f16_f32 (unspec_f16_f32),
> +      m_unspec_for_m_f16_f32 (unspec_for_m_f16_f32),
> +      m_unspec_f32_f16 (unspec_f32_f16),
> +      m_unspec_for_m_f32_f16 (unspec_for_m_f32_f16)
> +  {}
> +
> +  /* The unspec code associated with vcvt[bt]q.  */
> +  int m_unspec_f16_f32;
> +  int m_unspec_for_m_f16_f32;
> +  int m_unspec_f32_f16;
> +  int m_unspec_for_m_f32_f16;
> +
> +  rtx
> +  expand (function_expander &e) const override
> +  {
> +    insn_code code;
> +    switch (e.pred)
> +      {
> +      case PRED_none:
> +	/* No predicate.  */
> +	if (e.type_suffix (0).element_bits == 16)
> +	  code = code_for_mve_q_f16_f32v8hf (m_unspec_f16_f32);
> +	else
> +	  code = code_for_mve_q_f32_f16v4sf (m_unspec_f32_f16);
> +	return e.use_exact_insn (code);
> +
> +      case PRED_m:
> +      case PRED_x:
> +	/* "m" or "x" predicate.  */
> +	if (e.type_suffix (0).element_bits == 16)
> +	  code = code_for_mve_q_m_f16_f32v8hf (m_unspec_for_m_f16_f32);
> +	else
> +	  code = code_for_mve_q_m_f32_f16v4sf (m_unspec_for_m_f32_f16);
> +
> +	if (e.pred == PRED_m)
> +	  return e.use_cond_insn (code, 0);
> +	else
> +	  return e.use_pred_x_insn (code);
> +
> +      default:
> +	gcc_unreachable ();
> +      }
> +
> +    gcc_unreachable ();
> +  }
> +};
> +
>  } /* end anonymous namespace */
>  
>  namespace arm_mve {
> @@ -452,6 +506,8 @@ FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNK
>  FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
>  FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
>  FUNCTION (vcvtq, vcvtq_impl,)
> +FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, VCVTBQ_F32_F16, VCVTBQ_M_F32_F16))
> +FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, VCVTTQ_F32_F16, VCVTTQ_M_F32_F16))
>  FUNCTION_ONLY_N (vdupq, VDUPQ)
>  FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
>  FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F))
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index 671f86b5096..85211d2adc2 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -179,7 +179,11 @@ DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vcmulq_rot270, binary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
> +DEF_MVE_FUNCTION (vcvtbq, vcvt_f16_f32, cvt_f16_f32, mx_or_none)
> +DEF_MVE_FUNCTION (vcvtbq, vcvt_f32_f16, cvt_f32_f16, mx_or_none)
>  DEF_MVE_FUNCTION (vcvtq, vcvt, cvt, mx_or_none)
> +DEF_MVE_FUNCTION (vcvttq, vcvt_f16_f32, cvt_f16_f32, mx_or_none)
> +DEF_MVE_FUNCTION (vcvttq, vcvt_f32_f16, cvt_f32_f16, mx_or_none)
>  DEF_MVE_FUNCTION (vdupq, unary_n, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index dee73d9c457..7b2107d9a0a 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -54,7 +54,9 @@ extern const function_base *const vcmulq_rot180;
>  extern const function_base *const vcmulq_rot270;
>  extern const function_base *const vcmulq_rot90;
>  extern const function_base *const vcreateq;
> +extern const function_base *const vcvtbq;
>  extern const function_base *const vcvtq;
> +extern const function_base *const vcvttq;
>  extern const function_base *const vdupq;
>  extern const function_base *const veorq;
>  extern const function_base *const vfmaq;
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index 3c5b54dade1..4c554a47d85 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -219,6 +219,14 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
>    D (u16, f16), \
>    D (u32, f32)
>  
> +/* vcvt[bt]q_f16_f32.  */
> +#define TYPES_cvt_f16_f32(S, D) \
> +  D (f16, f32)
> +
> +/* vcvt[bt]q_f32_f16.  */
> +#define TYPES_cvt_f32_f16(S, D) \
> +  D (f32, f16)
> +
>  #define TYPES_reinterpret_signed1(D, A) \
>    D (A, s8), D (A, s16), D (A, s32), D (A, s64)
>  
> @@ -299,6 +307,8 @@ DEF_MVE_TYPES_ARRAY (poly_8_16);
>  DEF_MVE_TYPES_ARRAY (signed_16_32);
>  DEF_MVE_TYPES_ARRAY (signed_32);
>  DEF_MVE_TYPES_ARRAY (cvt);
> +DEF_MVE_TYPES_ARRAY (cvt_f16_f32);
> +DEF_MVE_TYPES_ARRAY (cvt_f32_f16);
>  DEF_MVE_TYPES_ARRAY (reinterpret_integer);
>  DEF_MVE_TYPES_ARRAY (reinterpret_float);
>  
> @@ -730,6 +740,8 @@ function_instance::has_inactive_argument () const
>        || base == functions::vcmpltq
>        || base == functions::vcmpcsq
>        || base == functions::vcmphiq
> +      || (base == functions::vcvtbq && type_suffix (0).element_bits == 16)
> +      || (base == functions::vcvttq && type_suffix (0).element_bits == 16)
>        || base == functions::vfmaq
>        || base == functions::vfmasq
>        || base == functions::vfmsq
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 07897f510f5..5c35e08d754 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -137,11 +137,7 @@
>  #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
>  #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx)
>  #define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p)
> -#define vcvttq_f32(__a) __arm_vcvttq_f32(__a)
> -#define vcvtbq_f32(__a) __arm_vcvtbq_f32(__a)
>  #define vcvtaq_m(__inactive, __a, __p) __arm_vcvtaq_m(__inactive, __a, __p)
> -#define vcvtbq_m(__a, __b, __p) __arm_vcvtbq_m(__a, __b, __p)
> -#define vcvttq_m(__a, __b, __p) __arm_vcvttq_m(__a, __b, __p)
>  #define vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p)
>  #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p)
>  #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p)
> @@ -155,8 +151,6 @@
>  #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
>  #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
>  #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
> -#define vcvttq_f32_f16(__a) __arm_vcvttq_f32_f16(__a)
> -#define vcvtbq_f32_f16(__a) __arm_vcvtbq_f32_f16(__a)
>  #define vcvtaq_s16_f16(__a) __arm_vcvtaq_s16_f16(__a)
>  #define vcvtaq_s32_f32(__a) __arm_vcvtaq_s32_f32(__a)
>  #define vcvtnq_s16_f16(__a) __arm_vcvtnq_s16_f16(__a)
> @@ -202,8 +196,6 @@
>  #define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
>  #define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
>  #define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
> -#define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b)
> -#define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b)
>  #define vbicq_m_n_s16(__a,  __imm, __p) __arm_vbicq_m_n_s16(__a,  __imm, __p)
>  #define vbicq_m_n_s32(__a,  __imm, __p) __arm_vbicq_m_n_s32(__a,  __imm, __p)
>  #define vbicq_m_n_u16(__a,  __imm, __p) __arm_vbicq_m_n_u16(__a,  __imm, __p)
> @@ -218,10 +210,6 @@
>  #define vshlcq_u16(__a,  __b,  __imm) __arm_vshlcq_u16(__a,  __b,  __imm)
>  #define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
>  #define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
> -#define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
> -#define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
> -#define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
> -#define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p)
>  #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p)
>  #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
>  #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
> @@ -560,8 +548,6 @@
>  #define vcvtmq_x_s32_f32(__a, __p) __arm_vcvtmq_x_s32_f32(__a, __p)
>  #define vcvtmq_x_u16_f16(__a, __p) __arm_vcvtmq_x_u16_f16(__a, __p)
>  #define vcvtmq_x_u32_f32(__a, __p) __arm_vcvtmq_x_u32_f32(__a, __p)
> -#define vcvtbq_x_f32_f16(__a, __p) __arm_vcvtbq_x_f32_f16(__a, __p)
> -#define vcvttq_x_f32_f16(__a, __p) __arm_vcvttq_x_f32_f16(__a, __p)
>  #define vbicq_x_f16(__a, __b, __p) __arm_vbicq_x_f16(__a, __b, __p)
>  #define vbicq_x_f32(__a, __b, __p) __arm_vbicq_x_f32(__a, __b, __p)
>  #define vornq_x_f16(__a, __b, __p) __arm_vornq_x_f16(__a, __b, __p)
> @@ -3704,20 +3690,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value)
>    __builtin_mve_vst4qv4sf (__addr, __rv.__o);
>  }
>  
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_f32_f16 (float16x8_t __a)
> -{
> -  return __builtin_mve_vcvttq_f32_f16v4sf (__a);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_f32_f16 (float16x8_t __a)
> -{
> -  return __builtin_mve_vcvtbq_f32_f16v4sf (__a);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcvtpq_u16_f16 (float16x8_t __a)
> @@ -3858,20 +3830,6 @@ __arm_vbicq_f32 (float32x4_t __a, float32x4_t __b)
>    return __builtin_mve_vbicq_fv4sf (__a, __b);
>  }
>  
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b)
> -{
> -  return __builtin_mve_vcvttq_f16_f32v8hf (__a, __b);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_f16_f32 (float16x8_t __a, float32x4_t __b)
> -{
> -  return __builtin_mve_vcvtbq_f16_f32v8hf (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcvtaq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
> @@ -3901,34 +3859,6 @@ __arm_vcvtaq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p
>  }
>  
>  
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vcvtbq_m_f16_f32v8hf (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vcvtbq_m_f32_f16v4sf (__inactive, __a, __p);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vcvttq_m_f16_f32v8hf (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
> @@ -4383,20 +4313,6 @@ __arm_vcvtmq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p)
>    return __builtin_mve_vcvtmq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
>  }
>  
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vcvtbq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vcvttq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p);
> -}
> -
>  __extension__ extern __inline float16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vbicq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
> @@ -6827,20 +6743,6 @@ __arm_vst4q (float32_t * __addr, float32x4x4_t __value)
>   __arm_vst4q_f32 (__addr, __value);
>  }
>  
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_f32 (float16x8_t __a)
> -{
> - return __arm_vcvttq_f32_f16 (__a);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_f32 (float16x8_t __a)
> -{
> - return __arm_vcvtbq_f32_f16 (__a);
> -}
> -
>  __extension__ extern __inline float16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (float16x8_t __a, float16x8_t __b)
> @@ -6897,34 +6799,6 @@ __arm_vcvtaq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
>   return __arm_vcvtaq_m_u32_f32 (__inactive, __a, __p);
>  }
>  
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vcvtbq_m_f16_f32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvtbq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
> -{
> - return __arm_vcvtbq_m_f32_f16 (__inactive, __a, __p);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vcvttq_m_f16_f32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
> -{
> - return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
> @@ -7654,14 +7528,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
>    int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
>  
> -#define __arm_vcvtbq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvtbq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
> -
> -#define __arm_vcvttq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvttq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
> -
>  #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -7714,18 +7580,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtaq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtaq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
>  
> -#define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvtbq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
> -  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvtbq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
> -
> -#define __arm_vcvttq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvttq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
> -  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvttq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
> -
>  #define __arm_vcvtmq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index a780d686eb1..760378c91b1 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -251,6 +251,60 @@  public:
   }
 };
 
+/* Implements vcvt[bt]q_f32_f16 and vcvt[bt]q_f16_f32
+   intrinsics.  */
+class vcvtxq_impl : public function_base
+{
+public:
+  CONSTEXPR vcvtxq_impl (int unspec_f16_f32, int unspec_for_m_f16_f32,
+			 int unspec_f32_f16, int unspec_for_m_f32_f16)
+    : m_unspec_f16_f32 (unspec_f16_f32),
+      m_unspec_for_m_f16_f32 (unspec_for_m_f16_f32),
+      m_unspec_f32_f16 (unspec_f32_f16),
+      m_unspec_for_m_f32_f16 (unspec_for_m_f32_f16)
+  {}
+
+  /* The unspec codes for each vcvt[bt]q direction, plain and predicated.  */
+  int m_unspec_f16_f32;
+  int m_unspec_for_m_f16_f32;
+  int m_unspec_f32_f16;
+  int m_unspec_for_m_f32_f16;
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    insn_code code;
+    switch (e.pred)
+      {
+      case PRED_none:
+	/* No predicate.  */
+	if (e.type_suffix (0).element_bits == 16)
+	  code = code_for_mve_q_f16_f32v8hf (m_unspec_f16_f32);
+	else
+	  code = code_for_mve_q_f32_f16v4sf (m_unspec_f32_f16);
+	return e.use_exact_insn (code);
+
+      case PRED_m:
+      case PRED_x:
+	/* "m" or "x" predicate.  */
+	if (e.type_suffix (0).element_bits == 16)
+	  code = code_for_mve_q_m_f16_f32v8hf (m_unspec_for_m_f16_f32);
+	else
+	  code = code_for_mve_q_m_f32_f16v4sf (m_unspec_for_m_f32_f16);
+
+	if (e.pred == PRED_m)
+	  return e.use_cond_insn (code, 0);
+	else
+	  return e.use_pred_x_insn (code);
+
+      default:
+	gcc_unreachable ();
+      }
+
+    gcc_unreachable ();
+  }
+};
+
 } /* end anonymous namespace */
 
 namespace arm_mve {
@@ -452,6 +506,8 @@  FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNK
 FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
 FUNCTION (vcvtq, vcvtq_impl,)
+FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, VCVTBQ_F32_F16, VCVTBQ_M_F32_F16))
+FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, VCVTTQ_F32_F16, VCVTTQ_M_F32_F16))
 FUNCTION_ONLY_N (vdupq, VDUPQ)
 FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
 FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 671f86b5096..85211d2adc2 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -179,7 +179,11 @@  DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcmulq_rot270, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
+DEF_MVE_FUNCTION (vcvtbq, vcvt_f16_f32, cvt_f16_f32, mx_or_none)
+DEF_MVE_FUNCTION (vcvtbq, vcvt_f32_f16, cvt_f32_f16, mx_or_none)
 DEF_MVE_FUNCTION (vcvtq, vcvt, cvt, mx_or_none)
+DEF_MVE_FUNCTION (vcvttq, vcvt_f16_f32, cvt_f16_f32, mx_or_none)
+DEF_MVE_FUNCTION (vcvttq, vcvt_f32_f16, cvt_f32_f16, mx_or_none)
 DEF_MVE_FUNCTION (vdupq, unary_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index dee73d9c457..7b2107d9a0a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -54,7 +54,9 @@  extern const function_base *const vcmulq_rot180;
 extern const function_base *const vcmulq_rot270;
 extern const function_base *const vcmulq_rot90;
 extern const function_base *const vcreateq;
+extern const function_base *const vcvtbq;
 extern const function_base *const vcvtq;
+extern const function_base *const vcvttq;
 extern const function_base *const vdupq;
 extern const function_base *const veorq;
 extern const function_base *const vfmaq;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 3c5b54dade1..4c554a47d85 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -219,6 +219,14 @@  CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
   D (u16, f16), \
   D (u32, f32)
 
+/* vcvt[bt]q_f16_f32.  */
+#define TYPES_cvt_f16_f32(S, D) \
+  D (f16, f32)
+
+/* vcvt[bt]q_f32_f16.  */
+#define TYPES_cvt_f32_f16(S, D) \
+  D (f32, f16)
+
 #define TYPES_reinterpret_signed1(D, A) \
   D (A, s8), D (A, s16), D (A, s32), D (A, s64)
 
@@ -299,6 +307,8 @@  DEF_MVE_TYPES_ARRAY (poly_8_16);
 DEF_MVE_TYPES_ARRAY (signed_16_32);
 DEF_MVE_TYPES_ARRAY (signed_32);
 DEF_MVE_TYPES_ARRAY (cvt);
+DEF_MVE_TYPES_ARRAY (cvt_f16_f32);
+DEF_MVE_TYPES_ARRAY (cvt_f32_f16);
 DEF_MVE_TYPES_ARRAY (reinterpret_integer);
 DEF_MVE_TYPES_ARRAY (reinterpret_float);
 
@@ -730,6 +740,8 @@  function_instance::has_inactive_argument () const
       || base == functions::vcmpltq
       || base == functions::vcmpcsq
       || base == functions::vcmphiq
+      || (base == functions::vcvtbq && type_suffix (0).element_bits == 16)
+      || (base == functions::vcvttq && type_suffix (0).element_bits == 16)
       || base == functions::vfmaq
       || base == functions::vfmasq
       || base == functions::vfmsq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 07897f510f5..5c35e08d754 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -137,11 +137,7 @@ 
 #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
 #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx)
 #define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p)
-#define vcvttq_f32(__a) __arm_vcvttq_f32(__a)
-#define vcvtbq_f32(__a) __arm_vcvtbq_f32(__a)
 #define vcvtaq_m(__inactive, __a, __p) __arm_vcvtaq_m(__inactive, __a, __p)
-#define vcvtbq_m(__a, __b, __p) __arm_vcvtbq_m(__a, __b, __p)
-#define vcvttq_m(__a, __b, __p) __arm_vcvttq_m(__a, __b, __p)
 #define vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p)
 #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p)
 #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p)
@@ -155,8 +151,6 @@ 
 #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
 #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
 #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
-#define vcvttq_f32_f16(__a) __arm_vcvttq_f32_f16(__a)
-#define vcvtbq_f32_f16(__a) __arm_vcvtbq_f32_f16(__a)
 #define vcvtaq_s16_f16(__a) __arm_vcvtaq_s16_f16(__a)
 #define vcvtaq_s32_f32(__a) __arm_vcvtaq_s32_f32(__a)
 #define vcvtnq_s16_f16(__a) __arm_vcvtnq_s16_f16(__a)
@@ -202,8 +196,6 @@ 
 #define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
 #define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
 #define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
-#define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b)
-#define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b)
 #define vbicq_m_n_s16(__a,  __imm, __p) __arm_vbicq_m_n_s16(__a,  __imm, __p)
 #define vbicq_m_n_s32(__a,  __imm, __p) __arm_vbicq_m_n_s32(__a,  __imm, __p)
 #define vbicq_m_n_u16(__a,  __imm, __p) __arm_vbicq_m_n_u16(__a,  __imm, __p)
@@ -218,10 +210,6 @@ 
 #define vshlcq_u16(__a,  __b,  __imm) __arm_vshlcq_u16(__a,  __b,  __imm)
 #define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
 #define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
-#define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
-#define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
-#define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
-#define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p)
 #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p)
 #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
@@ -560,8 +548,6 @@ 
 #define vcvtmq_x_s32_f32(__a, __p) __arm_vcvtmq_x_s32_f32(__a, __p)
 #define vcvtmq_x_u16_f16(__a, __p) __arm_vcvtmq_x_u16_f16(__a, __p)
 #define vcvtmq_x_u32_f32(__a, __p) __arm_vcvtmq_x_u32_f32(__a, __p)
-#define vcvtbq_x_f32_f16(__a, __p) __arm_vcvtbq_x_f32_f16(__a, __p)
-#define vcvttq_x_f32_f16(__a, __p) __arm_vcvttq_x_f32_f16(__a, __p)
 #define vbicq_x_f16(__a, __b, __p) __arm_vbicq_x_f16(__a, __b, __p)
 #define vbicq_x_f32(__a, __b, __p) __arm_vbicq_x_f32(__a, __b, __p)
 #define vornq_x_f16(__a, __b, __p) __arm_vornq_x_f16(__a, __b, __p)
@@ -3704,20 +3690,6 @@  __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value)
   __builtin_mve_vst4qv4sf (__addr, __rv.__o);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_f32_f16 (float16x8_t __a)
-{
-  return __builtin_mve_vcvttq_f32_f16v4sf (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_f32_f16 (float16x8_t __a)
-{
-  return __builtin_mve_vcvtbq_f32_f16v4sf (__a);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtpq_u16_f16 (float16x8_t __a)
@@ -3858,20 +3830,6 @@  __arm_vbicq_f32 (float32x4_t __a, float32x4_t __b)
   return __builtin_mve_vbicq_fv4sf (__a, __b);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcvttq_f16_f32v8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_f16_f32 (float16x8_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcvtbq_f16_f32v8hf (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtaq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -3901,34 +3859,6 @@  __arm_vcvtaq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p
 }
 
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcvtbq_m_f16_f32v8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vcvtbq_m_f32_f16v4sf (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcvttq_m_f16_f32v8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -4383,20 +4313,6 @@  __arm_vcvtmq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vcvtmq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vcvtbq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vcvttq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
@@ -6827,20 +6743,6 @@  __arm_vst4q (float32_t * __addr, float32x4x4_t __value)
  __arm_vst4q_f32 (__addr, __value);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_f32 (float16x8_t __a)
-{
- return __arm_vcvttq_f32_f16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_f32 (float16x8_t __a)
-{
- return __arm_vcvtbq_f32_f16 (__a);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (float16x8_t __a, float16x8_t __b)
@@ -6897,34 +6799,6 @@  __arm_vcvtaq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
  return __arm_vcvtaq_m_u32_f32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcvtbq_m_f16_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtbq_m_f32_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcvttq_m_f16_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -7654,14 +7528,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
   int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
 
-#define __arm_vcvtbq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvtbq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
-
-#define __arm_vcvttq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvttq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
-
 #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -7714,18 +7580,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtaq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtaq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
 
-#define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvtbq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvtbq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcvttq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvttq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvttq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
 #define __arm_vcvtmq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \