diff mbox series

[v2,16/36] arm: [MVE intrinsics] rework vctp

Message ID 20240904132650.2720446-17-christophe.lyon@linaro.org
State New
Headers show
Series arm: [MVE intrinsics] Re-implement more intrinsics | expand

Commit Message

Christophe Lyon Sept. 4, 2024, 1:26 p.m. UTC
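Implement the vctp8q, vctp16q, vctp32q and vctp64q intrinsics (and their
_m predicated variants) using the new MVE builtins framework, replacing
the hand-written definitions in arm_mve.h.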

2024-08-21  Christophe Lyon  <christophe.lyon@linaro.org>

gcc/ChangeLog:

	* config/arm/arm-mve-builtins-base.cc (class vctpq_impl): New.
	(vctp16q): New.
	(vctp32q): New.
	(vctp64q): New.
	(vctp8q): New.
	* config/arm/arm-mve-builtins-base.def (vctp16q): New.
	(vctp32q): New.
	(vctp64q): New.
	(vctp8q): New.
	* config/arm/arm-mve-builtins-base.h (vctp16q): New.
	(vctp32q): New.
	(vctp64q): New.
	(vctp8q): New.
	* config/arm/arm-mve-builtins-shapes.cc (vctp): New.
	* config/arm/arm-mve-builtins-shapes.h (vctp): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Add support for vctp.
	* config/arm/arm_mve.h (vctp16q): Delete.
	(vctp32q): Delete.
	(vctp64q): Delete.
	(vctp8q): Delete.
	(vctp8q_m): Delete.
	(vctp64q_m): Delete.
	(vctp32q_m): Delete.
	(vctp16q_m): Delete.
	(__arm_vctp16q): Delete.
	(__arm_vctp32q): Delete.
	(__arm_vctp64q): Delete.
	(__arm_vctp8q): Delete.
	(__arm_vctp8q_m): Delete.
	(__arm_vctp64q_m): Delete.
	(__arm_vctp32q_m): Delete.
	(__arm_vctp16q_m): Delete.
	* config/arm/mve.md (mve_vctp<MVE_vctp>q<MVE_vpred>): Add '@'
	prefix.
	(mve_vctp<MVE_vctp>q_m<MVE_vpred>): Likewise.
---
 gcc/config/arm/arm-mve-builtins-base.cc   | 48 +++++++++++++++++
 gcc/config/arm/arm-mve-builtins-base.def  |  4 ++
 gcc/config/arm/arm-mve-builtins-base.h    |  4 ++
 gcc/config/arm/arm-mve-builtins-shapes.cc | 16 ++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 gcc/config/arm/arm-mve-builtins.cc        |  4 ++
 gcc/config/arm/arm_mve.h                  | 64 -----------------------
 gcc/config/arm/mve.md                     |  4 +-
 8 files changed, 79 insertions(+), 66 deletions(-)

Comments

Richard Earnshaw (lists) Oct. 14, 2024, 5:28 p.m. UTC | #1
On 04/09/2024 14:26, Christophe Lyon wrote:
> Implement vctp using the new MVE builtins framework.
> 
> 2024-08-21  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> gcc/ChangeLog:
> 
> 	* config/arm/arm-mve-builtins-base.cc (class vctpq_impl): New.
> 	(vctp16q): New.
> 	(vctp32q): New.
> 	(vctp64q): New.
> 	(vctp8q): New.
> 	* config/arm/arm-mve-builtins-base.def (vctp16q): New.
> 	(vctp32q): New.
> 	(vctp64q): New.
> 	(vctp8q): New.
> 	* config/arm/arm-mve-builtins-base.h (vctp16q): New.
> 	(vctp32q): New.
> 	(vctp64q): New.
> 	(vctp8q): New.
> 	* config/arm/arm-mve-builtins-shapes.cc (vctp): New.
> 	* config/arm/arm-mve-builtins-shapes.h (vctp): New.
> 	* config/arm/arm-mve-builtins.cc
> 	(function_instance::has_inactive_argument): Add support for vctp.
> 	* config/arm/arm_mve.h (vctp16q): Delete.
> 	(vctp32q): Delete.
> 	(vctp64q): Delete.
> 	(vctp8q): Delete.
> 	(vctp8q_m): Delete.
> 	(vctp64q_m): Delete.
> 	(vctp32q_m): Delete.
> 	(vctp16q_m): Delete.
> 	(__arm_vctp16q): Delete.
> 	(__arm_vctp32q): Delete.
> 	(__arm_vctp64q): Delete.
> 	(__arm_vctp8q): Delete.
> 	(__arm_vctp8q_m): Delete.
> 	(__arm_vctp64q_m): Delete.
> 	(__arm_vctp32q_m): Delete.
> 	(__arm_vctp16q_m): Delete.
> 	* config/arm/mve.md (mve_vctp<MVE_vctp>q<MVE_vpred>): Add '@'
> 	prefix.
> 	(mve_vctp<MVE_vctp>q_m<MVE_vpred>): Likewise.

OK.

R.

> ---
>  gcc/config/arm/arm-mve-builtins-base.cc   | 48 +++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-base.def  |  4 ++
>  gcc/config/arm/arm-mve-builtins-base.h    |  4 ++
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 16 ++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  gcc/config/arm/arm-mve-builtins.cc        |  4 ++
>  gcc/config/arm/arm_mve.h                  | 64 -----------------------
>  gcc/config/arm/mve.md                     |  4 +-
>  8 files changed, 79 insertions(+), 66 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index f8260f5f483..89724320d43 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -139,6 +139,50 @@ public:
>    }
>  };
>  
> +  /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics.  */
> +class vctpq_impl : public function_base
> +{
> +public:
> +  CONSTEXPR vctpq_impl (machine_mode mode)
> +    : m_mode (mode)
> +  {}
> +
> +  /* Mode this intrinsic operates on.  */
> +  machine_mode m_mode;
> +
> +  rtx
> +  expand (function_expander &e) const override
> +  {
> +    insn_code code;
> +    rtx target;
> +
> +    if (e.mode_suffix_id != MODE_none)
> +      gcc_unreachable ();
> +
> +    switch (e.pred)
> +      {
> +      case PRED_none:
> +	/* No predicate, no suffix.  */
> +	code = code_for_mve_vctpq (m_mode, m_mode);
> +	target = e.use_exact_insn (code);
> +	break;
> +
> +      case PRED_m:
> +	/* No suffix, "m" predicate.  */
> +	code = code_for_mve_vctpq_m (m_mode, m_mode);
> +	target = e.use_cond_insn (code, 0);
> +	break;
> +
> +      default:
> +	gcc_unreachable ();
> +      }
> +
> +    rtx HItarget = gen_reg_rtx (HImode);
> +    emit_move_insn (HItarget, gen_lowpart (HImode, target));
> +    return HItarget;
> +  }
> +};
> +
>    /* Implements vcvtq intrinsics.  */
>  class vcvtq_impl : public function_base
>  {
> @@ -506,6 +550,10 @@ FUNCTION (vcmpltq, unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT,
>  FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
>  FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
>  FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
> +FUNCTION (vctp8q, vctpq_impl, (V16BImode))
> +FUNCTION (vctp16q, vctpq_impl, (V8BImode))
> +FUNCTION (vctp32q, vctpq_impl, (V4BImode))
> +FUNCTION (vctp64q, vctpq_impl, (V2QImode))
>  FUNCTION (vcvtq, vcvtq_impl,)
>  FUNCTION_WITHOUT_N_NO_F (vcvtaq, VCVTAQ)
>  FUNCTION_WITHOUT_N_NO_F (vcvtmq, VCVTMQ)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index cc76db3e0b9..dd46d882882 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -42,6 +42,10 @@ DEF_MVE_FUNCTION (vcmpleq, cmp, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
> +DEF_MVE_FUNCTION (vctp16q, vctp, none, m_or_none)
> +DEF_MVE_FUNCTION (vctp32q, vctp, none, m_or_none)
> +DEF_MVE_FUNCTION (vctp64q, vctp, none, m_or_none)
> +DEF_MVE_FUNCTION (vctp8q, vctp, none, m_or_none)
>  DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index ad2647b6758..41fcf666b11 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -55,6 +55,10 @@ extern const function_base *const vcmulq_rot180;
>  extern const function_base *const vcmulq_rot270;
>  extern const function_base *const vcmulq_rot90;
>  extern const function_base *const vcreateq;
> +extern const function_base *const vctp16q;
> +extern const function_base *const vctp32q;
> +extern const function_base *const vctp64q;
> +extern const function_base *const vctp8q;
>  extern const function_base *const vcvtaq;
>  extern const function_base *const vcvtbq;
>  extern const function_base *const vcvtmq;
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
> index 6632ee49067..8a849c2bc02 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -1981,6 +1981,22 @@ struct unary_widen_acc_def : public overloaded_base<0>
>  };
>  SHAPE (unary_widen_acc)
>  
> +/* mve_pred16_t foo_t0(uint32_t)
> +
> +   Example: vctp16q.
> +   mve_pred16_t [__arm_]vctp16q(uint32_t a)
> +   mve_pred16_t [__arm_]vctp16q_m(uint32_t a, mve_pred16_t p)  */
> +struct vctp_def : public nonoverloaded_base
> +{
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    build_all (b, "p,su32", group, MODE_none, preserve_user_namespace);
> +  }
> +};
> +SHAPE (vctp)
> +
>  /* <T0>_t foo_t0[_t1](<T1>_t)
>     <T0>_t foo_t0_n[_t1](<T1>_t, const int)
>  
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
> index ef497b6c97a..80340dc33ec 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -77,6 +77,7 @@ namespace arm_mve
>      extern const function_shape *const unary_n;
>      extern const function_shape *const unary_widen;
>      extern const function_shape *const unary_widen_acc;
> +    extern const function_shape *const vctp;
>      extern const function_shape *const vcvt;
>      extern const function_shape *const vcvt_f16_f32;
>      extern const function_shape *const vcvt_f32_f16;
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index 13c666b8f6a..84d94bb634f 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -750,6 +750,10 @@ function_instance::has_inactive_argument () const
>        || base == functions::vcmpltq
>        || base == functions::vcmpcsq
>        || base == functions::vcmphiq
> +      || base == functions::vctp16q
> +      || base == functions::vctp32q
> +      || base == functions::vctp64q
> +      || base == functions::vctp8q
>        || (base == functions::vcvtbq && type_suffix (0).element_bits == 16)
>        || (base == functions::vcvttq && type_suffix (0).element_bits == 16)
>        || base == functions::vfmaq
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 7aa61103a7d..49c4ea9afee 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -140,15 +140,7 @@
>  #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
>  #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
>  #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
> -#define vctp16q(__a) __arm_vctp16q(__a)
> -#define vctp32q(__a) __arm_vctp32q(__a)
> -#define vctp64q(__a) __arm_vctp64q(__a)
> -#define vctp8q(__a) __arm_vctp8q(__a)
>  #define vpnot(__a) __arm_vpnot(__a)
> -#define vctp8q_m(__a, __p) __arm_vctp8q_m(__a, __p)
> -#define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
> -#define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
> -#define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
>  #define vshlcq_s8(__a,  __b,  __imm) __arm_vshlcq_s8(__a,  __b,  __imm)
>  #define vshlcq_u8(__a,  __b,  __imm) __arm_vshlcq_u8(__a,  __b,  __imm)
>  #define vshlcq_s16(__a,  __b,  __imm) __arm_vshlcq_s16(__a,  __b,  __imm)
> @@ -603,34 +595,6 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
>    __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
>  }
>  
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp16q (uint32_t __a)
> -{
> -  return __builtin_mve_vctp16qv8bi (__a);
> -}
> -
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp32q (uint32_t __a)
> -{
> -  return __builtin_mve_vctp32qv4bi (__a);
> -}
> -
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp64q (uint32_t __a)
> -{
> -  return __builtin_mve_vctp64qv2qi (__a);
> -}
> -
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp8q (uint32_t __a)
> -{
> -  return __builtin_mve_vctp8qv16bi (__a);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vpnot (mve_pred16_t __a)
> @@ -638,34 +602,6 @@ __arm_vpnot (mve_pred16_t __a)
>    return __builtin_mve_vpnotv16bi (__a);
>  }
>  
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp8q_m (uint32_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vctp8q_mv16bi (__a, __p);
> -}
> -
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp64q_m (uint32_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vctp64q_mv2qi (__a, __p);
> -}
> -
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp32q_m (uint32_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vctp32q_mv4bi (__a, __p);
> -}
> -
> -__extension__ extern __inline mve_pred16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vctp16q_m (uint32_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vctp16q_mv8bi (__a, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 3d8b199d9d6..62cffebd6ed 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -482,7 +482,7 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
>  ;;
>  ;; [vctp8q vctp16q vctp32q vctp64q])
>  ;;
> -(define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>"
> +(define_insn "@mve_vctp<MVE_vctp>q<MVE_vpred>"
>    [
>     (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
>  	(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")]
> @@ -1272,7 +1272,7 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>"
>  ;;
>  ;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m])
>  ;;
> -(define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>"
> +(define_insn "@mve_vctp<MVE_vctp>q_m<MVE_vpred>"
>    [
>     (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
>  	(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index f8260f5f483..89724320d43 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -139,6 +139,50 @@  public:
   }
 };
 
+  /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics.  */
+class vctpq_impl : public function_base
+{
+public:
+  CONSTEXPR vctpq_impl (machine_mode mode)
+    : m_mode (mode)
+  {}
+
+  /* Mode this intrinsic operates on.  */
+  machine_mode m_mode;
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    insn_code code;
+    rtx target;
+
+    if (e.mode_suffix_id != MODE_none)
+      gcc_unreachable ();
+
+    switch (e.pred)
+      {
+      case PRED_none:
+	/* No predicate, no suffix.  */
+	code = code_for_mve_vctpq (m_mode, m_mode);
+	target = e.use_exact_insn (code);
+	break;
+
+      case PRED_m:
+	/* No suffix, "m" predicate.  */
+	code = code_for_mve_vctpq_m (m_mode, m_mode);
+	target = e.use_cond_insn (code, 0);
+	break;
+
+      default:
+	gcc_unreachable ();
+      }
+
+    rtx HItarget = gen_reg_rtx (HImode);
+    emit_move_insn (HItarget, gen_lowpart (HImode, target));
+    return HItarget;
+  }
+};
+
   /* Implements vcvtq intrinsics.  */
 class vcvtq_impl : public function_base
 {
@@ -506,6 +550,10 @@  FUNCTION (vcmpltq, unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT,
 FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
 FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
+FUNCTION (vctp8q, vctpq_impl, (V16BImode))
+FUNCTION (vctp16q, vctpq_impl, (V8BImode))
+FUNCTION (vctp32q, vctpq_impl, (V4BImode))
+FUNCTION (vctp64q, vctpq_impl, (V2QImode))
 FUNCTION (vcvtq, vcvtq_impl,)
 FUNCTION_WITHOUT_N_NO_F (vcvtaq, VCVTAQ)
 FUNCTION_WITHOUT_N_NO_F (vcvtmq, VCVTMQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index cc76db3e0b9..dd46d882882 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -42,6 +42,10 @@  DEF_MVE_FUNCTION (vcmpleq, cmp, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
+DEF_MVE_FUNCTION (vctp16q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp32q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp64q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp8q, vctp, none, m_or_none)
 DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index ad2647b6758..41fcf666b11 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -55,6 +55,10 @@  extern const function_base *const vcmulq_rot180;
 extern const function_base *const vcmulq_rot270;
 extern const function_base *const vcmulq_rot90;
 extern const function_base *const vcreateq;
+extern const function_base *const vctp16q;
+extern const function_base *const vctp32q;
+extern const function_base *const vctp64q;
+extern const function_base *const vctp8q;
 extern const function_base *const vcvtaq;
 extern const function_base *const vcvtbq;
 extern const function_base *const vcvtmq;
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 6632ee49067..8a849c2bc02 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1981,6 +1981,22 @@  struct unary_widen_acc_def : public overloaded_base<0>
 };
 SHAPE (unary_widen_acc)
 
+/* mve_pred16_t foo_t0(uint32_t)
+
+   Example: vctp16q.
+   mve_pred16_t [__arm_]vctp16q(uint32_t a)
+   mve_pred16_t [__arm_]vctp16q_m(uint32_t a, mve_pred16_t p)  */
+struct vctp_def : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    build_all (b, "p,su32", group, MODE_none, preserve_user_namespace);
+  }
+};
+SHAPE (vctp)
+
 /* <T0>_t foo_t0[_t1](<T1>_t)
    <T0>_t foo_t0_n[_t1](<T1>_t, const int)
 
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index ef497b6c97a..80340dc33ec 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -77,6 +77,7 @@  namespace arm_mve
     extern const function_shape *const unary_n;
     extern const function_shape *const unary_widen;
     extern const function_shape *const unary_widen_acc;
+    extern const function_shape *const vctp;
     extern const function_shape *const vcvt;
     extern const function_shape *const vcvt_f16_f32;
     extern const function_shape *const vcvt_f32_f16;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 13c666b8f6a..84d94bb634f 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -750,6 +750,10 @@  function_instance::has_inactive_argument () const
       || base == functions::vcmpltq
       || base == functions::vcmpcsq
       || base == functions::vcmphiq
+      || base == functions::vctp16q
+      || base == functions::vctp32q
+      || base == functions::vctp64q
+      || base == functions::vctp8q
       || (base == functions::vcvtbq && type_suffix (0).element_bits == 16)
       || (base == functions::vcvttq && type_suffix (0).element_bits == 16)
       || base == functions::vfmaq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 7aa61103a7d..49c4ea9afee 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -140,15 +140,7 @@ 
 #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
 #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
 #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
-#define vctp16q(__a) __arm_vctp16q(__a)
-#define vctp32q(__a) __arm_vctp32q(__a)
-#define vctp64q(__a) __arm_vctp64q(__a)
-#define vctp8q(__a) __arm_vctp8q(__a)
 #define vpnot(__a) __arm_vpnot(__a)
-#define vctp8q_m(__a, __p) __arm_vctp8q_m(__a, __p)
-#define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
-#define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
-#define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
 #define vshlcq_s8(__a,  __b,  __imm) __arm_vshlcq_s8(__a,  __b,  __imm)
 #define vshlcq_u8(__a,  __b,  __imm) __arm_vshlcq_u8(__a,  __b,  __imm)
 #define vshlcq_s16(__a,  __b,  __imm) __arm_vshlcq_s16(__a,  __b,  __imm)
@@ -603,34 +595,6 @@  __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
   __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp16q (uint32_t __a)
-{
-  return __builtin_mve_vctp16qv8bi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp32q (uint32_t __a)
-{
-  return __builtin_mve_vctp32qv4bi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp64q (uint32_t __a)
-{
-  return __builtin_mve_vctp64qv2qi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp8q (uint32_t __a)
-{
-  return __builtin_mve_vctp8qv16bi (__a);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vpnot (mve_pred16_t __a)
@@ -638,34 +602,6 @@  __arm_vpnot (mve_pred16_t __a)
   return __builtin_mve_vpnotv16bi (__a);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp8q_m (uint32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vctp8q_mv16bi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp64q_m (uint32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vctp64q_mv2qi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp32q_m (uint32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vctp32q_mv4bi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp16q_m (uint32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vctp16q_mv8bi (__a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 3d8b199d9d6..62cffebd6ed 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -482,7 +482,7 @@  (define_insn "@mve_<mve_insn>q_<supf>v4si"
 ;;
 ;; [vctp8q vctp16q vctp32q vctp64q])
 ;;
-(define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>"
+(define_insn "@mve_vctp<MVE_vctp>q<MVE_vpred>"
   [
    (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
 	(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")]
@@ -1272,7 +1272,7 @@  (define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>"
 ;;
 ;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m])
 ;;
-(define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>"
+(define_insn "@mve_vctp<MVE_vctp>q_m<MVE_vpred>"
   [
    (set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
 	(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")