diff mbox series

[v2,29/36] arm: [MVE intrinsics] rework vshlcq

Message ID 20240904132650.2720446-30-christophe.lyon@linaro.org
State New
Headers show
Series arm: [MVE intrinsics] Re-implement more intrinsics | expand

Commit Message

Christophe Lyon Sept. 4, 2024, 1:26 p.m. UTC
Implement vshlc using the new MVE builtins framework.

2024-08-28  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (class vshlc_impl): New.
	(vshlc): New.
	* config/arm/arm-mve-builtins-base.def (vshlcq): New.
	* config/arm/arm-mve-builtins-base.h (vshlcq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vshlc.
	* config/arm/arm_mve.h (vshlcq): Delete.
	(vshlcq_m): Delete.
	(vshlcq_s8): Delete.
	(vshlcq_u8): Delete.
	(vshlcq_s16): Delete.
	(vshlcq_u16): Delete.
	(vshlcq_s32): Delete.
	(vshlcq_u32): Delete.
	(vshlcq_m_s8): Delete.
	(vshlcq_m_u8): Delete.
	(vshlcq_m_s16): Delete.
	(vshlcq_m_u16): Delete.
	(vshlcq_m_s32): Delete.
	(vshlcq_m_u32): Delete.
	(__arm_vshlcq_s8): Delete.
	(__arm_vshlcq_u8): Delete.
	(__arm_vshlcq_s16): Delete.
	(__arm_vshlcq_u16): Delete.
	(__arm_vshlcq_s32): Delete.
	(__arm_vshlcq_u32): Delete.
	(__arm_vshlcq_m_s8): Delete.
	(__arm_vshlcq_m_u8): Delete.
	(__arm_vshlcq_m_s16): Delete.
	(__arm_vshlcq_m_u16): Delete.
	(__arm_vshlcq_m_s32): Delete.
	(__arm_vshlcq_m_u32): Delete.
	(__arm_vshlcq): Delete.
	(__arm_vshlcq_m): Delete.
	* config/arm/mve.md (mve_vshlcq_<supf><mode>): Add '@' prefix.
	(mve_vshlcq_m_<supf><mode>): Likewise.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  72 +++++++
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm-mve-builtins.cc       |   1 +
 gcc/config/arm/arm_mve.h                 | 233 -----------------------
 gcc/config/arm/mve.md                    |   4 +-
 6 files changed, 77 insertions(+), 235 deletions(-)

Comments

Richard Earnshaw (lists) Oct. 14, 2024, 5:57 p.m. UTC | #1
On 04/09/2024 14:26, Christophe Lyon wrote:
> Implement vshlc using the new MVE builtins framework.
> 
> 2024-08-28  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (class vshlc_impl): New.
> 	(vshlc): New.
> 	* config/arm/arm-mve-builtins-base.def (vshlcq): New.
> 	* config/arm/arm-mve-builtins-base.h (vshlcq): New.
> 	* config/arm/arm-mve-builtins.cc
> 	(function_instance::has_inactive_argument): Handle vshlc.
> 	* config/arm/arm_mve.h (vshlcq): Delete.
> 	(vshlcq_m): Delete.
> 	(vshlcq_s8): Delete.
> 	(vshlcq_u8): Delete.
> 	(vshlcq_s16): Delete.
> 	(vshlcq_u16): Delete.
> 	(vshlcq_s32): Delete.
> 	(vshlcq_u32): Delete.
> 	(vshlcq_m_s8): Delete.
> 	(vshlcq_m_u8): Delete.
> 	(vshlcq_m_s16): Delete.
> 	(vshlcq_m_u16): Delete.
> 	(vshlcq_m_s32): Delete.
> 	(vshlcq_m_u32): Delete.
> 	(__arm_vshlcq_s8): Delete.
> 	(__arm_vshlcq_u8): Delete.
> 	(__arm_vshlcq_s16): Delete.
> 	(__arm_vshlcq_u16): Delete.
> 	(__arm_vshlcq_s32): Delete.
> 	(__arm_vshlcq_u32): Delete.
> 	(__arm_vshlcq_m_s8): Delete.
> 	(__arm_vshlcq_m_u8): Delete.
> 	(__arm_vshlcq_m_s16): Delete.
> 	(__arm_vshlcq_m_u16): Delete.
> 	(__arm_vshlcq_m_s32): Delete.
> 	(__arm_vshlcq_m_u32): Delete.
> 	(__arm_vshlcq): Delete.
> 	(__arm_vshlcq_m): Delete.
> 	* config/arm/mve.md (mve_vshlcq_<supf><mode>): Add '@' prefix.
> 	(mve_vshlcq_m_<supf><mode>): Likewise.

OK.

R.

> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |  72 +++++++
>  gcc/config/arm/arm-mve-builtins-base.def |   1 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   1 +
>  gcc/config/arm/arm-mve-builtins.cc       |   1 +
>  gcc/config/arm/arm_mve.h                 | 233 -----------------------
>  gcc/config/arm/mve.md                    |   4 +-
>  6 files changed, 77 insertions(+), 235 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index eaf054d9823..9f1f7e69c57 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -483,6 +483,77 @@ public:
>    }
>  };
>  
> +/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector
> +   mode associated with type suffix 0.  We need this special case because the
> +   intrinsics dereference the second parameter and update its contents.  */
> +class vshlc_impl : public function_base
> +{
> +public:
> +  unsigned int
> +  call_properties (const function_instance &) const override
> +  {
> +    return CP_WRITE_MEMORY | CP_READ_MEMORY;
> +  }
> +
> +  tree
> +  memory_scalar_type (const function_instance &) const override
> +  {
> +    return get_typenode_from_name (UINT32_TYPE);
> +  }
> +
> +  rtx
> +  expand (function_expander &e) const override
> +  {
> +    machine_mode mode = e.vector_mode (0);
> +    insn_code code;
> +    rtx insns, carry_ptr, carry, new_carry;
> +    int carry_arg_no;
> +
> +    if (! e.type_suffix (0).integer_p)
> +      gcc_unreachable ();
> +
> +    if (e.mode_suffix_id != MODE_none)
> +      gcc_unreachable ();
> +
> +    carry_arg_no = 1;
> +
> +    carry = gen_reg_rtx (SImode);
> +    carry_ptr = e.args[carry_arg_no];
> +    emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr)));
> +    e.args[carry_arg_no] = carry;
> +
> +    new_carry = gen_reg_rtx (SImode);
> +    e.args.quick_insert (0, new_carry);
> +
> +    switch (e.pred)
> +      {
> +      case PRED_none:
> +	/* No predicate.  */
> +	code = e.type_suffix (0).unsigned_p
> +	  ? code_for_mve_vshlcq (VSHLCQ_U, mode)
> +	  : code_for_mve_vshlcq (VSHLCQ_S, mode);
> +	insns = e.use_exact_insn (code);
> +	break;
> +
> +      case PRED_m:
> +	/* "m" predicate.  */
> +	code = e.type_suffix (0).unsigned_p
> +	  ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode)
> +	  : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode);
> +	insns = e.use_cond_insn (code, 0);
> +	break;
> +
> +      default:
> +	gcc_unreachable ();
> +      }
> +
> +    /* Update carry.  */
> +    emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry));
> +
> +    return insns;
> +  }
> +};
> +
>  } /* end anonymous namespace */
>  
>  namespace arm_mve {
> @@ -815,6 +886,7 @@ FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
>  FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
>  FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
>  FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
> +FUNCTION (vshlcq, vshlc_impl,)
>  FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ)
>  FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ)
>  FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index c5f1e8a197b..bd69f06d7e4 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -152,6 +152,7 @@ DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none)
>  DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none)
>  DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index ed8761318bb..1eff50d3c6d 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -188,6 +188,7 @@ extern const function_base *const vrshlq;
>  extern const function_base *const vrshrnbq;
>  extern const function_base *const vrshrntq;
>  extern const function_base *const vrshrq;
> +extern const function_base *const vshlcq;
>  extern const function_base *const vshllbq;
>  extern const function_base *const vshlltq;
>  extern const function_base *const vshlq;
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index 1180421bf0a..252744596ce 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -810,6 +810,7 @@ function_instance::has_inactive_argument () const
>        || (base == functions::vrshlq && mode_suffix_id == MODE_n)
>        || base == functions::vrshrnbq
>        || base == functions::vrshrntq
> +      || base == functions::vshlcq
>        || base == functions::vshrnbq
>        || base == functions::vshrntq
>        || base == functions::vsliq
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 37b0fedc4ff..c577c373e98 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -42,7 +42,6 @@
>  
>  #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
>  #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
> -#define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
>  #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
>  #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
>  #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
> @@ -101,7 +100,6 @@
>  #define vld4q(__addr) __arm_vld4q(__addr)
>  #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
>  #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx)
> -#define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p)
>  
>  
>  #define vst4q_s8( __addr, __value) __arm_vst4q_s8( __addr, __value)
> @@ -113,12 +111,6 @@
>  #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
>  #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
>  #define vpnot(__a) __arm_vpnot(__a)
> -#define vshlcq_s8(__a,  __b,  __imm) __arm_vshlcq_s8(__a,  __b,  __imm)
> -#define vshlcq_u8(__a,  __b,  __imm) __arm_vshlcq_u8(__a,  __b,  __imm)
> -#define vshlcq_s16(__a,  __b,  __imm) __arm_vshlcq_s16(__a,  __b,  __imm)
> -#define vshlcq_u16(__a,  __b,  __imm) __arm_vshlcq_u16(__a,  __b,  __imm)
> -#define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
> -#define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
>  #define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value)
>  #define vstrbq_u8( __addr, __value) __arm_vstrbq_u8( __addr, __value)
>  #define vstrbq_u16( __addr, __value) __arm_vstrbq_u16( __addr, __value)
> @@ -421,12 +413,6 @@
>  #define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1)
>  #define lsll(__p0, __p1) __arm_lsll(__p0, __p1)
>  #define asrl(__p0, __p1) __arm_asrl(__p0, __p1)
> -#define vshlcq_m_s8(__a,  __b,  __imm, __p) __arm_vshlcq_m_s8(__a,  __b,  __imm, __p)
> -#define vshlcq_m_u8(__a,  __b,  __imm, __p) __arm_vshlcq_m_u8(__a,  __b,  __imm, __p)
> -#define vshlcq_m_s16(__a,  __b,  __imm, __p) __arm_vshlcq_m_s16(__a,  __b,  __imm, __p)
> -#define vshlcq_m_u16(__a,  __b,  __imm, __p) __arm_vshlcq_m_u16(__a,  __b,  __imm, __p)
> -#define vshlcq_m_s32(__a,  __b,  __imm, __p) __arm_vshlcq_m_s32(__a,  __b,  __imm, __p)
> -#define vshlcq_m_u32(__a,  __b,  __imm, __p) __arm_vshlcq_m_u32(__a,  __b,  __imm, __p)
>  #endif
>  
>  /* For big-endian, GCC's vector indices are reversed within each 64 bits
> @@ -502,60 +488,6 @@ __arm_vpnot (mve_pred16_t __a)
>    return __builtin_mve_vpnotv16bi (__a);
>  }
>  
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
> -{
> -  int8x16_t __res = __builtin_mve_vshlcq_vec_sv16qi (__a, *__b, __imm);
> -  *__b = __builtin_mve_vshlcq_carry_sv16qi (__a, *__b, __imm);
> -  return __res;
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_u8 (uint8x16_t __a, uint32_t * __b, const int __imm)
> -{
> -  uint8x16_t __res = __builtin_mve_vshlcq_vec_uv16qi (__a, *__b, __imm);
> -  *__b = __builtin_mve_vshlcq_carry_uv16qi (__a, *__b, __imm);
> -  return __res;
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_s16 (int16x8_t __a, uint32_t * __b, const int __imm)
> -{
> -  int16x8_t __res = __builtin_mve_vshlcq_vec_sv8hi (__a, *__b, __imm);
> -  *__b = __builtin_mve_vshlcq_carry_sv8hi (__a, *__b, __imm);
> -  return __res;
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_u16 (uint16x8_t __a, uint32_t * __b, const int __imm)
> -{
> -  uint16x8_t __res = __builtin_mve_vshlcq_vec_uv8hi (__a, *__b, __imm);
> -  *__b = __builtin_mve_vshlcq_carry_uv8hi (__a, *__b, __imm);
> -  return __res;
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_s32 (int32x4_t __a, uint32_t * __b, const int __imm)
> -{
> -  int32x4_t __res = __builtin_mve_vshlcq_vec_sv4si (__a, *__b, __imm);
> -  *__b = __builtin_mve_vshlcq_carry_sv4si (__a, *__b, __imm);
> -  return __res;
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm)
> -{
> -  uint32x4_t __res = __builtin_mve_vshlcq_vec_uv4si (__a, *__b, __imm);
> -  *__b = __builtin_mve_vshlcq_carry_uv4si (__a, *__b, __imm);
> -  return __res;
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
> @@ -2404,60 +2336,6 @@ __arm_srshr (int32_t value, const int shift)
>    return __builtin_mve_srshr_si (value, shift);
>  }
>  
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_s8 (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> -  int8x16_t __res = __builtin_mve_vshlcq_m_vec_sv16qi (__a, *__b, __imm, __p);
> -  *__b = __builtin_mve_vshlcq_m_carry_sv16qi (__a, *__b, __imm, __p);
> -  return __res;
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_u8 (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> -  uint8x16_t __res = __builtin_mve_vshlcq_m_vec_uv16qi (__a, *__b, __imm, __p);
> -  *__b = __builtin_mve_vshlcq_m_carry_uv16qi (__a, *__b, __imm, __p);
> -  return __res;
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_s16 (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> -  int16x8_t __res = __builtin_mve_vshlcq_m_vec_sv8hi (__a, *__b, __imm, __p);
> -  *__b = __builtin_mve_vshlcq_m_carry_sv8hi (__a, *__b, __imm, __p);
> -  return __res;
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_u16 (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> -  uint16x8_t __res = __builtin_mve_vshlcq_m_vec_uv8hi (__a, *__b, __imm, __p);
> -  *__b = __builtin_mve_vshlcq_m_carry_uv8hi (__a, *__b, __imm, __p);
> -  return __res;
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_s32 (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> -  int32x4_t __res = __builtin_mve_vshlcq_m_vec_sv4si (__a, *__b, __imm, __p);
> -  *__b = __builtin_mve_vshlcq_m_carry_sv4si (__a, *__b, __imm, __p);
> -  return __res;
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_u32 (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> -  uint32x4_t __res = __builtin_mve_vshlcq_m_vec_uv4si (__a, *__b, __imm, __p);
> -  *__b = __builtin_mve_vshlcq_m_carry_uv4si (__a, *__b, __imm, __p);
> -  return __res;
> -}
> -
>  #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */
>  
>  __extension__ extern __inline void
> @@ -2868,48 +2746,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
>   __arm_vst4q_u32 (__addr, __value);
>  }
>  
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_s8 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (uint8x16_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_u8 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (int16x8_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (uint16x8_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (int32x4_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_u32 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
> @@ -4240,48 +4076,6 @@ __arm_vgetq_lane (uint64x2_t __a, const int __idx)
>   return __arm_vgetq_lane_u64 (__a, __idx);
>  }
>  
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_s8 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_u8 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_u16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_u32 (__a, __b, __imm, __p);
> -}
> -
>  #if (__ARM_FEATURE_MVE & 2)  /* MVE Floating point.  */
>  
>  __extension__ extern __inline void
> @@ -4887,15 +4681,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
>    int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
>  
> -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vld1q_z(p0,p1) ( \
>    _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
>    int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
> @@ -5234,15 +5019,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
>    int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
>  
> -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
>    int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> @@ -5615,15 +5391,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
>    int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
>  
> -#define __arm_vshlcq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
> -
>  #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 0507e117f51..83a1eb48533 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1719,7 +1719,7 @@ (define_expand "mve_vshlcq_carry_<supf><mode>"
>    DONE;
>  })
>  
> -(define_insn "mve_vshlcq_<supf><mode>"
> +(define_insn "@mve_vshlcq_<supf><mode>"
>   [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
>         (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
>  		      (match_operand:SI 3 "s_register_operand" "1")
> @@ -6279,7 +6279,7 @@ (define_expand "mve_vshlcq_m_carry_<supf><mode>"
>    DONE;
>  })
>  
> -(define_insn "mve_vshlcq_m_<supf><mode>"
> +(define_insn "@mve_vshlcq_m_<supf><mode>"
>   [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
>         (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
>  		      (match_operand:SI 3 "s_register_operand" "1")
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index eaf054d9823..9f1f7e69c57 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -483,6 +483,77 @@  public:
   }
 };
 
+/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector
+   mode associated with type suffix 0.  We need this special case because the
+   intrinsics dereference the second parameter and update its contents.  */
+class vshlc_impl : public function_base
+{
+public:
+  unsigned int
+  call_properties (const function_instance &) const override
+  {
+    return CP_WRITE_MEMORY | CP_READ_MEMORY;
+  }
+
+  tree
+  memory_scalar_type (const function_instance &) const override
+  {
+    return get_typenode_from_name (UINT32_TYPE);
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    machine_mode mode = e.vector_mode (0);
+    insn_code code;
+    rtx insns, carry_ptr, carry, new_carry;
+    int carry_arg_no;
+
+    if (! e.type_suffix (0).integer_p)
+      gcc_unreachable ();
+
+    if (e.mode_suffix_id != MODE_none)
+      gcc_unreachable ();
+
+    carry_arg_no = 1;
+
+    carry = gen_reg_rtx (SImode);
+    carry_ptr = e.args[carry_arg_no];
+    emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr)));
+    e.args[carry_arg_no] = carry;
+
+    new_carry = gen_reg_rtx (SImode);
+    e.args.quick_insert (0, new_carry);
+
+    switch (e.pred)
+      {
+      case PRED_none:
+	/* No predicate.  */
+	code = e.type_suffix (0).unsigned_p
+	  ? code_for_mve_vshlcq (VSHLCQ_U, mode)
+	  : code_for_mve_vshlcq (VSHLCQ_S, mode);
+	insns = e.use_exact_insn (code);
+	break;
+
+      case PRED_m:
+	/* "m" predicate.  */
+	code = e.type_suffix (0).unsigned_p
+	  ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode)
+	  : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode);
+	insns = e.use_cond_insn (code, 0);
+	break;
+
+      default:
+	gcc_unreachable ();
+      }
+
+    /* Update carry.  */
+    emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry));
+
+    return insns;
+  }
+};
+
 } /* end anonymous namespace */
 
 namespace arm_mve {
@@ -815,6 +886,7 @@  FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
 FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
 FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
+FUNCTION (vshlcq, vshlc_impl,)
 FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ)
 FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ)
 FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index c5f1e8a197b..bd69f06d7e4 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -152,6 +152,7 @@  DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index ed8761318bb..1eff50d3c6d 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -188,6 +188,7 @@  extern const function_base *const vrshlq;
 extern const function_base *const vrshrnbq;
 extern const function_base *const vrshrntq;
 extern const function_base *const vrshrq;
+extern const function_base *const vshlcq;
 extern const function_base *const vshllbq;
 extern const function_base *const vshlltq;
 extern const function_base *const vshlq;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 1180421bf0a..252744596ce 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -810,6 +810,7 @@  function_instance::has_inactive_argument () const
       || (base == functions::vrshlq && mode_suffix_id == MODE_n)
       || base == functions::vrshrnbq
       || base == functions::vrshrntq
+      || base == functions::vshlcq
       || base == functions::vshrnbq
       || base == functions::vshrntq
       || base == functions::vsliq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 37b0fedc4ff..c577c373e98 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,7 +42,6 @@ 
 
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
 #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
 #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
@@ -101,7 +100,6 @@ 
 #define vld4q(__addr) __arm_vld4q(__addr)
 #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
 #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx)
-#define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p)
 
 
 #define vst4q_s8( __addr, __value) __arm_vst4q_s8( __addr, __value)
@@ -113,12 +111,6 @@ 
 #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
 #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
 #define vpnot(__a) __arm_vpnot(__a)
-#define vshlcq_s8(__a,  __b,  __imm) __arm_vshlcq_s8(__a,  __b,  __imm)
-#define vshlcq_u8(__a,  __b,  __imm) __arm_vshlcq_u8(__a,  __b,  __imm)
-#define vshlcq_s16(__a,  __b,  __imm) __arm_vshlcq_s16(__a,  __b,  __imm)
-#define vshlcq_u16(__a,  __b,  __imm) __arm_vshlcq_u16(__a,  __b,  __imm)
-#define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
-#define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
 #define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value)
 #define vstrbq_u8( __addr, __value) __arm_vstrbq_u8( __addr, __value)
 #define vstrbq_u16( __addr, __value) __arm_vstrbq_u16( __addr, __value)
@@ -421,12 +413,6 @@ 
 #define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1)
 #define lsll(__p0, __p1) __arm_lsll(__p0, __p1)
 #define asrl(__p0, __p1) __arm_asrl(__p0, __p1)
-#define vshlcq_m_s8(__a,  __b,  __imm, __p) __arm_vshlcq_m_s8(__a,  __b,  __imm, __p)
-#define vshlcq_m_u8(__a,  __b,  __imm, __p) __arm_vshlcq_m_u8(__a,  __b,  __imm, __p)
-#define vshlcq_m_s16(__a,  __b,  __imm, __p) __arm_vshlcq_m_s16(__a,  __b,  __imm, __p)
-#define vshlcq_m_u16(__a,  __b,  __imm, __p) __arm_vshlcq_m_u16(__a,  __b,  __imm, __p)
-#define vshlcq_m_s32(__a,  __b,  __imm, __p) __arm_vshlcq_m_s32(__a,  __b,  __imm, __p)
-#define vshlcq_m_u32(__a,  __b,  __imm, __p) __arm_vshlcq_m_u32(__a,  __b,  __imm, __p)
 #endif
 
 /* For big-endian, GCC's vector indices are reversed within each 64 bits
@@ -502,60 +488,6 @@  __arm_vpnot (mve_pred16_t __a)
   return __builtin_mve_vpnotv16bi (__a);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
-{
-  int8x16_t __res = __builtin_mve_vshlcq_vec_sv16qi (__a, *__b, __imm);
-  *__b = __builtin_mve_vshlcq_carry_sv16qi (__a, *__b, __imm);
-  return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u8 (uint8x16_t __a, uint32_t * __b, const int __imm)
-{
-  uint8x16_t __res = __builtin_mve_vshlcq_vec_uv16qi (__a, *__b, __imm);
-  *__b = __builtin_mve_vshlcq_carry_uv16qi (__a, *__b, __imm);
-  return __res;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s16 (int16x8_t __a, uint32_t * __b, const int __imm)
-{
-  int16x8_t __res = __builtin_mve_vshlcq_vec_sv8hi (__a, *__b, __imm);
-  *__b = __builtin_mve_vshlcq_carry_sv8hi (__a, *__b, __imm);
-  return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u16 (uint16x8_t __a, uint32_t * __b, const int __imm)
-{
-  uint16x8_t __res = __builtin_mve_vshlcq_vec_uv8hi (__a, *__b, __imm);
-  *__b = __builtin_mve_vshlcq_carry_uv8hi (__a, *__b, __imm);
-  return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s32 (int32x4_t __a, uint32_t * __b, const int __imm)
-{
-  int32x4_t __res = __builtin_mve_vshlcq_vec_sv4si (__a, *__b, __imm);
-  *__b = __builtin_mve_vshlcq_carry_sv4si (__a, *__b, __imm);
-  return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm)
-{
-  uint32x4_t __res = __builtin_mve_vshlcq_vec_uv4si (__a, *__b, __imm);
-  *__b = __builtin_mve_vshlcq_carry_uv4si (__a, *__b, __imm);
-  return __res;
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -2404,60 +2336,6 @@  __arm_srshr (int32_t value, const int shift)
   return __builtin_mve_srshr_si (value, shift);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s8 (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
-  int8x16_t __res = __builtin_mve_vshlcq_m_vec_sv16qi (__a, *__b, __imm, __p);
-  *__b = __builtin_mve_vshlcq_m_carry_sv16qi (__a, *__b, __imm, __p);
-  return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u8 (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
-  uint8x16_t __res = __builtin_mve_vshlcq_m_vec_uv16qi (__a, *__b, __imm, __p);
-  *__b = __builtin_mve_vshlcq_m_carry_uv16qi (__a, *__b, __imm, __p);
-  return __res;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s16 (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
-  int16x8_t __res = __builtin_mve_vshlcq_m_vec_sv8hi (__a, *__b, __imm, __p);
-  *__b = __builtin_mve_vshlcq_m_carry_sv8hi (__a, *__b, __imm, __p);
-  return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u16 (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
-  uint16x8_t __res = __builtin_mve_vshlcq_m_vec_uv8hi (__a, *__b, __imm, __p);
-  *__b = __builtin_mve_vshlcq_m_carry_uv8hi (__a, *__b, __imm, __p);
-  return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s32 (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
-  int32x4_t __res = __builtin_mve_vshlcq_m_vec_sv4si (__a, *__b, __imm, __p);
-  *__b = __builtin_mve_vshlcq_m_carry_sv4si (__a, *__b, __imm, __p);
-  return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u32 (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
-  uint32x4_t __res = __builtin_mve_vshlcq_m_vec_uv4si (__a, *__b, __imm, __p);
-  *__b = __builtin_mve_vshlcq_m_carry_uv4si (__a, *__b, __imm, __p);
-  return __res;
-}
-
 #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */
 
 __extension__ extern __inline void
@@ -2868,48 +2746,6 @@  __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
  __arm_vst4q_u32 (__addr, __value);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint8x16_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int16x8_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint16x8_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int32x4_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u32 (__a, __b, __imm);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -4240,48 +4076,6 @@  __arm_vgetq_lane (uint64x2_t __a, const int __idx)
  return __arm_vgetq_lane_u64 (__a, __idx);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u32 (__a, __b, __imm, __p);
-}
-
 #if (__ARM_FEATURE_MVE & 2)  /* MVE Floating point.  */
 
 __extension__ extern __inline void
@@ -4887,15 +4681,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
   int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
 
-#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vld1q_z(p0,p1) ( \
   _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
@@ -5234,15 +5019,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
 
-#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
@@ -5615,15 +5391,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vshlcq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
-
 #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 0507e117f51..83a1eb48533 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1719,7 +1719,7 @@  (define_expand "mve_vshlcq_carry_<supf><mode>"
   DONE;
 })
 
-(define_insn "mve_vshlcq_<supf><mode>"
+(define_insn "@mve_vshlcq_<supf><mode>"
  [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
        (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
 		      (match_operand:SI 3 "s_register_operand" "1")
@@ -6279,7 +6279,7 @@  (define_expand "mve_vshlcq_m_carry_<supf><mode>"
   DONE;
 })
 
-(define_insn "mve_vshlcq_m_<supf><mode>"
+(define_insn "@mve_vshlcq_m_<supf><mode>"
  [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
        (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
 		      (match_operand:SI 3 "s_register_operand" "1")