Message ID | 20240904132650.2720446-30-christophe.lyon@linaro.org |
---|---|
State | New |
Headers | show |
Series | arm: [MVE intrinsics] Re-implement more intrinsics | expand |
On 04/09/2024 14:26, Christophe Lyon wrote: > Implement vshlc using the new MVE builtins framework. > > 2024-08-28 Christophe Lyon <christophe.lyon@linaro.org> > > gcc/ > * config/arm/arm-mve-builtins-base.cc (class vshlc_impl): New. > (vshlc): New. > * config/arm/arm-mve-builtins-base.def (vshlcq): New. > * config/arm/arm-mve-builtins-base.h (vshlcq): New. > * config/arm/arm-mve-builtins.cc > (function_instance::has_inactive_argument): Handle vshlc. > * config/arm/arm_mve.h (vshlcq): Delete. > (vshlcq_m): Delete. > (vshlcq_s8): Delete. > (vshlcq_u8): Delete. > (vshlcq_s16): Delete. > (vshlcq_u16): Delete. > (vshlcq_s32): Delete. > (vshlcq_u32): Delete. > (vshlcq_m_s8): Delete. > (vshlcq_m_u8): Delete. > (vshlcq_m_s16): Delete. > (vshlcq_m_u16): Delete. > (vshlcq_m_s32): Delete. > (vshlcq_m_u32): Delete. > (__arm_vshlcq_s8): Delete. > (__arm_vshlcq_u8): Delete. > (__arm_vshlcq_s16): Delete. > (__arm_vshlcq_u16): Delete. > (__arm_vshlcq_s32): Delete. > (__arm_vshlcq_u32): Delete. > (__arm_vshlcq_m_s8): Delete. > (__arm_vshlcq_m_u8): Delete. > (__arm_vshlcq_m_s16): Delete. > (__arm_vshlcq_m_u16): Delete. > (__arm_vshlcq_m_s32): Delete. > (__arm_vshlcq_m_u32): Delete. > (__arm_vshlcq): Delete. > (__arm_vshlcq_m): Delete. > * config/arm/mve.md (mve_vshlcq_<supf><mode>): Add '@' prefix. > (mve_vshlcq_m_<supf><mode>): Likewise. OK. R. > --- > gcc/config/arm/arm-mve-builtins-base.cc | 72 +++++++ > gcc/config/arm/arm-mve-builtins-base.def | 1 + > gcc/config/arm/arm-mve-builtins-base.h | 1 + > gcc/config/arm/arm-mve-builtins.cc | 1 + > gcc/config/arm/arm_mve.h | 233 ----------------------- > gcc/config/arm/mve.md | 4 +- > 6 files changed, 77 insertions(+), 235 deletions(-) > > diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc > index eaf054d9823..9f1f7e69c57 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.cc > +++ b/gcc/config/arm/arm-mve-builtins-base.cc > @@ -483,6 +483,77 @@ public: > } > }; > > +/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector > + mode associated with type suffix 0. We need this special case because the > + intrinsics derefrence the second parameter and update its contents. */ > +class vshlc_impl : public function_base > +{ > +public: > + unsigned int > + call_properties (const function_instance &) const override > + { > + return CP_WRITE_MEMORY | CP_READ_MEMORY; > + } > + > + tree > + memory_scalar_type (const function_instance &) const override > + { > + return get_typenode_from_name (UINT32_TYPE); > + } > + > + rtx > + expand (function_expander &e) const override > + { > + machine_mode mode = e.vector_mode (0); > + insn_code code; > + rtx insns, carry_ptr, carry, new_carry; > + int carry_arg_no; > + > + if (! e.type_suffix (0).integer_p) > + gcc_unreachable (); > + > + if (e.mode_suffix_id != MODE_none) > + gcc_unreachable (); > + > + carry_arg_no = 1; > + > + carry = gen_reg_rtx (SImode); > + carry_ptr = e.args[carry_arg_no]; > + emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr))); > + e.args[carry_arg_no] = carry; > + > + new_carry = gen_reg_rtx (SImode); > + e.args.quick_insert (0, new_carry); > + > + switch (e.pred) > + { > + case PRED_none: > + /* No predicate. */ > + code = e.type_suffix (0).unsigned_p > + ? code_for_mve_vshlcq (VSHLCQ_U, mode) > + : code_for_mve_vshlcq (VSHLCQ_S, mode); > + insns = e.use_exact_insn (code); > + break; > + > + case PRED_m: > + /* "m" predicate. */ > + code = e.type_suffix (0).unsigned_p > + ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode) > + : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode); > + insns = e.use_cond_insn (code, 0); > + break; > + > + default: > + gcc_unreachable (); > + } > + > + /* Update carry. */ > + emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry)); > + > + return insns; > + } > +}; > + > } /* end anonymous namespace */ > > namespace arm_mve { > @@ -815,6 +886,7 @@ FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ) > FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ) > FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ) > FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ) > +FUNCTION (vshlcq, vshlc_impl,) > FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ) > FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ) > FUNCTION_WITH_M_N_R (vshlq, VSHLQ) > diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def > index c5f1e8a197b..bd69f06d7e4 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.def > +++ b/gcc/config/arm/arm-mve-builtins-base.def > @@ -152,6 +152,7 @@ DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none) > DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none) > DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none) > DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none) > +DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none) > DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none) > DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none) > DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none) > diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h > index ed8761318bb..1eff50d3c6d 100644 > --- a/gcc/config/arm/arm-mve-builtins-base.h > +++ b/gcc/config/arm/arm-mve-builtins-base.h > @@ -188,6 +188,7 @@ extern const function_base *const vrshlq; > extern const function_base *const vrshrnbq; > extern const function_base *const vrshrntq; > extern const function_base *const vrshrq; > +extern const function_base *const vshlcq; > extern const function_base *const vshllbq; > extern const function_base *const vshlltq; > extern const function_base *const vshlq; > diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc > index 1180421bf0a..252744596ce 100644 > --- a/gcc/config/arm/arm-mve-builtins.cc > +++ b/gcc/config/arm/arm-mve-builtins.cc > @@ -810,6 +810,7 @@ function_instance::has_inactive_argument () const > || (base == functions::vrshlq && mode_suffix_id == MODE_n) > || base == functions::vrshrnbq > || base == functions::vrshrntq > + || base == functions::vshlcq > || base == functions::vshrnbq > || base == functions::vshrntq > || base == functions::vsliq > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h > index 37b0fedc4ff..c577c373e98 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -42,7 +42,6 @@ > > #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE > #define vst4q(__addr, __value) __arm_vst4q(__addr, __value) > -#define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm) > #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value) > #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value) > #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value) > @@ -101,7 +100,6 @@ > #define vld4q(__addr) __arm_vld4q(__addr) > #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx) > #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx) > -#define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p) > > > #define vst4q_s8( __addr, __value) __arm_vst4q_s8( __addr, __value) > @@ -113,12 +111,6 @@ > #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value) > #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value) > #define vpnot(__a) __arm_vpnot(__a) > -#define vshlcq_s8(__a, __b, __imm) __arm_vshlcq_s8(__a, __b, __imm) > -#define vshlcq_u8(__a, __b, __imm) __arm_vshlcq_u8(__a, __b, __imm) > -#define vshlcq_s16(__a, __b, __imm) __arm_vshlcq_s16(__a, __b, __imm) > -#define vshlcq_u16(__a, __b, __imm) __arm_vshlcq_u16(__a, __b, __imm) > -#define vshlcq_s32(__a, __b, __imm) __arm_vshlcq_s32(__a, __b, __imm) > -#define vshlcq_u32(__a, __b, __imm) __arm_vshlcq_u32(__a, __b, __imm) > #define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value) > #define vstrbq_u8( __addr, __value) __arm_vstrbq_u8( __addr, __value) > #define vstrbq_u16( __addr, __value) __arm_vstrbq_u16( __addr, __value) > @@ -421,12 +413,6 @@ > #define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1) > #define lsll(__p0, __p1) __arm_lsll(__p0, __p1) > #define asrl(__p0, __p1) __arm_asrl(__p0, __p1) > -#define vshlcq_m_s8(__a, __b, __imm, __p) __arm_vshlcq_m_s8(__a, __b, __imm, __p) > -#define vshlcq_m_u8(__a, __b, __imm, __p) __arm_vshlcq_m_u8(__a, __b, __imm, __p) > -#define vshlcq_m_s16(__a, __b, __imm, __p) __arm_vshlcq_m_s16(__a, __b, __imm, __p) > -#define vshlcq_m_u16(__a, __b, __imm, __p) __arm_vshlcq_m_u16(__a, __b, __imm, __p) > -#define vshlcq_m_s32(__a, __b, __imm, __p) __arm_vshlcq_m_s32(__a, __b, __imm, __p) > -#define vshlcq_m_u32(__a, __b, __imm, __p) __arm_vshlcq_m_u32(__a, __b, __imm, __p) > #endif > > /* For big-endian, GCC's vector indices are reversed within each 64 bits > @@ -502,60 +488,6 @@ __arm_vpnot (mve_pred16_t __a) > return __builtin_mve_vpnotv16bi (__a); > } > > -__extension__ extern __inline int8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm) > -{ > - int8x16_t __res = __builtin_mve_vshlcq_vec_sv16qi (__a, *__b, __imm); > - *__b = __builtin_mve_vshlcq_carry_sv16qi (__a, *__b, __imm); > - return __res; > -} > - > -__extension__ extern __inline uint8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_u8 (uint8x16_t __a, uint32_t * __b, const int __imm) > -{ > - uint8x16_t __res = __builtin_mve_vshlcq_vec_uv16qi (__a, *__b, __imm); > - *__b = __builtin_mve_vshlcq_carry_uv16qi (__a, *__b, __imm); > - return __res; > -} > - > -__extension__ extern __inline int16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_s16 (int16x8_t __a, uint32_t * __b, const int __imm) > -{ > - int16x8_t __res = __builtin_mve_vshlcq_vec_sv8hi (__a, *__b, __imm); > - *__b = __builtin_mve_vshlcq_carry_sv8hi (__a, *__b, __imm); > - return __res; > -} > - > -__extension__ extern __inline uint16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_u16 (uint16x8_t __a, uint32_t * __b, const int __imm) > -{ > - uint16x8_t __res = __builtin_mve_vshlcq_vec_uv8hi (__a, *__b, __imm); > - *__b = __builtin_mve_vshlcq_carry_uv8hi (__a, *__b, __imm); > - return __res; > -} > - > -__extension__ extern __inline int32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_s32 (int32x4_t __a, uint32_t * __b, const int __imm) > -{ > - int32x4_t __res = __builtin_mve_vshlcq_vec_sv4si (__a, *__b, __imm); > - *__b = __builtin_mve_vshlcq_carry_sv4si (__a, *__b, __imm); > - return __res; > -} > - > -__extension__ extern __inline uint32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm) > -{ > - uint32x4_t __res = __builtin_mve_vshlcq_vec_uv4si (__a, *__b, __imm); > - *__b = __builtin_mve_vshlcq_carry_uv4si (__a, *__b, __imm); > - return __res; > -} > - > __extension__ extern __inline void > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value) > @@ -2404,60 +2336,6 @@ __arm_srshr (int32_t value, const int shift) > return __builtin_mve_srshr_si (value, shift); > } > > -__extension__ extern __inline int8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m_s8 (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - int8x16_t __res = __builtin_mve_vshlcq_m_vec_sv16qi (__a, *__b, __imm, __p); > - *__b = __builtin_mve_vshlcq_m_carry_sv16qi (__a, *__b, __imm, __p); > - return __res; > -} > - > -__extension__ extern __inline uint8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m_u8 (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - uint8x16_t __res = __builtin_mve_vshlcq_m_vec_uv16qi (__a, *__b, __imm, __p); > - *__b = __builtin_mve_vshlcq_m_carry_uv16qi (__a, *__b, __imm, __p); > - return __res; > -} > - > -__extension__ extern __inline int16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m_s16 (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - int16x8_t __res = __builtin_mve_vshlcq_m_vec_sv8hi (__a, *__b, __imm, __p); > - *__b = __builtin_mve_vshlcq_m_carry_sv8hi (__a, *__b, __imm, __p); > - return __res; > -} > - > -__extension__ extern __inline uint16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m_u16 (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - uint16x8_t __res = __builtin_mve_vshlcq_m_vec_uv8hi (__a, *__b, __imm, __p); > - *__b = __builtin_mve_vshlcq_m_carry_uv8hi (__a, *__b, __imm, __p); > - return __res; > -} > - > -__extension__ extern __inline int32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m_s32 (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - int32x4_t __res = __builtin_mve_vshlcq_m_vec_sv4si (__a, *__b, __imm, __p); > - *__b = __builtin_mve_vshlcq_m_carry_sv4si (__a, *__b, __imm, __p); > - return __res; > -} > - > -__extension__ extern __inline uint32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m_u32 (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - uint32x4_t __res = __builtin_mve_vshlcq_m_vec_uv4si (__a, *__b, __imm, __p); > - *__b = __builtin_mve_vshlcq_m_carry_uv4si (__a, *__b, __imm, __p); > - return __res; > -} > - > #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ > > __extension__ extern __inline void > @@ -2868,48 +2746,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value) > __arm_vst4q_u32 (__addr, __value); > } > > -__extension__ extern __inline int8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm) > -{ > - return __arm_vshlcq_s8 (__a, __b, __imm); > -} > - > -__extension__ extern __inline uint8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq (uint8x16_t __a, uint32_t * __b, const int __imm) > -{ > - return __arm_vshlcq_u8 (__a, __b, __imm); > -} > - > -__extension__ extern __inline int16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq (int16x8_t __a, uint32_t * __b, const int __imm) > -{ > - return __arm_vshlcq_s16 (__a, __b, __imm); > -} > - > -__extension__ extern __inline uint16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq (uint16x8_t __a, uint32_t * __b, const int __imm) > -{ > - return __arm_vshlcq_u16 (__a, __b, __imm); > -} > - > -__extension__ extern __inline int32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq (int32x4_t __a, uint32_t * __b, const int __imm) > -{ > - return __arm_vshlcq_s32 (__a, __b, __imm); > -} > - > -__extension__ extern __inline uint32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm) > -{ > - return __arm_vshlcq_u32 (__a, __b, __imm); > -} > - > __extension__ extern __inline void > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value) > @@ -4240,48 +4076,6 @@ __arm_vgetq_lane (uint64x2_t __a, const int __idx) > return __arm_vgetq_lane_u64 (__a, __idx); > } > > -__extension__ extern __inline int8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - return __arm_vshlcq_m_s8 (__a, __b, __imm, __p); > -} > - > -__extension__ extern __inline uint8x16_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - return __arm_vshlcq_m_u8 (__a, __b, __imm, __p); > -} > - > -__extension__ extern __inline int16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - return __arm_vshlcq_m_s16 (__a, __b, __imm, __p); > -} > - > -__extension__ extern __inline uint16x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - return __arm_vshlcq_m_u16 (__a, __b, __imm, __p); > -} > - > -__extension__ extern __inline int32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - return __arm_vshlcq_m_s32 (__a, __b, __imm, __p); > -} > - > -__extension__ extern __inline uint32x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -__arm_vshlcq_m (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) > -{ > - return __arm_vshlcq_m_u32 (__a, __b, __imm, __p); > -} > - > #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ > > __extension__ extern __inline void > @@ -4887,15 +4681,6 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \ > int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));}) > > -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ > - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \ > - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \ > - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \ > - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \ > - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \ > - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));}) > - > #define __arm_vld1q_z(p0,p1) ( \ > _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \ > @@ -5234,15 +5019,6 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \ > int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));}) > > -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ > - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ > - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \ > - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \ > - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \ > - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \ > - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \ > - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));}) > - > #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ > _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ > int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ > @@ -5615,15 +5391,6 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ > int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) > > -#define __arm_vshlcq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ > - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ > - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2, p3), \ > - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2, p3), \ > - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2, p3), \ > - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2, p3), \ > - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \ > - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));}) > - > #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \ > _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ > int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \ > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 0507e117f51..83a1eb48533 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -1719,7 +1719,7 @@ (define_expand "mve_vshlcq_carry_<supf><mode>" > DONE; > }) > > -(define_insn "mve_vshlcq_<supf><mode>" > +(define_insn "@mve_vshlcq_<supf><mode>" > [(set (match_operand:MVE_2 0 "s_register_operand" "=w") > (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0") > (match_operand:SI 3 "s_register_operand" "1") > @@ -6279,7 +6279,7 @@ (define_expand "mve_vshlcq_m_carry_<supf><mode>" > DONE; > }) > > -(define_insn "mve_vshlcq_m_<supf><mode>" > +(define_insn "@mve_vshlcq_m_<supf><mode>" > [(set (match_operand:MVE_2 0 "s_register_operand" "=w") > (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0") > (match_operand:SI 3 "s_register_operand" "1")
diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index eaf054d9823..9f1f7e69c57 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -483,6 +483,77 @@ public: } }; +/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector + mode associated with type suffix 0. We need this special case because the + intrinsics derefrence the second parameter and update its contents. */ +class vshlc_impl : public function_base +{ +public: + unsigned int + call_properties (const function_instance &) const override + { + return CP_WRITE_MEMORY | CP_READ_MEMORY; + } + + tree + memory_scalar_type (const function_instance &) const override + { + return get_typenode_from_name (UINT32_TYPE); + } + + rtx + expand (function_expander &e) const override + { + machine_mode mode = e.vector_mode (0); + insn_code code; + rtx insns, carry_ptr, carry, new_carry; + int carry_arg_no; + + if (! e.type_suffix (0).integer_p) + gcc_unreachable (); + + if (e.mode_suffix_id != MODE_none) + gcc_unreachable (); + + carry_arg_no = 1; + + carry = gen_reg_rtx (SImode); + carry_ptr = e.args[carry_arg_no]; + emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr))); + e.args[carry_arg_no] = carry; + + new_carry = gen_reg_rtx (SImode); + e.args.quick_insert (0, new_carry); + + switch (e.pred) + { + case PRED_none: + /* No predicate. */ + code = e.type_suffix (0).unsigned_p + ? code_for_mve_vshlcq (VSHLCQ_U, mode) + : code_for_mve_vshlcq (VSHLCQ_S, mode); + insns = e.use_exact_insn (code); + break; + + case PRED_m: + /* "m" predicate. */ + code = e.type_suffix (0).unsigned_p + ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode) + : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode); + insns = e.use_cond_insn (code, 0); + break; + + default: + gcc_unreachable (); + } + + /* Update carry. */ + emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry)); + + return insns; + } +}; + } /* end anonymous namespace */ namespace arm_mve { @@ -815,6 +886,7 @@ FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ) FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ) FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ) FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ) +FUNCTION (vshlcq, vshlc_impl,) FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ) FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ) FUNCTION_WITH_M_N_R (vshlq, VSHLQ) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index c5f1e8a197b..bd69f06d7e4 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -152,6 +152,7 @@ DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none) DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none) DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none) DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none) +DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none) DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none) DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none) DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none) diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index ed8761318bb..1eff50d3c6d 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -188,6 +188,7 @@ extern const function_base *const vrshlq; extern const function_base *const vrshrnbq; extern const function_base *const vrshrntq; extern const function_base *const vrshrq; +extern const function_base *const vshlcq; extern const function_base *const vshllbq; extern const function_base *const vshlltq; extern const function_base *const vshlq; diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index 1180421bf0a..252744596ce 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -810,6 +810,7 @@ function_instance::has_inactive_argument () const || (base == functions::vrshlq && mode_suffix_id == MODE_n) || base == functions::vrshrnbq || base == functions::vrshrntq + || base == functions::vshlcq || base == functions::vshrnbq || base == functions::vshrntq || base == functions::vsliq diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 37b0fedc4ff..c577c373e98 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -42,7 +42,6 @@ #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE #define vst4q(__addr, __value) __arm_vst4q(__addr, __value) -#define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm) #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value) #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value) #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value) @@ -101,7 +100,6 @@ #define vld4q(__addr) __arm_vld4q(__addr) #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx) #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx) -#define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p) #define vst4q_s8( __addr, __value) __arm_vst4q_s8( __addr, __value) @@ -113,12 +111,6 @@ #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value) #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value) #define vpnot(__a) __arm_vpnot(__a) -#define vshlcq_s8(__a, __b, __imm) __arm_vshlcq_s8(__a, __b, __imm) -#define vshlcq_u8(__a, __b, __imm) __arm_vshlcq_u8(__a, __b, __imm) -#define vshlcq_s16(__a, __b, __imm) __arm_vshlcq_s16(__a, __b, __imm) -#define vshlcq_u16(__a, __b, __imm) __arm_vshlcq_u16(__a, __b, __imm) -#define vshlcq_s32(__a, __b, __imm) __arm_vshlcq_s32(__a, __b, __imm) -#define vshlcq_u32(__a, __b, __imm) __arm_vshlcq_u32(__a, __b, __imm) #define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value) #define vstrbq_u8( __addr, __value) __arm_vstrbq_u8( __addr, __value) #define vstrbq_u16( __addr, __value) __arm_vstrbq_u16( __addr, __value) @@ -421,12 +413,6 @@ #define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1) #define lsll(__p0, __p1) __arm_lsll(__p0, __p1) #define asrl(__p0, __p1) __arm_asrl(__p0, __p1) -#define vshlcq_m_s8(__a, __b, __imm, __p) __arm_vshlcq_m_s8(__a, __b, __imm, __p) -#define vshlcq_m_u8(__a, __b, __imm, __p) __arm_vshlcq_m_u8(__a, __b, __imm, __p) -#define vshlcq_m_s16(__a, __b, __imm, __p) __arm_vshlcq_m_s16(__a, __b, __imm, __p) -#define vshlcq_m_u16(__a, __b, __imm, __p) __arm_vshlcq_m_u16(__a, __b, __imm, __p) -#define vshlcq_m_s32(__a, __b, __imm, __p) __arm_vshlcq_m_s32(__a, __b, __imm, __p) -#define vshlcq_m_u32(__a, __b, __imm, __p) __arm_vshlcq_m_u32(__a, __b, __imm, __p) #endif /* For big-endian, GCC's vector indices are reversed within each 64 bits @@ -502,60 +488,6 @@ __arm_vpnot (mve_pred16_t __a) return __builtin_mve_vpnotv16bi (__a); } -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm) -{ - int8x16_t __res = __builtin_mve_vshlcq_vec_sv16qi (__a, *__b, __imm); - *__b = __builtin_mve_vshlcq_carry_sv16qi (__a, *__b, __imm); - return __res; -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_u8 (uint8x16_t __a, uint32_t * __b, const int __imm) -{ - uint8x16_t __res = __builtin_mve_vshlcq_vec_uv16qi (__a, *__b, __imm); - *__b = __builtin_mve_vshlcq_carry_uv16qi (__a, *__b, __imm); - return __res; -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_s16 (int16x8_t __a, uint32_t * __b, const int __imm) -{ - int16x8_t __res = __builtin_mve_vshlcq_vec_sv8hi (__a, *__b, __imm); - *__b = __builtin_mve_vshlcq_carry_sv8hi (__a, *__b, __imm); - return __res; -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_u16 (uint16x8_t __a, uint32_t * __b, const int __imm) -{ - uint16x8_t __res = __builtin_mve_vshlcq_vec_uv8hi (__a, *__b, __imm); - *__b = __builtin_mve_vshlcq_carry_uv8hi (__a, *__b, __imm); - return __res; -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_s32 (int32x4_t __a, uint32_t * __b, const int __imm) -{ - int32x4_t __res = __builtin_mve_vshlcq_vec_sv4si (__a, *__b, __imm); - *__b = __builtin_mve_vshlcq_carry_sv4si (__a, *__b, __imm); - return __res; -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm) -{ - uint32x4_t __res = __builtin_mve_vshlcq_vec_uv4si (__a, *__b, __imm); - *__b = __builtin_mve_vshlcq_carry_uv4si (__a, *__b, __imm); - return __res; -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value) @@ -2404,60 +2336,6 @@ __arm_srshr (int32_t value, const int shift) return __builtin_mve_srshr_si (value, shift); } -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m_s8 (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - int8x16_t __res = __builtin_mve_vshlcq_m_vec_sv16qi (__a, *__b, __imm, __p); - *__b = __builtin_mve_vshlcq_m_carry_sv16qi (__a, *__b, __imm, __p); - return __res; -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m_u8 (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - uint8x16_t __res = __builtin_mve_vshlcq_m_vec_uv16qi (__a, *__b, __imm, __p); - *__b = __builtin_mve_vshlcq_m_carry_uv16qi (__a, *__b, __imm, __p); - return __res; -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m_s16 (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - int16x8_t __res = __builtin_mve_vshlcq_m_vec_sv8hi (__a, *__b, __imm, __p); - *__b = __builtin_mve_vshlcq_m_carry_sv8hi (__a, *__b, __imm, __p); - return __res; -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m_u16 (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - uint16x8_t __res = __builtin_mve_vshlcq_m_vec_uv8hi (__a, *__b, __imm, __p); - *__b = __builtin_mve_vshlcq_m_carry_uv8hi (__a, *__b, __imm, __p); - return __res; -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m_s32 (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - int32x4_t __res = __builtin_mve_vshlcq_m_vec_sv4si (__a, *__b, __imm, __p); - *__b = __builtin_mve_vshlcq_m_carry_sv4si (__a, *__b, __imm, __p); - return __res; -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m_u32 (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - uint32x4_t __res = __builtin_mve_vshlcq_m_vec_uv4si (__a, *__b, __imm, __p); - *__b = __builtin_mve_vshlcq_m_carry_uv4si (__a, *__b, __imm, __p); - return __res; -} - #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void @@ -2868,48 +2746,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value) __arm_vst4q_u32 (__addr, __value); } -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm) -{ - return __arm_vshlcq_s8 (__a, __b, __imm); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq (uint8x16_t __a, uint32_t * __b, const int __imm) -{ - return __arm_vshlcq_u8 (__a, __b, __imm); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq (int16x8_t __a, uint32_t * __b, const int __imm) -{ - return __arm_vshlcq_s16 (__a, __b, __imm); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq (uint16x8_t __a, uint32_t * __b, const int __imm) -{ - return __arm_vshlcq_u16 (__a, __b, __imm); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq (int32x4_t __a, uint32_t * __b, const int __imm) -{ - return __arm_vshlcq_s32 (__a, __b, __imm); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm) -{ - return __arm_vshlcq_u32 (__a, __b, __imm); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value) @@ -4240,48 +4076,6 @@ __arm_vgetq_lane (uint64x2_t __a, const int __idx) return __arm_vgetq_lane_u64 (__a, __idx); } -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - return __arm_vshlcq_m_s8 (__a, __b, __imm, __p); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - return __arm_vshlcq_m_u8 (__a, __b, __imm, __p); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - return __arm_vshlcq_m_s16 (__a, __b, __imm, __p); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - return __arm_vshlcq_m_u16 (__a, __b, __imm, __p); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - return __arm_vshlcq_m_s32 (__a, __b, __imm, __p); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vshlcq_m (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p) -{ - return __arm_vshlcq_m_u32 (__a, __b, __imm, __p); -} - #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void @@ -4887,15 +4681,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \ int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));}) -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));}) - #define __arm_vld1q_z(p0,p1) ( \ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \ @@ -5234,15 +5019,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \ int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));}) -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));}) - #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \ @@ -5615,15 +5391,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) -#define __arm_vshlcq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2, p3), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2, p3), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2, p3), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2, p3), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));}) - #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \ diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 0507e117f51..83a1eb48533 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -1719,7 +1719,7 @@ (define_expand "mve_vshlcq_carry_<supf><mode>" DONE; }) -(define_insn "mve_vshlcq_<supf><mode>" +(define_insn "@mve_vshlcq_<supf><mode>" [(set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0") (match_operand:SI 3 "s_register_operand" "1") @@ -6279,7 +6279,7 @@ (define_expand "mve_vshlcq_m_carry_<supf><mode>" DONE; }) -(define_insn "mve_vshlcq_m_<supf><mode>" +(define_insn "@mve_vshlcq_m_<supf><mode>" [(set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0") (match_operand:SI 3 "s_register_operand" "1")