Message ID | VI1PR0802MB2368FE5A1C55BAAE9922AD0E9B319@VI1PR0802MB2368.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | [GCC-10,backport] arm: Fix polymorphic variants failing with undefined reference to `__ARM_undef` error. | expand |
> -----Original Message----- > From: Srinath Parvathaneni <Srinath.Parvathaneni@arm.com> > Sent: 14 June 2021 11:35 > To: gcc-patches@gcc.gnu.org > Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com> > Subject: [GCC-10 backport][PATCH] arm: Fix polymorphic variants failing with > undefined reference to `__ARM_undef` error. > > Hi, > > This patch fixes the issue mentioned in PR101016, which is mve polymorphic > variants > failing at linking with undefined reference to "__ARM_undef" error. > > Regression tested on arm-none-eabi and found no regressions. > > This patch have cleanly applied, ok for the GCC-10 branch? Ok. Thanks, Kyrill > > Regards, > Srinath. > > gcc/ChangeLog: > > 2021-06-11 Srinath Parvathaneni <srinath.parvathaneni@arm.com> > > PR target/101016 > * config/arm/arm_mve.h (__arm_vld1q): Change > __ARM_mve_coerce(p0, > int8_t const *) to __ARM_mve_coerce1(p0, int8_t *) in the argument > for > the polymorphic variants matching code. > (__arm_vld1q_z): Likewise. > (__arm_vld2q): Likewise. > (__arm_vld4q): Likewise. > (__arm_vldrbq_gather_offset): Likewise. > (__arm_vldrbq_gather_offset_z): Likewise. > > gcc/testsuite/ChangeLog: > > 2021-06-11 Srinath Parvathaneni <srinath.parvathaneni@arm.com> > > PR target/101016 > * gcc.target/arm/mve/intrinsics/pr101016.c: New test. > > (cherry picked from commit b13f297f01c943aa167f7c6eb94bed40dce0d553) > > > ############### Attachment also inlined for ease of reply > ############### > > > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h > index > 1132c7cf87d217a380cf26dd6f110130ea7bf175..4aa3787ca536215450fcb1a > 5bb602b7d5bdbbb16 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -37559,47 +37559,47 @@ extern void *__ARM_undef; > > #define __arm_vld1q(p0) (\ > _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 > (__ARM_mve_coerce(p0, int8_t const *)), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 > (__ARM_mve_coerce(p0, int16_t const *)), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 > (__ARM_mve_coerce(p0, int32_t const *)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 > (__ARM_mve_coerce(p0, uint8_t const *)), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 > (__ARM_mve_coerce(p0, uint16_t const *)), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 > (__ARM_mve_coerce(p0, uint32_t const *)), \ > - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 > (__ARM_mve_coerce(p0, float16_t const *)), \ > - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 > (__ARM_mve_coerce(p0, float32_t const *)))) > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 > (__ARM_mve_coerce1(p0, int8_t *)), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 > (__ARM_mve_coerce1(p0, int16_t *)), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 > (__ARM_mve_coerce1(p0, int32_t *)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 > (__ARM_mve_coerce1(p0, uint8_t *)), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 > (__ARM_mve_coerce1(p0, uint16_t *)), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 > (__ARM_mve_coerce1(p0, uint32_t *)), \ > + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 > (__ARM_mve_coerce1(p0, float16_t *)), \ > + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 > (__ARM_mve_coerce1(p0, float32_t *)))) > > #define __arm_vld1q_z(p0,p1) ( \ > _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 > (__ARM_mve_coerce(p0, int8_t const *), p1), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 > (__ARM_mve_coerce(p0, int16_t const *), p1), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 > (__ARM_mve_coerce(p0, int32_t const *), p1), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 > (__ARM_mve_coerce(p0, uint8_t const *), p1), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 > (__ARM_mve_coerce(p0, uint16_t const *), p1), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 > (__ARM_mve_coerce(p0, uint32_t const *), p1), \ > - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 > (__ARM_mve_coerce(p0, float16_t const *), p1), \ > - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 > (__ARM_mve_coerce(p0, float32_t const *), p1))) > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 > (__ARM_mve_coerce1(p0, int8_t *), p1), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 > (__ARM_mve_coerce1(p0, int16_t *), p1), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 > (__ARM_mve_coerce1(p0, int32_t *), p1), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 > (__ARM_mve_coerce1(p0, uint8_t *), p1), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 > (__ARM_mve_coerce1(p0, uint16_t *), p1), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 > (__ARM_mve_coerce1(p0, uint32_t *), p1), \ > + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 > (__ARM_mve_coerce1(p0, float16_t *), p1), \ > + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 > (__ARM_mve_coerce1(p0, float32_t *), p1))) > > #define __arm_vld2q(p0) ( \ > _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 > (__ARM_mve_coerce(p0, int8_t const *)), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 > (__ARM_mve_coerce(p0, int16_t const *)), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 > (__ARM_mve_coerce(p0, int32_t const *)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 > (__ARM_mve_coerce(p0, uint8_t const *)), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 > (__ARM_mve_coerce(p0, uint16_t const *)), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 > (__ARM_mve_coerce(p0, uint32_t const *)), \ > - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 > (__ARM_mve_coerce(p0, float16_t const *)), \ > - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 > (__ARM_mve_coerce(p0, float32_t const *)))) > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 > (__ARM_mve_coerce1(p0, int8_t *)), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 > (__ARM_mve_coerce1(p0, int16_t *)), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 > (__ARM_mve_coerce1(p0, int32_t *)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 > (__ARM_mve_coerce1(p0, uint8_t *)), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 > (__ARM_mve_coerce1(p0, uint16_t *)), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 > (__ARM_mve_coerce1(p0, uint32_t *)), \ > + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 > (__ARM_mve_coerce1(p0, float16_t *)), \ > + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 > (__ARM_mve_coerce1(p0, float32_t *)))) > > #define __arm_vld4q(p0) ( \ > _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 > (__ARM_mve_coerce(p0, int8_t const *)), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 > (__ARM_mve_coerce(p0, int16_t const *)), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 > (__ARM_mve_coerce(p0, int32_t const *)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 > (__ARM_mve_coerce(p0, uint8_t const *)), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 > (__ARM_mve_coerce(p0, uint16_t const *)), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 > (__ARM_mve_coerce(p0, uint32_t const *)), \ > - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 > (__ARM_mve_coerce(p0, float16_t const *)), \ > - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 > (__ARM_mve_coerce(p0, float32_t const *)))) > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 > (__ARM_mve_coerce1(p0, int8_t *)), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 > (__ARM_mve_coerce1(p0, int16_t *)), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 > (__ARM_mve_coerce1(p0, int32_t *)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 > (__ARM_mve_coerce1(p0, uint8_t *)), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 > (__ARM_mve_coerce1(p0, uint16_t *)), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 > (__ARM_mve_coerce1(p0, uint32_t *)), \ > + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 > (__ARM_mve_coerce1(p0, float16_t *)), \ > + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 > (__ARM_mve_coerce1(p0, float32_t *)))) > > #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ > _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ > @@ -39625,25 +39625,26 @@ extern void *__ARM_undef; > > #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ > _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint32x4_t)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), > __ARM_mve_coerce(__p1, uint32x4_t)));}) > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint32x4_t)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint32x4_t)));}) > > #define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = > (p2); \ > _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ > int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 > (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ > int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32 > (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));}) > > -#define __arm_vld1q(p0) (_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 > (__ARM_mve_coerce(p0, int8_t const *)), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 > (__ARM_mve_coerce(p0, int16_t const *)), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 > (__ARM_mve_coerce(p0, int32_t const *)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 > (__ARM_mve_coerce(p0, uint8_t const *)), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 > (__ARM_mve_coerce(p0, uint16_t const *)), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 > (__ARM_mve_coerce(p0, uint32_t const *)))) > +#define __arm_vld1q(p0) (\ > + _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 > (__ARM_mve_coerce1(p0, int8_t *)), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 > (__ARM_mve_coerce1(p0, int16_t *)), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 > (__ARM_mve_coerce1(p0, int32_t *)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 > (__ARM_mve_coerce1(p0, uint8_t *)), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 > (__ARM_mve_coerce1(p0, uint16_t *)), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 > (__ARM_mve_coerce1(p0, uint32_t *)))) > > #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ > _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ > @@ -40140,29 +40141,29 @@ extern void *__ARM_undef; > int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32 > (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));}) > > #define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, > \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 > (__ARM_mve_coerce(p0, int8_t const *), p1), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 > (__ARM_mve_coerce(p0, int16_t const *), p1), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 > (__ARM_mve_coerce(p0, int32_t const *), p1), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 > (__ARM_mve_coerce(p0, uint8_t const *), p1), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 > (__ARM_mve_coerce(p0, uint16_t const *), p1), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 > (__ARM_mve_coerce(p0, uint32_t const *), p1))) > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 > (__ARM_mve_coerce1(p0, int8_t *), p1), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 > (__ARM_mve_coerce1(p0, int16_t *), p1), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 > (__ARM_mve_coerce1(p0, int32_t *), p1), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 > (__ARM_mve_coerce1(p0, uint8_t *), p1), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 > (__ARM_mve_coerce1(p0, uint16_t *), p1), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 > (__ARM_mve_coerce1(p0, uint32_t *), p1))) > > #define __arm_vld2q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 > (__ARM_mve_coerce(p0, int8_t const *)), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 > (__ARM_mve_coerce(p0, int16_t const *)), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 > (__ARM_mve_coerce(p0, int32_t const *)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 > (__ARM_mve_coerce(p0, uint8_t const *)), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 > (__ARM_mve_coerce(p0, uint16_t const *)), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 > (__ARM_mve_coerce(p0, uint32_t const *)))) > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 > (__ARM_mve_coerce1(p0, int8_t *)), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 > (__ARM_mve_coerce1(p0, int16_t *)), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 > (__ARM_mve_coerce1(p0, int32_t *)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 > (__ARM_mve_coerce1(p0, uint8_t *)), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 > (__ARM_mve_coerce1(p0, uint16_t *)), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 > (__ARM_mve_coerce1(p0, uint32_t *)))) > > > #define __arm_vld4q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 > (__ARM_mve_coerce(p0, int8_t const *)), \ > - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 > (__ARM_mve_coerce(p0, int16_t const *)), \ > - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 > (__ARM_mve_coerce(p0, int32_t const *)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 > (__ARM_mve_coerce(p0, uint8_t const *)), \ > - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 > (__ARM_mve_coerce(p0, uint16_t const *)), \ > - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 > (__ARM_mve_coerce(p0, uint32_t const *)))) > + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 > (__ARM_mve_coerce1(p0, int8_t *)), \ > + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 > (__ARM_mve_coerce1(p0, int16_t *)), \ > + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 > (__ARM_mve_coerce1(p0, int32_t *)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 > (__ARM_mve_coerce1(p0, uint8_t *)), \ > + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 > (__ARM_mve_coerce1(p0, uint16_t *)), \ > + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 > (__ARM_mve_coerce1(p0, uint32_t *)))) > > #define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \ > _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ > @@ -40509,12 +40510,12 @@ extern void *__ARM_undef; > > #define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); > \ > _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint8x16_t), p2), \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint16x8_t), p2), \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint32x4_t), p2), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), > __ARM_mve_coerce(__p1, uint8x16_t), p2), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce(p0, uint8_t const > *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce(p0, uint8_t const > *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint8x16_t), p2), \ > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint16x8_t), p2), \ > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint32x4_t), p2), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint8x16_t), p2), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint16x8_t), p2), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint32x4_t), p2));}) > > #define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > @@ -41195,12 +41196,12 @@ extern void *__ARM_undef; > > #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ > _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), > __ARM_mve_coerce(__p1, uint32x4_t)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), > __ARM_mve_coerce(__p1, uint32x4_t)));}) > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_s16(__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_s32(__ARM_mve_coerce1(p0, int8_t *), > __ARM_mve_coerce(__p1, uint32x4_t)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: > __arm_vldrbq_gather_offset_u8(__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint8x16_t)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: > __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint16x8_t)), \ > + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: > __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce1(p0, uint8_t *), > __ARM_mve_coerce(__p1, uint32x4_t)));}) > > #define __arm_vidupq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ > __typeof(p1) __p1 = (p1); \ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..b12786d04f558474ed9b3 > df9998663c7f9bc4d1a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c > @@ -0,0 +1,136 @@ > +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ > +/* { dg-add-options arm_v8_1m_mve_fp } */ > + > +#include "arm_mve.h" > + > +void > +foo (void) > +{ > + mve_pred16_t p; > + int8x16_t a; > + int8_t a1[10]; > + int16x8_t b; > + int16_t b1[10]; > + int32x4_t c; > + int32_t c1[10]; > + uint8x16_t ua; > + uint8_t ua1[10]; > + uint16x8_t ub; > + uint16_t ub1[10]; > + uint32x4_t uc; > + uint32_t uc1[10]; > + float16x8_t fb; > + float16_t fb1[10]; > + float32x4_t fc; > + float32_t fc1[10]; > + > + fb = vld1q (fb1); > + fc = vld1q (fc1); > + b = vld1q (b1); > + c = vld1q (c1); > + a = vld1q (a1); > + ub = vld1q (ub1); > + uc = vld1q (uc1); > + ua = vld1q (ua1); > + fb = vld1q_z (fb1, p); > + fc = vld1q_z (fc1, p); > + b = vld1q_z (b1, p); > + c = vld1q_z (c1, p); > + a = vld1q_z (a1, p); > + ub = vld1q_z (ub1, p); > + uc = vld1q_z (uc1, p); > + ua = vld1q_z (ua1, p); > +} > + > +void > +foo1 (void) > +{ > + mve_pred16_t p; > + int8x16x2_t a; > + int8_t a1[10]; > + int16x8x2_t b; > + int16_t b1[10]; > + int32x4x2_t c; > + int32_t c1[10]; > + uint8x16x2_t ua; > + uint8_t ua1[10]; > + uint16x8x2_t ub; > + uint16_t ub1[10]; > + uint32x4x2_t uc; > + uint32_t uc1[10]; > + float16x8x2_t fb; > + float16_t fb1[10]; > + float32x4x2_t fc; > + float32_t fc1[10]; > + > + fb = vld2q (fb1); > + fc = vld2q (fc1); > + b = vld2q (b1); > + c = vld2q (c1); > + a = vld2q (a1); > + ub = vld2q (ub1); > + uc = vld2q (uc1); > + ua = vld2q (ua1); > +} > + > +void > +foo2 (void) > +{ > + mve_pred16_t p; > + int8x16x4_t a; > + int8_t a1[10]; > + int16x8x4_t b; > + int16_t b1[10]; > + int32x4x4_t c; > + int32_t c1[10]; > + uint8x16x4_t ua; > + uint8_t ua1[10]; > + uint16x8x4_t ub; > + uint16_t ub1[10]; > + uint32x4x4_t uc; > + uint32_t uc1[10]; > + float16x8x4_t fb; > + float16_t fb1[10]; > + float32x4x4_t fc; > + float32_t fc1[10]; > + > + fb = vld4q (fb1); > + fc = vld4q (fc1); > + b = vld4q (b1); > + c = vld4q (c1); > + a = vld4q (a1); > + ub = vld4q (ub1); > + uc = vld4q (uc1); > + ua = vld4q (ua1); > +} > + > +void > +foo3 (void) > +{ > + mve_pred16_t p; > + int16x8_t a; > + uint16x8_t ua; > + int8_t a1[10]; > + uint8_t ua1[10]; > + uint16x8_t offset_a; > + int8x16_t b; > + uint8x16_t ub; > + uint8x16_t offset_b; > + int32x4_t c; > + uint32x4_t uc; > + uint32x4_t offset_c; > + > + a = vldrbq_gather_offset (a1, offset_a); > + ua = vldrbq_gather_offset (ua1, offset_a); > + b = vldrbq_gather_offset (a1, offset_b); > + ub = vldrbq_gather_offset (ua1, offset_b); > + c = vldrbq_gather_offset (a1, offset_c); > + uc = vldrbq_gather_offset (ua1, offset_c); > + a = vldrbq_gather_offset_z (a1, offset_a, p); > + ua = vldrbq_gather_offset_z (ua1, offset_a, p); > + b = vldrbq_gather_offset_z (a1, offset_b, p); > + ub = vldrbq_gather_offset_z (ua1, offset_b, p); > + c = vldrbq_gather_offset_z (a1, offset_c, p); > + uc = vldrbq_gather_offset_z (ua1, offset_c, p); > +} > +/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 1132c7cf87d217a380cf26dd6f110130ea7bf175..4aa3787ca536215450fcb1a5bb602b7d5bdbbb16 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -37559,47 +37559,47 @@ extern void *__ARM_undef; #define __arm_vld1q(p0) (\ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \ - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \ - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce(p0, float32_t const *)))) + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \ + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \ + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce1(p0, float32_t *)))) #define __arm_vld1q_z(p0,p1) ( \ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(p0, int8_t const *), p1), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(p0, int16_t const *), p1), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(p0, int32_t const *), p1), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), p1), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(p0, uint16_t const *), p1), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(p0, uint32_t const *), p1), \ - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce(p0, float16_t const *), p1), \ - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce(p0, float32_t const *), p1))) + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce1(p0, int8_t *), p1), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce1(p0, int16_t *), p1), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce1(p0, int32_t *), p1), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), p1), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce1(p0, uint16_t *), p1), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce1(p0, uint32_t *), p1), \ + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce1(p0, float16_t *), p1), \ + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce1(p0, float32_t *), p1))) #define __arm_vld2q(p0) ( \ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \ - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \ - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 (__ARM_mve_coerce(p0, float32_t const *)))) + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \ + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld2q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \ + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld2q_f32 (__ARM_mve_coerce1(p0, float32_t *)))) #define __arm_vld4q(p0) ( \ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(p0, uint32_t const *)), \ - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce(p0, float16_t const *)), \ - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce(p0, float32_t const *)))) + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce1(p0, uint32_t *)), \ + int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce1(p0, float16_t *)), \ + int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce1(p0, float32_t *)))) #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ @@ -39625,25 +39625,26 @@ extern void *__ARM_undef; #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));}) + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) #define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));}) -#define __arm_vld1q(p0) (_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce(p0, uint32_t const *)))) +#define __arm_vld1q(p0) (\ + _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce1(p0, uint32_t *)))) #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ @@ -40140,29 +40141,29 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));}) #define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce(p0, int8_t const *), p1), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce(p0, int16_t const *), p1), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce(p0, int32_t const *), p1), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), p1), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce(p0, uint16_t const *), p1), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce(p0, uint32_t const *), p1))) + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce1(p0, int8_t *), p1), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce1(p0, int16_t *), p1), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce1(p0, int32_t *), p1), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), p1), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce1(p0, uint16_t *), p1), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce1(p0, uint32_t *), p1))) #define __arm_vld2q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce(p0, uint32_t const *)))) + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld2q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld2q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld2q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld2q_u32 (__ARM_mve_coerce1(p0, uint32_t *)))) #define __arm_vld4q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce(p0, int8_t const *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce(p0, int16_t const *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce(p0, int32_t const *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce(p0, uint8_t const *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce(p0, uint16_t const *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce(p0, uint32_t const *)))) + int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld4q_s8 (__ARM_mve_coerce1(p0, int8_t *)), \ + int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld4q_s16 (__ARM_mve_coerce1(p0, int16_t *)), \ + int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld4q_s32 (__ARM_mve_coerce1(p0, int32_t *)), \ + int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld4q_u8 (__ARM_mve_coerce1(p0, uint8_t *)), \ + int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld4q_u16 (__ARM_mve_coerce1(p0, uint16_t *)), \ + int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld4q_u32 (__ARM_mve_coerce1(p0, uint32_t *)))) #define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ @@ -40509,12 +40510,12 @@ extern void *__ARM_undef; #define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) #define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -41195,12 +41196,12 @@ extern void *__ARM_undef; #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce(p0, int8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce(p0, uint8_t const *), __ARM_mve_coerce(__p1, uint32x4_t)));}) + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32(__ARM_mve_coerce1(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) #define __arm_vidupq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c new file mode 100644 index 0000000000000000000000000000000000000000..b12786d04f558474ed9b3df9998663c7f9bc4d1a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/pr101016.c @@ -0,0 +1,136 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ + +#include "arm_mve.h" + +void +foo (void) +{ + mve_pred16_t p; + int8x16_t a; + int8_t a1[10]; + int16x8_t b; + int16_t b1[10]; + int32x4_t c; + int32_t c1[10]; + uint8x16_t ua; + uint8_t ua1[10]; + uint16x8_t ub; + uint16_t ub1[10]; + uint32x4_t uc; + uint32_t uc1[10]; + float16x8_t fb; + float16_t fb1[10]; + float32x4_t fc; + float32_t fc1[10]; + + fb = vld1q (fb1); + fc = vld1q (fc1); + b = vld1q (b1); + c = vld1q (c1); + a = vld1q (a1); + ub = vld1q (ub1); + uc = vld1q (uc1); + ua = vld1q (ua1); + fb = vld1q_z (fb1, p); + fc = vld1q_z (fc1, p); + b = vld1q_z (b1, p); + c = vld1q_z (c1, p); + a = vld1q_z (a1, p); + ub = vld1q_z (ub1, p); + uc = vld1q_z (uc1, p); + ua = vld1q_z (ua1, p); +} + +void +foo1 (void) +{ + mve_pred16_t p; + int8x16x2_t a; + int8_t a1[10]; + int16x8x2_t b; + int16_t b1[10]; + int32x4x2_t c; + int32_t c1[10]; + uint8x16x2_t ua; + uint8_t ua1[10]; + uint16x8x2_t ub; + uint16_t ub1[10]; + uint32x4x2_t uc; + uint32_t uc1[10]; + float16x8x2_t fb; + float16_t fb1[10]; + float32x4x2_t fc; + float32_t fc1[10]; + + fb = vld2q (fb1); + fc = vld2q (fc1); + b = vld2q (b1); + c = vld2q (c1); + a = vld2q (a1); + ub = vld2q (ub1); + uc = vld2q (uc1); + ua = vld2q (ua1); +} + +void +foo2 (void) +{ + mve_pred16_t p; + int8x16x4_t a; + int8_t a1[10]; + int16x8x4_t b; + int16_t b1[10]; + int32x4x4_t c; + int32_t c1[10]; + uint8x16x4_t ua; + uint8_t ua1[10]; + uint16x8x4_t ub; + uint16_t ub1[10]; + uint32x4x4_t uc; + uint32_t uc1[10]; + float16x8x4_t fb; + float16_t fb1[10]; + float32x4x4_t fc; + float32_t fc1[10]; + + fb = vld4q (fb1); + fc = vld4q (fc1); + b = vld4q (b1); + c = vld4q (c1); + a = vld4q (a1); + ub = vld4q (ub1); + uc = vld4q (uc1); + ua = vld4q (ua1); +} + +void +foo3 (void) +{ + mve_pred16_t p; + int16x8_t a; + uint16x8_t ua; + int8_t a1[10]; + uint8_t ua1[10]; + uint16x8_t offset_a; + int8x16_t b; + uint8x16_t ub; + uint8x16_t offset_b; + int32x4_t c; + uint32x4_t uc; + uint32x4_t offset_c; + + a = vldrbq_gather_offset (a1, offset_a); + ua = vldrbq_gather_offset (ua1, offset_a); + b = vldrbq_gather_offset (a1, offset_b); + ub = vldrbq_gather_offset (ua1, offset_b); + c = vldrbq_gather_offset (a1, offset_c); + uc = vldrbq_gather_offset (ua1, offset_c); + a = vldrbq_gather_offset_z (a1, offset_a, p); + ua = vldrbq_gather_offset_z (ua1, offset_a, p); + b = vldrbq_gather_offset_z (a1, offset_b, p); + ub = vldrbq_gather_offset_z (ua1, offset_b, p); + c = vldrbq_gather_offset_z (a1, offset_c, p); + uc = vldrbq_gather_offset_z (ua1, offset_c, p); +} +/* { dg-final { scan-assembler-not "__ARM_undef" } } */