Message ID | AM0PR08MB53808B7A64F0B953AA930D919BF70@AM0PR08MB5380.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | [v2,ARM,3/5x] : MVE store intrinsics with predicated suffix. | expand |
Hi Srinath,

> -----Original Message-----
> From: Srinath Parvathaneni <Srinath.Parvathaneni@arm.com>
> Sent: 18 March 2020 17:18
> To: gcc-patches@gcc.gnu.org
> Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Subject: [PATCH v2][ARM][GCC][3/5x]: MVE store intrinsics with predicated
> suffix.
>
> Hello Kyrill,
>
> Following patch is the rebased version of v1.
> (version v1) https://gcc.gnu.org/pipermail/gcc-patches/2019-November/534337.html
>
> ####
>
> Hello,
>
> This patch supports the following MVE ACLE store intrinsics with predicated
> suffix.
>
> vstrbq_p_s8, vstrbq_p_s32, vstrbq_p_s16, vstrbq_p_u8, vstrbq_p_u32,
> vstrbq_p_u16, vstrbq_scatter_offset_p_s8, vstrbq_scatter_offset_p_s32,
> vstrbq_scatter_offset_p_s16, vstrbq_scatter_offset_p_u8,
> vstrbq_scatter_offset_p_u32, vstrbq_scatter_offset_p_u16,
> vstrwq_scatter_base_p_s32, vstrwq_scatter_base_p_u32.
>
> Please refer to M-profile Vector Extension (MVE) intrinsics [1] for more
> details.
> [1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics
>
> Regression tested on arm-none-eabi and found no regressions.
>
> Ok for trunk?

Thanks, I've pushed this patch to master.
Kyrill

>
> Thanks,
> Srinath.
>
> gcc/ChangeLog:
>
> 2019-11-01 Andre Vieira <andre.simoesdiasvieira@arm.com>
> Mihail Ionescu <mihail.ionescu@arm.com>
> Srinath Parvathaneni <srinath.parvathaneni@arm.com>
>
> * config/arm/arm-builtins.c (STRS_P_QUALIFIERS): Define builtin
> qualifier.
> (STRU_P_QUALIFIERS): Likewise.
> (STRSU_P_QUALIFIERS): Likewise.
> (STRSS_P_QUALIFIERS): Likewise.
> (STRSBS_P_QUALIFIERS): Likewise.
> (STRSBU_P_QUALIFIERS): Likewise.
> * config/arm/arm_mve.h (vstrbq_p_s8): Define macro.
> (vstrbq_p_s32): Likewise.
> (vstrbq_p_s16): Likewise.
> (vstrbq_p_u8): Likewise.
> (vstrbq_p_u32): Likewise.
> (vstrbq_p_u16): Likewise.
> (vstrbq_scatter_offset_p_s8): Likewise.
> (vstrbq_scatter_offset_p_s32): Likewise.
> (vstrbq_scatter_offset_p_s16): Likewise.
> (vstrbq_scatter_offset_p_u8): Likewise. > (vstrbq_scatter_offset_p_u32): Likewise. > (vstrbq_scatter_offset_p_u16): Likewise. > (vstrwq_scatter_base_p_s32): Likewise. > (vstrwq_scatter_base_p_u32): Likewise. > (__arm_vstrbq_p_s8): Define intrinsic. > (__arm_vstrbq_p_s32): Likewise. > (__arm_vstrbq_p_s16): Likewise. > (__arm_vstrbq_p_u8): Likewise. > (__arm_vstrbq_p_u32): Likewise. > (__arm_vstrbq_p_u16): Likewise. > (__arm_vstrbq_scatter_offset_p_s8): Likewise. > (__arm_vstrbq_scatter_offset_p_s32): Likewise. > (__arm_vstrbq_scatter_offset_p_s16): Likewise. > (__arm_vstrbq_scatter_offset_p_u8): Likewise. > (__arm_vstrbq_scatter_offset_p_u32): Likewise. > (__arm_vstrbq_scatter_offset_p_u16): Likewise. > (__arm_vstrwq_scatter_base_p_s32): Likewise. > (__arm_vstrwq_scatter_base_p_u32): Likewise. > (vstrbq_p): Define polymorphic variant. > (vstrbq_scatter_offset_p): Likewise. > (vstrwq_scatter_base_p): Likewise. > * config/arm/arm_mve_builtins.def (STRS_P_QUALIFIERS): Use > builtin > qualifier. > (STRU_P_QUALIFIERS): Likewise. > (STRSU_P_QUALIFIERS): Likewise. > (STRSS_P_QUALIFIERS): Likewise. > (STRSBS_P_QUALIFIERS): Likewise. > (STRSBU_P_QUALIFIERS): Likewise. > * config/arm/mve.md > (mve_vstrbq_scatter_offset_p_<supf><mode>): Define > RTL pattern. > (mve_vstrwq_scatter_base_p_<supf>v4si): Likewise. > (mve_vstrbq_p_<supf><mode>): Likewise. > > gcc/testsuite/ChangeLog: > > 2019-11-01 Andre Vieira <andre.simoesdiasvieira@arm.com> > Mihail Ionescu <mihail.ionescu@arm.com> > Srinath Parvathaneni <srinath.parvathaneni@arm.com> > > * gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c: New test. > * gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c: Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c: Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c: Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c: Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c: Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s16.c: > Likewise. 
> * gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s32.c: > Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s8.c: > Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u16.c: > Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u32.c: > Likewise. > * gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u8.c: > Likewise. > * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_s32.c: > Likewise. > * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_u32.c: > Likewise. > > > ############### Attachment also inlined for ease of reply > ############### > > > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > index > aced55f52d317e8deafdc6a6804db3b80c00fd80..c87fa3118510e4de90ac9afe > 08608fb2315f4809 100644 > --- a/gcc/config/arm/arm-builtins.c > +++ b/gcc/config/arm/arm-builtins.c > @@ -613,6 +613,41 @@ arm_strsbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] > #define STRSBU_QUALIFIERS (arm_strsbu_qualifiers) > > static enum arm_type_qualifiers > +arm_strs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_void, qualifier_pointer, qualifier_none, > +qualifier_unsigned}; #define STRS_P_QUALIFIERS (arm_strs_p_qualifiers) > + > +static enum arm_type_qualifiers > +arm_stru_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_void, qualifier_pointer, qualifier_unsigned, > + qualifier_unsigned}; > +#define STRU_P_QUALIFIERS (arm_stru_p_qualifiers) > + > +static enum arm_type_qualifiers > +arm_strsu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_void, qualifier_pointer, qualifier_unsigned, > + qualifier_unsigned, qualifier_unsigned}; #define > +STRSU_P_QUALIFIERS (arm_strsu_p_qualifiers) > + > +static enum arm_type_qualifiers > +arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_void, qualifier_pointer, qualifier_unsigned, > + qualifier_none, qualifier_unsigned}; #define STRSS_P_QUALIFIERS > +(arm_strss_p_qualifiers) > + > +static enum arm_type_qualifiers > 
+arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_void, qualifier_unsigned, qualifier_immediate, > + qualifier_none, qualifier_unsigned}; #define STRSBS_P_QUALIFIERS > +(arm_strsbs_p_qualifiers) > + > +static enum arm_type_qualifiers > +arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > + = { qualifier_void, qualifier_unsigned, qualifier_immediate, > + qualifier_unsigned, qualifier_unsigned}; #define > +STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers) > + > +static enum arm_type_qualifiers > arm_ldrgu_qualifiers[SIMD_MAX_BUILTIN_ARGS] > = { qualifier_unsigned, qualifier_pointer, qualifier_unsigned}; #define > LDRGU_QUALIFIERS (arm_ldrgu_qualifiers) diff --git > a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index > c2c195a9270e976c21d8d0a5941f85dd2bdd9e9d..deed81cfb31b2cf8f830076c > 5e44098c1abf2310 100644 > --- a/gcc/config/arm/arm_mve.h > +++ b/gcc/config/arm/arm_mve.h > @@ -1730,6 +1730,20 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; > #define vldrbq_u32(__base) __arm_vldrbq_u32(__base) #define > vldrwq_gather_base_s32(__addr, __offset) > __arm_vldrwq_gather_base_s32(__addr, __offset) #define > vldrwq_gather_base_u32(__addr, __offset) > __arm_vldrwq_gather_base_u32(__addr, __offset) > +#define vstrbq_p_s8( __addr, __value, __p) __arm_vstrbq_p_s8( __addr, > +__value, __p) #define vstrbq_p_s32( __addr, __value, __p) > +__arm_vstrbq_p_s32( __addr, __value, __p) #define vstrbq_p_s16( __addr, > +__value, __p) __arm_vstrbq_p_s16( __addr, __value, __p) #define > +vstrbq_p_u8( __addr, __value, __p) __arm_vstrbq_p_u8( __addr, __value, > +__p) #define vstrbq_p_u32( __addr, __value, __p) __arm_vstrbq_p_u32( > +__addr, __value, __p) #define vstrbq_p_u16( __addr, __value, __p) > +__arm_vstrbq_p_u16( __addr, __value, __p) #define > +vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p) > +__arm_vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p) > +#define vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p) > 
+__arm_vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p) > +#define vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p) > +__arm_vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p) > +#define vstrbq_scatter_offset_p_u8( __base, __offset, __value, __p) > +__arm_vstrbq_scatter_offset_p_u8( __base, __offset, __value, __p) > +#define vstrbq_scatter_offset_p_u32( __base, __offset, __value, __p) > +__arm_vstrbq_scatter_offset_p_u32( __base, __offset, __value, __p) > +#define vstrbq_scatter_offset_p_u16( __base, __offset, __value, __p) > +__arm_vstrbq_scatter_offset_p_u16( __base, __offset, __value, __p) > +#define vstrwq_scatter_base_p_s32(__addr, __offset, __value, __p) > +__arm_vstrwq_scatter_base_p_s32(__addr, __offset, __value, __p) > +#define vstrwq_scatter_base_p_u32(__addr, __offset, __value, __p) > +__arm_vstrwq_scatter_base_p_u32(__addr, __offset, __value, __p) > #endif > > __extension__ extern __inline void > @@ -11219,6 +11233,103 @@ __arm_vldrwq_gather_base_u32 (uint32x4_t > __addr, const int __offset) > return __builtin_mve_vldrwq_gather_base_uv4si (__addr, __offset); } > > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_p_s8 (int8_t * __addr, int8x16_t __value, mve_pred16_t > +__p) { > + __builtin_mve_vstrbq_p_sv16qi ((__builtin_neon_qi *) __addr, __value, > +__p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_p_s32 (int8_t * __addr, int32x4_t __value, mve_pred16_t > +__p) { > + __builtin_mve_vstrbq_p_sv4si ((__builtin_neon_qi *) __addr, __value, > +__p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_p_s16 (int8_t * __addr, int16x8_t __value, mve_pred16_t > +__p) { > + __builtin_mve_vstrbq_p_sv8hi ((__builtin_neon_qi *) __addr, __value, > +__p); } > + > +__extension__ extern __inline void 
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_p_u8 (uint8_t * __addr, uint8x16_t __value, mve_pred16_t > +__p) { > + __builtin_mve_vstrbq_p_uv16qi ((__builtin_neon_qi *) __addr, __value, > +__p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_p_u32 (uint8_t * __addr, uint32x4_t __value, mve_pred16_t > +__p) { > + __builtin_mve_vstrbq_p_uv4si ((__builtin_neon_qi *) __addr, __value, > +__p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_p_u16 (uint8_t * __addr, uint16x8_t __value, mve_pred16_t > +__p) { > + __builtin_mve_vstrbq_p_uv8hi ((__builtin_neon_qi *) __addr, __value, > +__p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_scatter_offset_p_s8 (int8_t * __base, uint8x16_t __offset, > +int8x16_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrbq_scatter_offset_p_sv16qi ((__builtin_neon_qi *) > +__base, __offset, __value, __p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_scatter_offset_p_s32 (int8_t * __base, uint32x4_t > +__offset, int32x4_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrbq_scatter_offset_p_sv4si ((__builtin_neon_qi *) > +__base, __offset, __value, __p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_scatter_offset_p_s16 (int8_t * __base, uint16x8_t > +__offset, int16x8_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrbq_scatter_offset_p_sv8hi ((__builtin_neon_qi *) > +__base, __offset, __value, __p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_scatter_offset_p_u8 (uint8_t * __base, uint8x16_t 
> +__offset, uint8x16_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrbq_scatter_offset_p_uv16qi ((__builtin_neon_qi *) > +__base, __offset, __value, __p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_scatter_offset_p_u32 (uint8_t * __base, uint32x4_t > +__offset, uint32x4_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrbq_scatter_offset_p_uv4si ((__builtin_neon_qi *) > +__base, __offset, __value, __p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrbq_scatter_offset_p_u16 (uint8_t * __base, uint16x8_t > +__offset, uint16x8_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrbq_scatter_offset_p_uv8hi ((__builtin_neon_qi *) > +__base, __offset, __value, __p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrwq_scatter_base_p_s32 (uint32x4_t __addr, const int __offset, > +int32x4_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrwq_scatter_base_p_sv4si (__addr, __offset, __value, > +__p); } > + > +__extension__ extern __inline void > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +__arm_vstrwq_scatter_base_p_u32 (uint32x4_t __addr, const int __offset, > +uint32x4_t __value, mve_pred16_t __p) { > + __builtin_mve_vstrwq_scatter_base_p_uv4si (__addr, __offset, __value, > +__p); } > #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. 
*/ > > __extension__ extern __inline void > diff --git a/gcc/config/arm/arm_mve_builtins.def > b/gcc/config/arm/arm_mve_builtins.def > index > f6d0fec2fb4795d452ecd5478577d564a384220b..c6e065d52d00e5f6b618e37c > 0b2df42c94e18f60 100644 > --- a/gcc/config/arm/arm_mve_builtins.def > +++ b/gcc/config/arm/arm_mve_builtins.def > @@ -697,3 +697,9 @@ VAR3 (LDRS, vldrbq_s, v16qi, v8hi, v4si) > VAR3 (LDRU, vldrbq_u, v16qi, v8hi, v4si) > VAR1 (LDRGBS, vldrwq_gather_base_s, v4si) > VAR1 (LDRGBU, vldrwq_gather_base_u, v4si) > +VAR3 (STRS_P, vstrbq_p_s, v16qi, v8hi, v4si) > +VAR3 (STRU_P, vstrbq_p_u, v16qi, v8hi, v4si) > +VAR3 (STRSS_P, vstrbq_scatter_offset_p_s, v16qi, v8hi, v4si) > +VAR3 (STRSU_P, vstrbq_scatter_offset_p_u, v16qi, v8hi, v4si) > +VAR1 (STRSBS_P, vstrwq_scatter_base_p_s, v4si) > +VAR1 (STRSBU_P, vstrwq_scatter_base_p_u, v4si) > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index > be1b9faee7f94124ecd6f37cfff55003d162aee5..0e8b04f531c679b87d972265e > d21ea5ec796e3a2 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -8077,3 +8077,68 @@ > return ""; > } > [(set_attr "length" "4")]) > + > +;; > +;; [vstrbq_scatter_offset_p_s vstrbq_scatter_offset_p_u] ;; > +(define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>" > + [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") > + (unspec:<MVE_B_ELEM> > + [(match_operand:MVE_2 1 "s_register_operand" "w") > + (match_operand:MVE_2 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRBSOQ)) > + ] > + "TARGET_HAVE_MVE" > +{ > + rtx ops[3]; > + ops[0] = operands[0]; > + ops[1] = operands[1]; > + ops[2] = operands[2]; > + output_asm_insn ("vpst\n\tvstrbt.<V_sz_elem>\t%q2, [%m0, %q1]",ops); > + return ""; > +} > + [(set_attr "length" "8")]) > + > +;; > +;; [vstrwq_scatter_base_p_s vstrwq_scatter_base_p_u] ;; (define_insn > +"mve_vstrwq_scatter_base_p_<supf>v4si" > + [(set (mem:BLK (scratch)) > + (unspec:BLK > + [(match_operand:V4SI 0 
"s_register_operand" "w") > + (match_operand:SI 1 "immediate_operand" "i") > + (match_operand:V4SI 2 "s_register_operand" "w") > + (match_operand:HI 3 "vpr_register_operand" "Up")] > + VSTRWSBQ)) > + ] > + "TARGET_HAVE_MVE" > +{ > + rtx ops[3]; > + ops[0] = operands[0]; > + ops[1] = operands[1]; > + ops[2] = operands[2]; > + output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops); > + return ""; > +} > + [(set_attr "length" "8")]) > + > +;; > +;; [vstrbq_p_s vstrbq_p_u] > +;; > +(define_insn "mve_vstrbq_p_<supf><mode>" > + [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") > + (unspec:<MVE_B_ELEM> [(match_operand:MVE_2 1 > "s_register_operand" "w") > + (match_operand:HI 2 "vpr_register_operand" > "Up")] > + VSTRBQ)) > + ] > + "TARGET_HAVE_MVE" > +{ > + rtx ops[2]; > + int regno = REGNO (operands[1]); > + ops[1] = gen_rtx_REG (TImode, regno); > + ops[0] = operands[0]; > + output_asm_insn ("vpst\n\tvstrbt.<V_sz_elem>\t%q1, %E0",ops); > + return ""; > +} > + [(set_attr "length" "8")]) > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..704ac270d078338104ad881 > c2e76ba239b4707d5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (int8_t * addr, int16x8_t value, mve_pred16_t p) { > + vstrbq_p_s16 (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > + > +void > +foo1 (int8_t * addr, int16x8_t value, mve_pred16_t p) { > + vstrbq_p (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c > 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..0c6c0a9add4423f88c028c8 > 543fbc93236667226 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (int8_t * addr, int32x4_t value, mve_pred16_t p) { > + vstrbq_p_s32 (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > + > +void > +foo1 (int8_t * addr, int32x4_t value, mve_pred16_t p) { > + vstrbq_p (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..aa8ce9286f85575c65f0bb1 > 56a31e9994fef5d1c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (int8_t * addr, int8x16_t value, mve_pred16_t p) { > + vstrbq_p_s8 (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > + > +void > +foo1 (int8_t * addr, int8x16_t value, mve_pred16_t p) { > + vstrbq_p (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..e9911c1eff66a0b3c411deb > ec9d4a43e42cc665d > --- /dev/null > +++ 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint8_t * addr, uint16x8_t value, mve_pred16_t p) { > + vstrbq_p_u16 (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > + > +void > +foo1 (uint8_t * addr, uint16x8_t value, mve_pred16_t p) { > + vstrbq_p (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..39e0cd14e192fa7de53ff80f > 0d7290ec4fcb21fd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint8_t * addr, uint32x4_t value, mve_pred16_t p) { > + vstrbq_p_u32 (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > + > +void > +foo1 (uint8_t * addr, uint32x4_t value, mve_pred16_t p) { > + vstrbq_p (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..809fe38b9ccaaa3e74afdb7f > fef6a5ea6c5d86d4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { 
dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint8_t * addr, uint8x16_t value, mve_pred16_t p) { > + vstrbq_p_u8 (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > + > +void > +foo1 (uint8_t * addr, uint8x16_t value, mve_pred16_t p) { > + vstrbq_p (addr, value, p); > +} > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s16.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..8813fbe2eb1b6d5d4ba268 > 63598655e3833283a5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_ > +++ p_s16.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (int8_t * base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) > +{ > + vstrbq_scatter_offset_p_s16 (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > + > +void > +foo1 (int8_t * base, uint16x8_t offset, int16x8_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s32.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..f6116ac76613f43e095422ac > dd3b5e943b8814c3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_ > +++ p_s32.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + 
> +#include "arm_mve.h" > + > +void > +foo (int8_t * base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) > +{ > + vstrbq_scatter_offset_p_s32 (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > + > +void > +foo1 (int8_t * base, uint32x4_t offset, int32x4_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s8.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..7fa63efd8cc9398e80a34e13 > c44fa0593b7f8b4f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_ > +++ p_s8.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (int8_t * base, uint8x16_t offset, int8x16_t value, mve_pred16_t p) > +{ > + vstrbq_scatter_offset_p_s8 (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > + > +void > +foo1 (int8_t * base, uint8x16_t offset, int8x16_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u16.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u16.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..3db472aa63a768f5d5558bc > 9fae7fffe90c94c3b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_ > +++ p_u16.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > 
+/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint8_t * base, uint16x8_t offset, uint16x8_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p_u16 (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > + > +void > +foo1 (uint8_t * base, uint16x8_t offset, uint16x8_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.16" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u32.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..4055d70bb5fb2c9c3e092c8 > f26d7814830681422 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_ > +++ p_u32.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint8_t * base, uint32x4_t offset, uint32x4_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p_u32 (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > + > +void > +foo1 (uint8_t * base, uint32x4_t offset, uint32x4_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.32" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u8.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..e96f7a228174e4544285f4fa > 25643b0460fa7c7e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_ > +++ p_u8.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target 
arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint8_t * base, uint8x16_t offset, uint8x16_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p_u8 (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > + > +void > +foo1 (uint8_t * base, uint8x16_t offset, uint8x16_t value, mve_pred16_t > +p) { > + vstrbq_scatter_offset_p (base, offset, value, p); } > + > +/* { dg-final { scan-assembler "vstrbt.8" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_s32.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_s32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..0bce9e646e84e751c3868ad > a0d84ae33a9b36fdd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_ > +++ s32.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint32x4_t addr, int32x4_t value, mve_pred16_t p) { > + vstrwq_scatter_base_p_s32 (addr, 8, value, p); } > + > +/* { dg-final { scan-assembler "vstrwt.u32" } } */ > + > +void > +foo1 (uint32x4_t addr, int32x4_t value, mve_pred16_t p) { > + vstrwq_scatter_base_p (addr, 8, value, p); } > + > +/* { dg-final { scan-assembler "vstrwt.u32" } } */ > diff --git > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_u32.c > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_u32.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..59b21d2c7715a75b812b99 > 0b020af154ec73856a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_ > +++ u32.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok 
} */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O2" } */ > + > +#include "arm_mve.h" > + > +void > +foo (uint32x4_t addr, uint32x4_t value, mve_pred16_t p) { > + vstrwq_scatter_base_p_u32 (addr, 8, value, p); } > + > +/* { dg-final { scan-assembler "vstrwt.u32" } } */ > + > +void > +foo1 (uint32x4_t addr, uint32x4_t value, mve_pred16_t p) { > + vstrwq_scatter_base_p (addr, 8, value, p); } > + > +/* { dg-final { scan-assembler "vstrwt.u32" } } */
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index aced55f52d317e8deafdc6a6804db3b80c00fd80..c87fa3118510e4de90ac9afe08608fb2315f4809 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -613,6 +613,41 @@ arm_strsbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define STRSBU_QUALIFIERS (arm_strsbu_qualifiers) static enum arm_type_qualifiers +arm_strs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer, qualifier_none, qualifier_unsigned}; +#define STRS_P_QUALIFIERS (arm_strs_p_qualifiers) + +static enum arm_type_qualifiers +arm_stru_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer, qualifier_unsigned, + qualifier_unsigned}; +#define STRU_P_QUALIFIERS (arm_stru_p_qualifiers) + +static enum arm_type_qualifiers +arm_strsu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned}; +#define STRSU_P_QUALIFIERS (arm_strsu_p_qualifiers) + +static enum arm_type_qualifiers +arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer, qualifier_unsigned, + qualifier_none, qualifier_unsigned}; +#define STRSS_P_QUALIFIERS (arm_strss_p_qualifiers) + +static enum arm_type_qualifiers +arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_immediate, + qualifier_none, qualifier_unsigned}; +#define STRSBS_P_QUALIFIERS (arm_strsbs_p_qualifiers) + +static enum arm_type_qualifiers +arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_unsigned, qualifier_immediate, + qualifier_unsigned, qualifier_unsigned}; +#define STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers) + +static enum arm_type_qualifiers arm_ldrgu_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_pointer, qualifier_unsigned}; #define LDRGU_QUALIFIERS (arm_ldrgu_qualifiers) diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h 
index c2c195a9270e976c21d8d0a5941f85dd2bdd9e9d..deed81cfb31b2cf8f830076c5e44098c1abf2310 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -1730,6 +1730,20 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t; #define vldrbq_u32(__base) __arm_vldrbq_u32(__base) #define vldrwq_gather_base_s32(__addr, __offset) __arm_vldrwq_gather_base_s32(__addr, __offset) #define vldrwq_gather_base_u32(__addr, __offset) __arm_vldrwq_gather_base_u32(__addr, __offset) +#define vstrbq_p_s8( __addr, __value, __p) __arm_vstrbq_p_s8( __addr, __value, __p) +#define vstrbq_p_s32( __addr, __value, __p) __arm_vstrbq_p_s32( __addr, __value, __p) +#define vstrbq_p_s16( __addr, __value, __p) __arm_vstrbq_p_s16( __addr, __value, __p) +#define vstrbq_p_u8( __addr, __value, __p) __arm_vstrbq_p_u8( __addr, __value, __p) +#define vstrbq_p_u32( __addr, __value, __p) __arm_vstrbq_p_u32( __addr, __value, __p) +#define vstrbq_p_u16( __addr, __value, __p) __arm_vstrbq_p_u16( __addr, __value, __p) +#define vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p) +#define vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p) +#define vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p) +#define vstrbq_scatter_offset_p_u8( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_u8( __base, __offset, __value, __p) +#define vstrbq_scatter_offset_p_u32( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_u32( __base, __offset, __value, __p) +#define vstrbq_scatter_offset_p_u16( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_u16( __base, __offset, __value, __p) +#define vstrwq_scatter_base_p_s32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_p_s32(__addr, __offset, __value, __p) +#define vstrwq_scatter_base_p_u32(__addr, __offset, 
__value, __p) __arm_vstrwq_scatter_base_p_u32(__addr, __offset, __value, __p) #endif __extension__ extern __inline void @@ -11219,6 +11233,103 @@ __arm_vldrwq_gather_base_u32 (uint32x4_t __addr, const int __offset) return __builtin_mve_vldrwq_gather_base_uv4si (__addr, __offset); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_p_s8 (int8_t * __addr, int8x16_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_p_sv16qi ((__builtin_neon_qi *) __addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_p_s32 (int8_t * __addr, int32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_p_sv4si ((__builtin_neon_qi *) __addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_p_s16 (int8_t * __addr, int16x8_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_p_sv8hi ((__builtin_neon_qi *) __addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_p_u8 (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_p_uv16qi ((__builtin_neon_qi *) __addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_p_u32 (uint8_t * __addr, uint32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_p_uv4si ((__builtin_neon_qi *) __addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_p_u16 (uint8_t * __addr, uint16x8_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_p_uv8hi ((__builtin_neon_qi *) __addr, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+__arm_vstrbq_scatter_offset_p_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_scatter_offset_p_sv16qi ((__builtin_neon_qi *) __base, __offset, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_scatter_offset_p_s32 (int8_t * __base, uint32x4_t __offset, int32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_scatter_offset_p_sv4si ((__builtin_neon_qi *) __base, __offset, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_scatter_offset_p_s16 (int8_t * __base, uint16x8_t __offset, int16x8_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_scatter_offset_p_sv8hi ((__builtin_neon_qi *) __base, __offset, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_scatter_offset_p_u8 (uint8_t * __base, uint8x16_t __offset, uint8x16_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_scatter_offset_p_uv16qi ((__builtin_neon_qi *) __base, __offset, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_scatter_offset_p_u32 (uint8_t * __base, uint32x4_t __offset, uint32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_scatter_offset_p_uv4si ((__builtin_neon_qi *) __base, __offset, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrbq_scatter_offset_p_u16 (uint8_t * __base, uint16x8_t __offset, uint16x8_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrbq_scatter_offset_p_uv8hi ((__builtin_neon_qi *) __base, __offset, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_p_s32 
(uint32x4_t __addr, const int __offset, int32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_p_sv4si (__addr, __offset, __value, __p); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vstrwq_scatter_base_p_u32 (uint32x4_t __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p) +{ + __builtin_mve_vstrwq_scatter_base_p_uv4si (__addr, __offset, __value, __p); +} #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ __extension__ extern __inline void diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index f6d0fec2fb4795d452ecd5478577d564a384220b..c6e065d52d00e5f6b618e37c0b2df42c94e18f60 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -697,3 +697,9 @@ VAR3 (LDRS, vldrbq_s, v16qi, v8hi, v4si) VAR3 (LDRU, vldrbq_u, v16qi, v8hi, v4si) VAR1 (LDRGBS, vldrwq_gather_base_s, v4si) VAR1 (LDRGBU, vldrwq_gather_base_u, v4si) +VAR3 (STRS_P, vstrbq_p_s, v16qi, v8hi, v4si) +VAR3 (STRU_P, vstrbq_p_u, v16qi, v8hi, v4si) +VAR3 (STRSS_P, vstrbq_scatter_offset_p_s, v16qi, v8hi, v4si) +VAR3 (STRSU_P, vstrbq_scatter_offset_p_u, v16qi, v8hi, v4si) +VAR1 (STRSBS_P, vstrwq_scatter_base_p_s, v4si) +VAR1 (STRSBU_P, vstrwq_scatter_base_p_u, v4si) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index be1b9faee7f94124ecd6f37cfff55003d162aee5..0e8b04f531c679b87d972265ed21ea5ec796e3a2 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -8077,3 +8077,68 @@ return ""; } [(set_attr "length" "4")]) + +;; +;; [vstrbq_scatter_offset_p_s vstrbq_scatter_offset_p_u] +;; +(define_insn "mve_vstrbq_scatter_offset_p_<supf><mode>" + [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") + (unspec:<MVE_B_ELEM> + [(match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRBSOQ)) + ] + "TARGET_HAVE_MVE" +{ + 
rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vpst\n\tvstrbt.<V_sz_elem>\t%q2, [%m0, %q1]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vstrwq_scatter_base_p_s vstrwq_scatter_base_p_u] +;; +(define_insn "mve_vstrwq_scatter_base_p_<supf>v4si" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:V4SI 0 "s_register_operand" "w") + (match_operand:SI 1 "immediate_operand" "i") + (match_operand:V4SI 2 "s_register_operand" "w") + (match_operand:HI 3 "vpr_register_operand" "Up")] + VSTRWSBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[3]; + ops[0] = operands[0]; + ops[1] = operands[1]; + ops[2] = operands[2]; + output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops); + return ""; +} + [(set_attr "length" "8")]) + +;; +;; [vstrbq_p_s vstrbq_p_u] +;; +(define_insn "mve_vstrbq_p_<supf><mode>" + [(set (match_operand:<MVE_B_ELEM> 0 "memory_operand" "=Us") + (unspec:<MVE_B_ELEM> [(match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:HI 2 "vpr_register_operand" "Up")] + VSTRBQ)) + ] + "TARGET_HAVE_MVE" +{ + rtx ops[2]; + int regno = REGNO (operands[1]); + ops[1] = gen_rtx_REG (TImode, regno); + ops[0] = operands[0]; + output_asm_insn ("vpst\n\tvstrbt.<V_sz_elem>\t%q1, %E0",ops); + return ""; +} + [(set_attr "length" "8")]) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c new file mode 100644 index 0000000000000000000000000000000000000000..704ac270d078338104ad881c2e76ba239b4707d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * addr, int16x8_t value, mve_pred16_t p) +{ + vstrbq_p_s16 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ + 
+void +foo1 (int8_t * addr, int16x8_t value, mve_pred16_t p) +{ + vstrbq_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..0c6c0a9add4423f88c028c8543fbc93236667226 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * addr, int32x4_t value, mve_pred16_t p) +{ + vstrbq_p_s32 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.32" } } */ + +void +foo1 (int8_t * addr, int32x4_t value, mve_pred16_t p) +{ + vstrbq_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c new file mode 100644 index 0000000000000000000000000000000000000000..aa8ce9286f85575c65f0bb156a31e9994fef5d1c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_s8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * addr, int8x16_t value, mve_pred16_t p) +{ + vstrbq_p_s8 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ + +void +foo1 (int8_t * addr, int8x16_t value, mve_pred16_t p) +{ + vstrbq_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c new file mode 100644 index 
0000000000000000000000000000000000000000..e9911c1eff66a0b3c411debec9d4a43e42cc665d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint8_t * addr, uint16x8_t value, mve_pred16_t p) +{ + vstrbq_p_u16 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ + +void +foo1 (uint8_t * addr, uint16x8_t value, mve_pred16_t p) +{ + vstrbq_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..39e0cd14e192fa7de53ff80f0d7290ec4fcb21fd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint8_t * addr, uint32x4_t value, mve_pred16_t p) +{ + vstrbq_p_u32 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.32" } } */ + +void +foo1 (uint8_t * addr, uint32x4_t value, mve_pred16_t p) +{ + vstrbq_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c new file mode 100644 index 0000000000000000000000000000000000000000..809fe38b9ccaaa3e74afdb7ffef6a5ea6c5d86d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_p_u8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ 
+ +#include "arm_mve.h" + +void +foo (uint8_t * addr, uint8x16_t value, mve_pred16_t p) +{ + vstrbq_p_u8 (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ + +void +foo1 (uint8_t * addr, uint8x16_t value, mve_pred16_t p) +{ + vstrbq_p (addr, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s16.c new file mode 100644 index 0000000000000000000000000000000000000000..8813fbe2eb1b6d5d4ba26863598655e3833283a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p_s16 (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ + +void +foo1 (int8_t * base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..f6116ac76613f43e095422acdd3b5e943b8814c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p_s32 (base, offset, value, p); +} + 
+/* { dg-final { scan-assembler "vstrbt.32" } } */ + +void +foo1 (int8_t * base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s8.c new file mode 100644 index 0000000000000000000000000000000000000000..7fa63efd8cc9398e80a34e13c44fa0593b7f8b4f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_s8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (int8_t * base, uint8x16_t offset, int8x16_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p_s8 (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ + +void +foo1 (int8_t * base, uint8x16_t offset, int8x16_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u16.c new file mode 100644 index 0000000000000000000000000000000000000000..3db472aa63a768f5d5558bc9fae7fffe90c94c3b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u16.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint8_t * base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p_u16 (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ + +void +foo1 (uint8_t * base, 
uint16x8_t offset, uint16x8_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.16" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..4055d70bb5fb2c9c3e092c8f26d7814830681422 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint8_t * base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p_u32 (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.32" } } */ + +void +foo1 (uint8_t * base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u8.c new file mode 100644 index 0000000000000000000000000000000000000000..e96f7a228174e4544285f4fa25643b0460fa7c7e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrbq_scatter_offset_p_u8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint8_t * base, uint8x16_t offset, uint8x16_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p_u8 (base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ + +void +foo1 (uint8_t * base, uint8x16_t offset, uint8x16_t value, mve_pred16_t p) +{ + vstrbq_scatter_offset_p 
(base, offset, value, p); +} + +/* { dg-final { scan-assembler "vstrbt.8" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_s32.c new file mode 100644 index 0000000000000000000000000000000000000000..0bce9e646e84e751c3868ada0d84ae33a9b36fdd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_s32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t addr, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_p_s32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t addr, int32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_u32.c new file mode 100644 index 0000000000000000000000000000000000000000..59b21d2c7715a75b812b990b020af154ec73856a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_p_u32.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" + +void +foo (uint32x4_t addr, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_p_u32 (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */ + +void +foo1 (uint32x4_t addr, uint32x4_t value, mve_pred16_t p) +{ + vstrwq_scatter_base_p (addr, 8, value, p); +} + +/* { dg-final { scan-assembler "vstrwt.u32" } } */