diff mbox series

[07/15] arm: [MVE intrinsics] rework vstr scatter_base

Message ID 20241107091820.2010568-8-christophe.lyon@linaro.org
State New
Headers show
Series arm: [MVE intrinsics] Rework store_scatter and load_gather intrinsics | expand

Commit Message

Christophe Lyon Nov. 7, 2024, 9:18 a.m. UTC
Implement vstr?q_scatter_base using the new MVE builtins framework.

We need to introduce a new mode iterator (MVE_4) to cover the set of
modes needed by vstr?q_scatter_base (V4SI V4SF V2DI).

gcc/ChangeLog:

	* config/arm/arm-builtins.cc (arm_strsbs_qualifiers)
	(arm_strsbu_qualifiers, arm_strsbs_p_qualifiers)
	(arm_strsbu_p_qualifiers): Delete.
	* config/arm/arm-mve-builtins-base.cc (class
	vstrq_scatter_base_impl): New.
	(vstrwq_scatter_base, vstrdq_scatter_base): New.
	* config/arm/arm-mve-builtins-base.def (vstrwq_scatter_base)
	(vstrdq_scatter_base): New.
	* config/arm/arm-mve-builtins-base.h (vstrwq_scatter_base)
	(vstrdq_scatter_base): New.
	* config/arm/arm_mve.h (vstrwq_scatter_base): Delete.
	(vstrwq_scatter_base_p): Delete.
	(vstrdq_scatter_base_p): Delete.
	(vstrdq_scatter_base): Delete.
	(vstrwq_scatter_base_s32): Delete.
	(vstrwq_scatter_base_u32): Delete.
	(vstrwq_scatter_base_p_s32): Delete.
	(vstrwq_scatter_base_p_u32): Delete.
	(vstrdq_scatter_base_p_s64): Delete.
	(vstrdq_scatter_base_p_u64): Delete.
	(vstrdq_scatter_base_s64): Delete.
	(vstrdq_scatter_base_u64): Delete.
	(vstrwq_scatter_base_f32): Delete.
	(vstrwq_scatter_base_p_f32): Delete.
	(__arm_vstrwq_scatter_base_s32): Delete.
	(__arm_vstrwq_scatter_base_u32): Delete.
	(__arm_vstrwq_scatter_base_p_s32): Delete.
	(__arm_vstrwq_scatter_base_p_u32): Delete.
	(__arm_vstrdq_scatter_base_p_s64): Delete.
	(__arm_vstrdq_scatter_base_p_u64): Delete.
	(__arm_vstrdq_scatter_base_s64): Delete.
	(__arm_vstrdq_scatter_base_u64): Delete.
	(__arm_vstrwq_scatter_base_f32): Delete.
	(__arm_vstrwq_scatter_base_p_f32): Delete.
	(__arm_vstrwq_scatter_base): Delete.
	(__arm_vstrwq_scatter_base_p): Delete.
	(__arm_vstrdq_scatter_base_p): Delete.
	(__arm_vstrdq_scatter_base): Delete.
	* config/arm/arm_mve_builtins.def (vstrwq_scatter_base_s)
	(vstrwq_scatter_base_u, vstrwq_scatter_base_p_s)
	(vstrwq_scatter_base_p_u, vstrdq_scatter_base_s)
	(vstrwq_scatter_base_f, vstrdq_scatter_base_p_s)
	(vstrwq_scatter_base_p_f, vstrdq_scatter_base_u)
	(vstrdq_scatter_base_p_u): Delete.
	* config/arm/iterators.md (MVE_4): New.
	(supf): Remove VSTRWQSB_S, VSTRWQSB_U.
	(VSTRWSBQ): Delete.
	* config/arm/mve.md (mve_vstrwq_scatter_base_<supf>v4si): Delete.
	(mve_vstrwq_scatter_base_p_<supf>v4si): Delete.
	(mve_vstrdq_scatter_base_p_<supf>v2di): Delete.
	(mve_vstrdq_scatter_base_<supf>v2di): Delete.
	(mve_vstrwq_scatter_base_fv4sf): Delete.
	(mve_vstrwq_scatter_base_p_fv4sf): Delete.
	(@mve_vstrq_scatter_base_<mode>): New.
	(@mve_vstrq_scatter_base_p_<mode>): New.
	* config/arm/unspecs.md (VSTRWQSB_S, VSTRWQSB_U, VSTRWQSB_F):
	Delete.
	(VSTRSBQ, VSTRSBQ_P): New.
---
 gcc/config/arm/arm-builtins.cc           |  23 ---
 gcc/config/arm/arm-mve-builtins-base.cc  |  38 +++++
 gcc/config/arm/arm-mve-builtins-base.def |   3 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h                 | 196 -----------------------
 gcc/config/arm/arm_mve_builtins.def      |  10 --
 gcc/config/arm/iterators.md              |   5 +-
 gcc/config/arm/mve.md                    | 150 +++--------------
 gcc/config/arm/unspecs.md                |   5 +-
 9 files changed, 72 insertions(+), 360 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index 416b76dc815..15f663e2a0e 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -610,29 +610,6 @@  arm_quadop_unone_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define QUADOP_UNONE_UNONE_UNONE_NONE_PRED_QUALIFIERS \
   (arm_quadop_unone_unone_unone_none_pred_qualifiers)
 
-static enum arm_type_qualifiers
-arm_strsbs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_void, qualifier_unsigned, qualifier_immediate, qualifier_none};
-#define STRSBS_QUALIFIERS (arm_strsbs_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_void, qualifier_unsigned, qualifier_immediate,
-      qualifier_unsigned};
-#define STRSBU_QUALIFIERS (arm_strsbu_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_void, qualifier_unsigned, qualifier_immediate,
-      qualifier_none, qualifier_predicate};
-#define STRSBS_P_QUALIFIERS (arm_strsbs_p_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_void, qualifier_unsigned, qualifier_immediate,
-      qualifier_unsigned, qualifier_predicate};
-#define STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers)
-
 static enum arm_type_qualifiers
 arm_ldrgu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_pointer, qualifier_unsigned};
diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 40ac09af62b..62ad5e2afd0 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -291,6 +291,42 @@  public:
   }
 };
 
+  /* Builds the vstrq_scatter_base intrinsics (PRED_none and PRED_p forms).  */
+class vstrq_scatter_base_impl : public function_base
+{
+public:
+  CONSTEXPR vstrq_scatter_base_impl (scalar_mode to_int_mode)
+    : m_to_int_mode (to_int_mode)
+  {}
+
+  unsigned int call_properties (const function_instance &) const override
+  {
+    return CP_WRITE_MEMORY;  /* Same for every instance (arg unused).  */
+  }
+
+  rtx expand (function_expander &e) const override
+  {
+    insn_code icode;  /* Chosen from e.pred and e.vector_mode (0).  */
+    switch (e.pred)
+      {
+      case PRED_none:
+	icode = code_for_mve_vstrq_scatter_base (e.vector_mode (0));
+	break;
+
+      case PRED_p:
+	icode = code_for_mve_vstrq_scatter_base_p (e.vector_mode (0));
+	break;
+
+      default:
+	gcc_unreachable ();  /* No other predication is handled here.  */
+      }
+    return e.use_exact_insn (icode);
+  }
+
+  /* Mode of one memory element; NOTE(review): not read in this class.  */
+  scalar_mode m_to_int_mode;
+};
+
 /* Builds the vldrq* intrinsics.  */
 class vldrq_impl : public load_extending
 {
@@ -1261,12 +1297,14 @@  FUNCTION (vst1q, vst1_impl,)
 FUNCTION (vstrbq, vstrq_impl, (QImode, opt_scalar_mode ()))
 FUNCTION (vstrbq_scatter, vstrq_scatter_impl, (false, QImode, opt_scalar_mode ()))
 FUNCTION (vstrdq_scatter, vstrq_scatter_impl, (false, DImode, opt_scalar_mode ()))
+FUNCTION (vstrdq_scatter_base, vstrq_scatter_base_impl, (DImode))
 FUNCTION (vstrdq_scatter_shifted, vstrq_scatter_impl, (true, DImode, opt_scalar_mode ()))
 FUNCTION (vstrhq, vstrq_impl, (HImode, HFmode))
 FUNCTION (vstrhq_scatter, vstrq_scatter_impl, (false, HImode, HFmode))
 FUNCTION (vstrhq_scatter_shifted, vstrq_scatter_impl, (true, HImode, HFmode))
 FUNCTION (vstrwq, vstrq_impl, (SImode, SFmode))
 FUNCTION (vstrwq_scatter, vstrq_scatter_impl, (false, SImode, SFmode))
+FUNCTION (vstrwq_scatter_base, vstrq_scatter_base_impl, (SImode))
 FUNCTION (vstrwq_scatter_shifted, vstrq_scatter_impl, (true, SImode, SFmode))
 FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
 FUNCTION (vuninitializedq, vuninitializedq_impl,)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index d4c28be904b..a56fae7414e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -177,8 +177,10 @@  DEF_MVE_FUNCTION (vstrhq_scatter, store_scatter_offset, integer_16_32, p_or_none
 DEF_MVE_FUNCTION (vstrhq_scatter_shifted, store_scatter_offset, integer_16_32, p_or_none)
 DEF_MVE_FUNCTION (vstrwq, store, integer_32, p_or_none)
 DEF_MVE_FUNCTION (vstrwq_scatter, store_scatter_offset, integer_32, p_or_none)
+DEF_MVE_FUNCTION (vstrwq_scatter_base, store_scatter_base, integer_32, p_or_none)
 DEF_MVE_FUNCTION (vstrwq_scatter_shifted, store_scatter_offset, integer_32, p_or_none)
 DEF_MVE_FUNCTION (vstrdq_scatter, store_scatter_offset, integer_64, p_or_none)
+DEF_MVE_FUNCTION (vstrdq_scatter_base, store_scatter_base, integer_64, p_or_none)
 DEF_MVE_FUNCTION (vstrdq_scatter_shifted, store_scatter_offset, integer_64, p_or_none)
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
@@ -253,6 +255,7 @@  DEF_MVE_FUNCTION (vstrhq_scatter, store_scatter_offset, float_16, p_or_none)
 DEF_MVE_FUNCTION (vstrhq_scatter_shifted, store_scatter_offset, float_16, p_or_none)
 DEF_MVE_FUNCTION (vstrwq, store, float_32, p_or_none)
 DEF_MVE_FUNCTION (vstrwq_scatter, store_scatter_offset, float_32, p_or_none)
+DEF_MVE_FUNCTION (vstrwq_scatter_base, store_scatter_base, float_32, p_or_none)
 DEF_MVE_FUNCTION (vstrwq_scatter_shifted, store_scatter_offset, float_32, p_or_none)
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_float, none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 096e707bb91..261248086dc 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -208,12 +208,14 @@  extern const function_base *const vst1q;
 extern const function_base *const vstrbq;
 extern const function_base *const vstrbq_scatter;
 extern const function_base *const vstrdq_scatter;
+extern const function_base *const vstrdq_scatter_base;
 extern const function_base *const vstrdq_scatter_shifted;
 extern const function_base *const vstrhq;
 extern const function_base *const vstrhq_scatter;
 extern const function_base *const vstrhq_scatter_shifted;
 extern const function_base *const vstrwq;
 extern const function_base *const vstrwq_scatter;
+extern const function_base *const vstrwq_scatter_base;
 extern const function_base *const vstrwq_scatter_shifted;
 extern const function_base *const vsubq;
 extern const function_base *const vuninitializedq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index d39ea977186..ba158a41a68 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,9 +42,7 @@ 
 
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
 #define vldrbq_gather_offset(__base, __offset) __arm_vldrbq_gather_offset(__base, __offset)
-#define vstrwq_scatter_base_p(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_p(__addr, __offset, __value, __p)
 #define vldrbq_gather_offset_z(__base, __offset, __p) __arm_vldrbq_gather_offset_z(__base, __offset, __p)
 #define vldrhq_gather_offset(__base, __offset) __arm_vldrhq_gather_offset(__base, __offset)
 #define vldrhq_gather_offset_z(__base, __offset, __p) __arm_vldrhq_gather_offset_z(__base, __offset, __p)
@@ -58,8 +56,6 @@ 
 #define vldrwq_gather_offset_z(__base, __offset, __p) __arm_vldrwq_gather_offset_z(__base, __offset, __p)
 #define vldrwq_gather_shifted_offset(__base, __offset) __arm_vldrwq_gather_shifted_offset(__base, __offset)
 #define vldrwq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p)
-#define vstrdq_scatter_base_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p(__addr, __offset, __value, __p)
-#define vstrdq_scatter_base(__addr, __offset, __value) __arm_vstrdq_scatter_base(__addr, __offset, __value)
 #define vuninitializedq(__v) __arm_vuninitializedq(__v)
 #define vstrdq_scatter_base_wb(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb(__addr, __offset, __value)
 #define vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p)
@@ -81,8 +77,6 @@ 
 #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
 #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
 #define vpnot(__a) __arm_vpnot(__a)
-#define vstrwq_scatter_base_s32(__addr,  __offset, __value) __arm_vstrwq_scatter_base_s32(__addr,  __offset, __value)
-#define vstrwq_scatter_base_u32(__addr,  __offset, __value) __arm_vstrwq_scatter_base_u32(__addr,  __offset, __value)
 #define vldrbq_gather_offset_u8(__base, __offset) __arm_vldrbq_gather_offset_u8(__base, __offset)
 #define vldrbq_gather_offset_s8(__base, __offset) __arm_vldrbq_gather_offset_s8(__base, __offset)
 #define vldrbq_gather_offset_u16(__base, __offset) __arm_vldrbq_gather_offset_u16(__base, __offset)
@@ -91,8 +85,6 @@ 
 #define vldrbq_gather_offset_s32(__base, __offset) __arm_vldrbq_gather_offset_s32(__base, __offset)
 #define vldrwq_gather_base_s32(__addr,  __offset) __arm_vldrwq_gather_base_s32(__addr,  __offset)
 #define vldrwq_gather_base_u32(__addr,  __offset) __arm_vldrwq_gather_base_u32(__addr,  __offset)
-#define vstrwq_scatter_base_p_s32(__addr,  __offset, __value, __p) __arm_vstrwq_scatter_base_p_s32(__addr,  __offset, __value, __p)
-#define vstrwq_scatter_base_p_u32(__addr,  __offset, __value, __p) __arm_vstrwq_scatter_base_p_u32(__addr,  __offset, __value, __p)
 #define vldrbq_gather_offset_z_s16(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s16(__base, __offset, __p)
 #define vldrbq_gather_offset_z_u8(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u8(__base, __offset, __p)
 #define vldrbq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s32(__base, __offset, __p)
@@ -147,12 +139,6 @@ 
 #define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p)
 #define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p)
 #define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p)
-#define vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p)
-#define vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p)
-#define vstrdq_scatter_base_s64(__addr, __offset, __value) __arm_vstrdq_scatter_base_s64(__addr, __offset, __value)
-#define vstrdq_scatter_base_u64(__addr, __offset, __value) __arm_vstrdq_scatter_base_u64(__addr, __offset, __value)
-#define vstrwq_scatter_base_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_f32(__addr, __offset, __value)
-#define vstrwq_scatter_base_p_f32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_p_f32(__addr, __offset, __value, __p)
 #define vuninitializedq_u8(void) __arm_vuninitializedq_u8(void)
 #define vuninitializedq_u16(void) __arm_vuninitializedq_u16(void)
 #define vuninitializedq_u32(void) __arm_vuninitializedq_u32(void)
@@ -318,20 +304,6 @@  __arm_vpnot (mve_pred16_t __a)
   return __builtin_mve_vpnotv16bi (__a);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_s32 (uint32x4_t __addr, const int __offset, int32x4_t __value)
-{
-  __builtin_mve_vstrwq_scatter_base_sv4si (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_u32 (uint32x4_t __addr, const int __offset, uint32x4_t __value)
-{
-  __builtin_mve_vstrwq_scatter_base_uv4si (__addr, __offset, __value);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrbq_gather_offset_u8 (uint8_t const * __base, uint8x16_t __offset)
@@ -388,20 +360,6 @@  __arm_vldrwq_gather_base_u32 (uint32x4_t __addr, const int __offset)
   return __builtin_mve_vldrwq_gather_base_uv4si (__addr, __offset);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_p_s32 (uint32x4_t __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrwq_scatter_base_p_sv4si (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_p_u32 (uint32x4_t __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrwq_scatter_base_p_uv4si (__addr, __offset, __value, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrbq_gather_offset_z_s8 (int8_t const * __base, uint8x16_t __offset, mve_pred16_t __p)
@@ -711,34 +669,6 @@  __arm_vldrwq_gather_shifted_offset_z_u32 (uint32_t const * __base, uint32x4_t __
   return __builtin_mve_vldrwq_gather_shifted_offset_z_uv4si ((__builtin_neon_si *) __base, __offset, __p);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_p_s64 (uint64x2_t __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrdq_scatter_base_p_sv2di (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_p_u64 (uint64x2_t __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrdq_scatter_base_p_uv2di (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_s64 (uint64x2_t __addr, const int __offset, int64x2_t __value)
-{
-  __builtin_mve_vstrdq_scatter_base_sv2di (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_u64 (uint64x2_t __addr, const int __offset, uint64x2_t __value)
-{
-  __builtin_mve_vstrdq_scatter_base_uv2di (__addr, __offset, __value);
-}
-
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset)
@@ -1375,20 +1305,6 @@  __arm_vldrwq_gather_shifted_offset_z_f32 (float32_t const * __base, uint32x4_t _
   return __builtin_mve_vldrwq_gather_shifted_offset_z_fv4sf ((__builtin_neon_si *) __base, __offset, __p);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_f32 (uint32x4_t __addr, const int __offset, float32x4_t __value)
-{
-  __builtin_mve_vstrwq_scatter_base_fv4sf (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_p_f32 (uint32x4_t __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrwq_scatter_base_p_fv4sf (__addr, __offset, __value, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset)
@@ -1555,20 +1471,6 @@  __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
  __arm_vst4q_u32 (__addr, __value);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base (uint32x4_t __addr, const int __offset, int32x4_t __value)
-{
- __arm_vstrwq_scatter_base_s32 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base (uint32x4_t __addr, const int __offset, uint32x4_t __value)
-{
- __arm_vstrwq_scatter_base_u32 (__addr, __offset, __value);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrbq_gather_offset (uint8_t const * __base, uint8x16_t __offset)
@@ -1611,20 +1513,6 @@  __arm_vldrbq_gather_offset (int8_t const * __base, uint32x4_t __offset)
  return __arm_vldrbq_gather_offset_s32 (__base, __offset);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_p (uint32x4_t __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_p_s32 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_p (uint32x4_t __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_p_u32 (__addr, __offset, __value, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrbq_gather_offset_z (int8_t const * __base, uint8x16_t __offset, mve_pred16_t __p)
@@ -1891,34 +1779,6 @@  __arm_vldrwq_gather_shifted_offset_z (uint32_t const * __base, uint32x4_t __offs
  return __arm_vldrwq_gather_shifted_offset_z_u32 (__base, __offset, __p);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_p (uint64x2_t __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
-{
- __arm_vstrdq_scatter_base_p_s64 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_p (uint64x2_t __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
-{
- __arm_vstrdq_scatter_base_p_u64 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base (uint64x2_t __addr, const int __offset, int64x2_t __value)
-{
- __arm_vstrdq_scatter_base_s64 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base (uint64x2_t __addr, const int __offset, uint64x2_t __value)
-{
- __arm_vstrdq_scatter_base_u64 (__addr, __offset, __value);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrdq_scatter_base_wb (uint64x2_t * __addr, const int __offset, int64x2_t __value)
@@ -2285,20 +2145,6 @@  __arm_vldrwq_gather_shifted_offset_z (float32_t const * __base, uint32x4_t __off
  return __arm_vldrwq_gather_shifted_offset_z_f32 (__base, __offset, __p);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base (uint32x4_t __addr, const int __offset, float32x4_t __value)
-{
- __arm_vstrwq_scatter_base_f32 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_p (uint32x4_t __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_p_f32 (__addr, __offset, __value, __p);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, float32x4_t __value)
@@ -2795,18 +2641,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x2_t]: __arm_vst2q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x2_t)), \
   int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x2_t]: __arm_vst2q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x2_t)));})
 
-#define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32(p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_p_f32(p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
 #define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \
@@ -2880,11 +2714,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
 
-#define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_u32(p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
-
 #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
@@ -2894,11 +2723,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vstrwq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
@@ -2957,16 +2781,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x2_t]: __arm_vst2q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x2_t)));})
 
 
-#define __arm_vstrdq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
-
-#define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
-
 #define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \
@@ -3065,16 +2879,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
-
-#define __arm_vstrdq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
-
 #endif /* __cplusplus  */
 #endif /* __ARM_FEATURE_MVE  */
 #endif /* _GCC_ARM_MVE_H.  */
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index dc0618d5692..d83ce6aa3c6 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -663,14 +663,10 @@  VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf)
 VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf)
 VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf)
 VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf)
-VAR1 (STRSBS, vstrwq_scatter_base_s, v4si)
-VAR1 (STRSBU, vstrwq_scatter_base_u, v4si)
 VAR3 (LDRGU, vldrbq_gather_offset_u, v16qi, v8hi, v4si)
 VAR3 (LDRGS, vldrbq_gather_offset_s, v16qi, v8hi, v4si)
 VAR1 (LDRGBS, vldrwq_gather_base_s, v4si)
 VAR1 (LDRGBU, vldrwq_gather_base_u, v4si)
-VAR1 (STRSBS_P, vstrwq_scatter_base_p_s, v4si)
-VAR1 (STRSBU_P, vstrwq_scatter_base_p_u, v4si)
 VAR1 (LDRGBS_Z, vldrwq_gather_base_z_s, v4si)
 VAR1 (LDRGBU_Z, vldrwq_gather_base_z_u, v4si)
 VAR3 (LDRGS_Z, vldrbq_gather_offset_z_s, v16qi, v8hi, v4si)
@@ -713,12 +709,6 @@  VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di)
 VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di)
 VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si)
 VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si)
-VAR1 (STRSBS, vstrdq_scatter_base_s, v2di)
-VAR1 (STRSBS, vstrwq_scatter_base_f, v4sf)
-VAR1 (STRSBS_P, vstrdq_scatter_base_p_s, v2di)
-VAR1 (STRSBS_P, vstrwq_scatter_base_p_f, v4sf)
-VAR1 (STRSBU, vstrdq_scatter_base_u, v2di)
-VAR1 (STRSBU_P, vstrdq_scatter_base_p_u, v2di)
 VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
 VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
 VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 814f25cb6d3..0482f1f8dd1 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -277,8 +277,9 @@  (define_mode_iterator MVE_VLD_ST_scatter [V16QI V8HI V4SI V8HF V4SF V2DI])
 (define_mode_iterator MVE_VLD_ST_scatter_shifted [V8HI V4SI V8HF V4SF V2DI])
 (define_mode_iterator MVE_0 [V8HF V4SF])
 (define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI])
-(define_mode_iterator MVE_3 [V16QI V8HI])
 (define_mode_iterator MVE_2 [V16QI V8HI V4SI])
+(define_mode_iterator MVE_3 [V16QI V8HI])
+(define_mode_iterator MVE_4 [V4SI V4SF V2DI])
 (define_mode_iterator MVE_5 [V8HI V4SI])
 (define_mode_iterator MVE_7 [V16BI V8BI V4BI V2QI])
 (define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI V2QI])
@@ -2526,7 +2527,6 @@  (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
 		       (VMLALDAVAXQ_P_S "s")
 		       (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
-		       (VSTRWQSB_S "s") (VSTRWQSB_U "u")
 		       (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRWQGB_S "s")
 		       (VLDRWQGB_U "u") (VLDRHQGO_S "s")
 		       (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u")
@@ -2936,7 +2936,6 @@  (define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S])
 (define_int_iterator VSHLLxQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S])
 (define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U])
 (define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U])
-(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U])
 (define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U])
 (define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U])
 (define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index ea85804e739..a82816e84ee 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -3396,28 +3396,24 @@  (define_insn "@mve_vstrq_truncate_scatter_offset_p_<mode>"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_truncate_scatter_offset_<mode>"))
   (set_attr "length" "8")])
 
-;
-;
+;; Vector scatter stores with base
+;;
+;; [vstrdq_scatter_base_s vstrdq_scatter_base_u]
 ;; [vstrwq_scatter_base_s vstrwq_scatter_base_u]
+;; [vstrwq_scatter_base_f]
 ;;
-(define_insn "mve_vstrwq_scatter_base_<supf>v4si"
+(define_insn "@mve_vstrq_scatter_base_<mode>"
   [(set (mem:BLK (scratch))
 	(unspec:BLK
-		[(match_operand:V4SI 0 "s_register_operand" "w")
+		[(match_operand:<MVE_scatter_offset> 0 "s_register_operand" "w")
 		 (match_operand:SI 1 "immediate_operand" "i")
-		 (match_operand:V4SI 2 "s_register_operand" "w")]
-	 VSTRWSBQ))
+		 (match_operand:MVE_4 2 "s_register_operand" "w")]
+	 VSTRSBQ))
   ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn("vstrw.u32\t%q2, [%q0, %1]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+  "vstr<MVE_elem_ch>.u<V_sz_elem>\t%q2, [%q0, %1]"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_<mode>"))
   (set_attr "length" "4")])
 
 ;;
@@ -3464,28 +3460,26 @@  (define_insn "mve_vldrwq_gather_base_<supf>v4si"
 }
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si"))
   (set_attr "length" "4")])
+
+;; Predicated vector scatter stores with base
 ;;
+;; [vstrdq_scatter_base_p_s vstrdq_scatter_base_p_u]
 ;; [vstrwq_scatter_base_p_s vstrwq_scatter_base_p_u]
+;; [vstrwq_scatter_base_p_f]
 ;;
-(define_insn "mve_vstrwq_scatter_base_p_<supf>v4si"
+(define_insn "@mve_vstrq_scatter_base_p_<mode>"
   [(set (mem:BLK (scratch))
 	(unspec:BLK
-		[(match_operand:V4SI 0 "s_register_operand" "w")
+		[(match_operand:<MVE_scatter_offset> 0 "s_register_operand" "w")
 		 (match_operand:SI 1 "immediate_operand" "i")
-		 (match_operand:V4SI 2 "s_register_operand" "w")
-		 (match_operand:V4BI 3 "vpr_register_operand" "Up")]
-	 VSTRWSBQ))
+		 (match_operand:MVE_4 2 "s_register_operand" "w")
+		 (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
+	 VSTRSBQ_P))
   ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+  "vpst\n\tvstr<MVE_elem_ch>t.u<V_sz_elem>\t%q2, [%q0, %1]"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_<mode>"))
   (set_attr "length" "8")])
 
 ;;
@@ -4147,100 +4141,6 @@  (define_insn "mve_vstrq_truncate_scatter_shifted_offset_p_v4si"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_truncate_scatter_shifted_offset_v4si"))
   (set_attr "length" "8")])
 
-;;
-;; [vstrdq_scatter_base_p_s vstrdq_scatter_base_p_u]
-;;
-(define_insn "mve_vstrdq_scatter_base_p_<supf>v2di"
-  [(set (mem:BLK (scratch))
-	(unspec:BLK
-		[(match_operand:V2DI 0 "s_register_operand" "w")
-		 (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
-		 (match_operand:V2DI 2 "s_register_operand" "w")
-		 (match_operand:V2QI 3 "vpr_register_operand" "Up")]
-	 VSTRDSBQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_<supf>v2di"))
-  (set_attr "length" "8")])
-
-;;
-;; [vstrdq_scatter_base_s vstrdq_scatter_base_u]
-;;
-(define_insn "mve_vstrdq_scatter_base_<supf>v2di"
-  [(set (mem:BLK (scratch))
-	(unspec:BLK
-		[(match_operand:V2DI 0 "s_register_operand" "=w")
-		 (match_operand:SI 1 "mve_vldrd_immediate" "Ri")
-		 (match_operand:V2DI 2 "s_register_operand" "w")]
-	 VSTRDSBQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_<supf>v2di"))
-  (set_attr "length" "4")])
-
-;;
-;; [vstrwq_scatter_base_f]
-;;
-(define_insn "mve_vstrwq_scatter_base_fv4sf"
-  [(set (mem:BLK (scratch))
-	(unspec:BLK
-		[(match_operand:V4SI 0 "s_register_operand" "w")
-		 (match_operand:SI 1 "immediate_operand" "i")
-		 (match_operand:V4SF 2 "s_register_operand" "w")]
-	 VSTRWQSB_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_fv4sf"))
-  (set_attr "length" "4")])
-
-;;
-;; [vstrwq_scatter_base_p_f]
-;;
-(define_insn "mve_vstrwq_scatter_base_p_fv4sf"
-  [(set (mem:BLK (scratch))
-	(unspec:BLK
-		[(match_operand:V4SI 0 "s_register_operand" "w")
-		 (match_operand:SI 1 "immediate_operand" "i")
-		 (match_operand:V4SF 2 "s_register_operand" "w")
-		 (match_operand:V4BI 3 "vpr_register_operand" "Up")]
-	 VSTRWQSB_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vpst\n\tvstrwt.u32\t%q2, [%q0, %1]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_fv4sf"))
-  (set_attr "length" "8")])
-
 ;;
 ;;
 ;; [vddupq_u_insn, vidupq_u_insn]
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 11d85273b56..2116a17f1a1 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -1146,8 +1146,8 @@  (define_c_enum "unspec" [
   VMAXNMQ_M_F
   VMINNMQ_M_F
   VSUBQ_M_F
-  VSTRWQSB_S
-  VSTRWQSB_U
+  VSTRSBQ
+  VSTRSBQ_P
   VSTRQSO
   VSTRQSO_P
   VSTRQSO_TRUNC
@@ -1189,7 +1189,6 @@  (define_c_enum "unspec" [
   VSTRSSOQ_P
   VSTRSSOQ_TRUNC
   VSTRSSOQ_TRUNC_P
-  VSTRWQSB_F
   VDDUPQ
   VDDUPQ_M
   VDWDUPQ