diff mbox series

[08/15] arm: [MVE intrinsics] rework vstr scatter_base_wb

Message ID 20241107091820.2010568-9-christophe.lyon@linaro.org
State New
Headers show
Series arm: [MVE intrinsics] Rework store_scatter and load_gather intrinsics | expand

Commit Message

Christophe Lyon Nov. 7, 2024, 9:18 a.m. UTC
Implement vstr?q_scatter_base_wb using the new MVE builtins framework.

The patch introduces a new 'b' type for signatures: a pointer to a vector
of unsigned elements whose width is given by the first type suffix.  It
represents the type of the 'base' argument of vstr?q_scatter_base_wb.

gcc/ChangeLog:

	* config/arm/arm-builtins.cc (arm_strsbwbs_qualifiers)
	(arm_strsbwbu_qualifiers, arm_strsbwbs_p_qualifiers)
	(arm_strsbwbu_p_qualifiers): Delete.
	* config/arm/arm-mve-builtins-base.cc (vstrq_scatter_base_impl):
	Add support for MODE_wb.
	* config/arm/arm-mve-builtins-shapes.cc (parse_type): Add support
	for 'b' type.
	(store_scatter_base): Add support for MODE_wb.
	* config/arm/arm-mve-builtins.cc
	(function_resolver::require_pointer_to_type): New.
	* config/arm/arm-mve-builtins.h
	(function_resolver::require_pointer_to_type): New.
	* config/arm/arm_mve.h (vstrdq_scatter_base_wb): Delete.
	(vstrdq_scatter_base_wb_p): Delete.
	(vstrwq_scatter_base_wb_p): Delete.
	(vstrwq_scatter_base_wb): Delete.
	(vstrdq_scatter_base_wb_p_s64): Delete.
	(vstrdq_scatter_base_wb_p_u64): Delete.
	(vstrdq_scatter_base_wb_s64): Delete.
	(vstrdq_scatter_base_wb_u64): Delete.
	(vstrwq_scatter_base_wb_p_s32): Delete.
	(vstrwq_scatter_base_wb_p_f32): Delete.
	(vstrwq_scatter_base_wb_p_u32): Delete.
	(vstrwq_scatter_base_wb_s32): Delete.
	(vstrwq_scatter_base_wb_u32): Delete.
	(vstrwq_scatter_base_wb_f32): Delete.
	(__arm_vstrdq_scatter_base_wb_s64): Delete.
	(__arm_vstrdq_scatter_base_wb_u64): Delete.
	(__arm_vstrdq_scatter_base_wb_p_s64): Delete.
	(__arm_vstrdq_scatter_base_wb_p_u64): Delete.
	(__arm_vstrwq_scatter_base_wb_p_s32): Delete.
	(__arm_vstrwq_scatter_base_wb_p_u32): Delete.
	(__arm_vstrwq_scatter_base_wb_s32): Delete.
	(__arm_vstrwq_scatter_base_wb_u32): Delete.
	(__arm_vstrwq_scatter_base_wb_f32): Delete.
	(__arm_vstrwq_scatter_base_wb_p_f32): Delete.
	(__arm_vstrdq_scatter_base_wb): Delete.
	(__arm_vstrdq_scatter_base_wb_p): Delete.
	(__arm_vstrwq_scatter_base_wb_p): Delete.
	(__arm_vstrwq_scatter_base_wb): Delete.
	* config/arm/arm_mve_builtins.def (vstrwq_scatter_base_wb_u)
	(vstrdq_scatter_base_wb_u, vstrwq_scatter_base_wb_p_u)
	(vstrdq_scatter_base_wb_p_u, vstrwq_scatter_base_wb_s)
	(vstrwq_scatter_base_wb_f, vstrdq_scatter_base_wb_s)
	(vstrwq_scatter_base_wb_p_s, vstrwq_scatter_base_wb_p_f)
	(vstrdq_scatter_base_wb_p_s): Delete.
	* config/arm/iterators.md (supf): Remove VSTRWQSBWB_S,
	VSTRWQSBWB_U, VSTRDQSBWB_S, VSTRDQSBWB_U.
	(VSTRDSBQ, VSTRWSBWBQ, VSTRDSBWBQ): Delete.
	* config/arm/mve.md (mve_vstrwq_scatter_base_wb_<supf>v4si): Delete.
	(mve_vstrwq_scatter_base_wb_p_<supf>v4si): Delete.
	(mve_vstrwq_scatter_base_wb_fv4sf): Delete.
	(mve_vstrwq_scatter_base_wb_p_fv4sf): Delete.
	(mve_vstrdq_scatter_base_wb_<supf>v2di): Delete.
	(mve_vstrdq_scatter_base_wb_p_<supf>v2di): Delete.
	(@mve_vstrq_scatter_base_wb_<mode>): New.
	(@mve_vstrq_scatter_base_wb_p_<mode>): New.
	* config/arm/unspecs.md (VSTRWQSBWB_S, VSTRWQSBWB_U, VSTRWQSBWB_F)
	(VSTRDQSBWB_S, VSTRDQSBWB_U): Delete.
	(VSTRSBWBQ, VSTRSBWBQ_P): New.
---
 gcc/config/arm/arm-builtins.cc            |  22 ---
 gcc/config/arm/arm-mve-builtins-base.cc   |  42 ++++-
 gcc/config/arm/arm-mve-builtins-shapes.cc |  36 ++++-
 gcc/config/arm/arm-mve-builtins.cc        |  25 +++
 gcc/config/arm/arm-mve-builtins.h         |   1 +
 gcc/config/arm/arm_mve.h                  | 187 ----------------------
 gcc/config/arm/arm_mve_builtins.def       |  10 --
 gcc/config/arm/iterators.md               |   8 +-
 gcc/config/arm/mve.md                     | 168 ++++---------------
 gcc/config/arm/unspecs.md                 |   7 +-
 10 files changed, 128 insertions(+), 378 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index 15f663e2a0e..72f63b16959 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -687,28 +687,6 @@  arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
       qualifier_predicate};
 #define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers)
 
-static enum arm_type_qualifiers
-arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_none};
-#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_unsigned};
-#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_unsigned, qualifier_unsigned, qualifier_const,
-      qualifier_none, qualifier_predicate};
-#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_unsigned, qualifier_unsigned, qualifier_const,
-      qualifier_unsigned, qualifier_predicate};
-#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers)
-
 static enum arm_type_qualifiers
 arm_lsll_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_none};
diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 62ad5e2afd0..27e31d6c8cd 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -304,23 +304,59 @@  public:
     return CP_WRITE_MEMORY;
   }
 
+  machine_mode memory_vector_mode (const function_instance &fi) const override
+  {
+    poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
+    return arm_mve_data_mode (m_to_int_mode, nunits).require ();
+  }
+
   rtx expand (function_expander &e) const override
   {
     insn_code icode;
+    rtx insns, base_ptr, new_base;
+    machine_mode base_mode;
+
+    if ((e.mode_suffix_id != MODE_none)
+	&& (e.mode_suffix_id != MODE_wb))
+      gcc_unreachable ();
+
+    /* In _wb mode, the vector of base addresses is passed via a
+       pointer: dereference it.  */
+    if (e.mode_suffix_id == MODE_wb)
+      {
+	base_mode = e.memory_vector_mode ();
+	rtx base = gen_reg_rtx (base_mode);
+	base_ptr = e.args[0];
+	emit_insn (gen_rtx_SET (base, gen_rtx_MEM (base_mode, base_ptr)));
+	e.args[0] = base;
+	new_base = gen_reg_rtx (base_mode);
+	e.args.quick_insert (0, new_base);
+      }
+
     switch (e.pred)
       {
       case PRED_none:
-	icode = code_for_mve_vstrq_scatter_base (e.vector_mode (0));
+	icode = (e.mode_suffix_id == MODE_none)
+	  ? code_for_mve_vstrq_scatter_base (e.vector_mode (0))
+	  : code_for_mve_vstrq_scatter_base_wb (e.vector_mode (0));
 	break;
 
       case PRED_p:
-	icode = code_for_mve_vstrq_scatter_base_p (e.vector_mode (0));
+	icode = (e.mode_suffix_id == MODE_none)
+	  ? code_for_mve_vstrq_scatter_base_p (e.vector_mode (0))
+	  : code_for_mve_vstrq_scatter_base_wb_p (e.vector_mode (0));
 	break;
 
       default:
 	gcc_unreachable ();
       }
-    return e.use_exact_insn (icode);
+    insns = e.use_exact_insn (icode);
+
+    /* In _wb mode, store the updated base back through the pointer.  */
+    if (e.mode_suffix_id == MODE_wb)
+      emit_insn (gen_rtx_SET (gen_rtx_MEM (base_mode, base_ptr), new_base));
+
+    return insns;
   }
 
   /* The mode of a single memory element.  */
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 64d4ba5d74e..03714ffb435 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -150,6 +150,7 @@  parse_element_type (const function_instance &instance, const char *&format)
    _       - void
    al      - array pointer for loads
    as      - array pointer for stores
+   b       - pointer to vector of unsigned, width given by the first type suffix
    p       - predicates with type mve_pred16_t
    s<elt>  - a scalar type with the given element suffix
    t<elt>  - a vector or tuple type with given element suffix [*1]
@@ -181,6 +182,15 @@  parse_type (const function_instance &instance, const char *&format)
       gcc_unreachable ();
     }
 
+  if (ch == 'b')
+    {
+      type_class_index tclass = TYPE_unsigned;
+      unsigned int bits = instance.type_suffix (0).element_bits;
+      type_suffix_index suffix = find_type_suffix (tclass, bits);
+      tree acle_type = acle_vector_types[0][type_suffixes[suffix].vector_type];
+      return build_pointer_type (acle_type);
+    }
+
   if (ch == 'p')
     return get_mve_pred16_t ();
 
@@ -1684,7 +1694,8 @@  SHAPE (store_scatter_offset)
 
    Example: vstrbq_scatter_base
    void [__arm_]vstrwq_scatter_base[_s32](uint32x4_t addr, const int offset, int32x4_t value)
-   void [__arm_]vstrwq_scatter_base_p[_s32](uint32x4_t addr, const int offset, int32x4_t value, mve_pred16_t p)  */
+   void [__arm_]vstrwq_scatter_base_p[_s32](uint32x4_t addr, const int offset, int32x4_t value, mve_pred16_t p)
+   void [__arm_]vstrdq_scatter_base_wb[_s64](uint64x2_t *addr, const int offset, int64x2_t value)  */
 struct store_scatter_base_def : public store_scatter
 {
   void
@@ -1692,12 +1703,17 @@  struct store_scatter_base_def : public store_scatter
 	 bool preserve_user_namespace) const override
   {
     b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    b.add_overloaded_functions (group, MODE_wb, preserve_user_namespace);
     build_all (b, "_,vu0,ss64,v0", group, MODE_none, preserve_user_namespace);
+    build_all (b, "_,b,ss64,v0", group, MODE_wb, preserve_user_namespace);
   }
 
   tree
   resolve (function_resolver &r) const override
   {
+    gcc_assert ((r.mode_suffix_id == MODE_none)
+		|| (r.mode_suffix_id == MODE_wb));
+
     unsigned int i, nargs;
     type_suffix_index type;
     if (!r.check_gp_argument (3, i, nargs)
@@ -1708,10 +1724,20 @@  struct store_scatter_base_def : public store_scatter
     type_suffix_index base_type
       = find_type_suffix (TYPE_unsigned, type_suffixes[type].element_bits);
 
-    /* Base (arg 0) should be a vector of unsigned with same width as value
-       (arg 2).  */
-    if (!r.require_matching_vector_type (0, base_type))
-      return error_mark_node;
+    if (r.mode_suffix_id == MODE_none)
+      {
+	/* Base (arg 0) should be a vector of unsigned with same width as value
+	   (arg 2).  */
+	if (!r.require_matching_vector_type (0, base_type))
+	  return error_mark_node;
+      }
+    else
+      {
+	/* Base (arg 0) should be a pointer to a vector of unsigned with the
+	   same width as value (arg 2).  */
+	if (!r.require_pointer_to_type (0, r.get_vector_type (base_type)))
+	  return error_mark_node;
+      }
 
     return r.resolve_to (r.mode_suffix_id, type);
   }
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 3b280228e66..3982d20058b 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -1655,6 +1655,31 @@  function_resolver::require_pointer_type (unsigned int argno)
   return true;
 }
 
+/* Require argument ARGNO to be a pointer to EXPECTED type.  Return true on
+   success, otherwise report an error and return false.  */
+bool
+function_resolver::require_pointer_to_type (unsigned int argno, tree expected)
+{
+  tree actual = get_argument_type (argno);
+  if (TREE_CODE (actual) != POINTER_TYPE)
+    {
+      error_at (location, "passing %qT to argument %d of %qE, which"
+		" expects a pointer type", actual, argno + 1, fndecl);
+      return false;
+    }
+
+  tree target = TREE_TYPE (actual);
+  if (target != expected)
+    {
+      error_at (location, "passing %qT to argument %d of %qE, which"
+		" expects a pointer to %qT", actual, argno + 1, fndecl,
+		expected);
+      return false;
+    }
+
+  return true;
+}
+
 /* Require the function to have exactly EXPECTED arguments.  Return true
    if it does, otherwise report an appropriate error.  */
 bool
diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h
index 5a191b0cde3..c6a929c3eee 100644
--- a/gcc/config/arm/arm-mve-builtins.h
+++ b/gcc/config/arm/arm-mve-builtins.h
@@ -398,6 +398,7 @@  public:
 				    unsigned int = SAME_SIZE);
   bool require_scalar_type (unsigned int, const char *);
   bool require_pointer_type (unsigned int);
+  bool require_pointer_to_type (unsigned int, tree);
   bool require_integer_immediate (unsigned int);
   bool require_derived_scalar_type (unsigned int, type_class_index,
 				    unsigned int = SAME_SIZE);
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index ba158a41a68..407907679bc 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -57,10 +57,6 @@ 
 #define vldrwq_gather_shifted_offset(__base, __offset) __arm_vldrwq_gather_shifted_offset(__base, __offset)
 #define vldrwq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p)
 #define vuninitializedq(__v) __arm_vuninitializedq(__v)
-#define vstrdq_scatter_base_wb(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb(__addr, __offset, __value)
-#define vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb(__addr, __offset, __value)
 #define vst2q(__addr, __value) __arm_vst2q(__addr, __value)
 #define vld2q(__addr) __arm_vld2q(__addr)
 #define vld4q(__addr) __arm_vld4q(__addr)
@@ -159,16 +155,6 @@ 
 #define vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_f32(__addr, __offset, __p)
 #define vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_s32(__addr, __offset, __p)
 #define vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_u32(__addr, __offset, __p)
-#define vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p)
-#define vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p)
-#define vstrdq_scatter_base_wb_s64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_s64(__addr, __offset, __value)
-#define vstrdq_scatter_base_wb_u64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_u64(__addr, __offset, __value)
-#define vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_s32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_s32(__addr, __offset, __value)
-#define vstrwq_scatter_base_wb_u32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_u32(__addr, __offset, __value)
-#define vstrwq_scatter_base_wb_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_f32(__addr, __offset, __value)
 #define vst2q_s8(__addr, __value) __arm_vst2q_s8(__addr, __value)
 #define vst2q_u8(__addr, __value) __arm_vst2q_u8(__addr, __value)
 #define vld2q_s8(__addr) __arm_vld2q_s8(__addr)
@@ -749,62 +735,6 @@  __arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_
   return result;
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value)
-{
-  *__addr = __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value)
-{
-  *__addr = __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
-{
- *__addr =  __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
-{
-  *__addr = __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
-{
-  *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
-{
-  *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value)
-{
-  *__addr = __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value)
-{
-  *__addr = __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vst2q_s8 (int8_t * __addr, int8x16x2_t __value)
@@ -1325,20 +1255,6 @@  __arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_
   return result;
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value)
-{
-  *__addr = __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
-{
-  *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
-}
-
 __extension__ extern __inline float16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vld4q_f16 (float16_t const * __addr)
@@ -1779,62 +1695,6 @@  __arm_vldrwq_gather_shifted_offset_z (uint32_t const * __base, uint32x4_t __offs
  return __arm_vldrwq_gather_shifted_offset_z_u32 (__base, __offset, __p);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb (uint64x2_t * __addr, const int __offset, int64x2_t __value)
-{
- __arm_vstrdq_scatter_base_wb_s64 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb (uint64x2_t * __addr, const int __offset, uint64x2_t __value)
-{
- __arm_vstrdq_scatter_base_wb_u64 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
-{
- __arm_vstrdq_scatter_base_wb_p_s64 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
-{
- __arm_vstrdq_scatter_base_wb_p_u64 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_wb_p_s32 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_wb_p_u32 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, int32x4_t __value)
-{
- __arm_vstrwq_scatter_base_wb_s32 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, uint32x4_t __value)
-{
- __arm_vstrwq_scatter_base_wb_u32 (__addr, __offset, __value);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vst2q (int8_t * __addr, int8x16x2_t __value)
@@ -2145,20 +2005,6 @@  __arm_vldrwq_gather_shifted_offset_z (float32_t const * __base, uint32x4_t __off
  return __arm_vldrwq_gather_shifted_offset_z_f32 (__base, __offset, __p);
 }
 
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, float32x4_t __value)
-{
- __arm_vstrwq_scatter_base_wb_f32 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_wb_p_f32 (__addr, __offset, __value, __p);
-}
-
 __extension__ extern __inline float16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vld4q (float16_t const * __addr)
@@ -2654,18 +2500,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t]: __arm_vuninitializedq_f16 (), \
   int (*)[__ARM_mve_type_float32x4_t]: __arm_vuninitializedq_f32 ());})
 
-#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
 #define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vgetq_lane_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -2695,16 +2529,6 @@  extern void *__ARM_undef;
 
 #else /* MVE Integer.  */
 
-#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vst4q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x4_t]: __arm_vst4q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16x4_t)), \
@@ -2834,17 +2658,6 @@  extern void *__ARM_undef;
 
 #endif /* MVE Integer.  */
 
-
-#define __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
-
-#define __arm_vstrdq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
-  int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
-
 #define __arm_vldrdq_gather_offset(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
   int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_offset_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1), \
   int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_offset_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1)))
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index d83ce6aa3c6..07f5a59b248 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -709,16 +709,6 @@  VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di)
 VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di)
 VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si)
 VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
-VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
-VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di)
-VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si)
-VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf)
-VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di)
-VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_s, v4si)
-VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_f, v4sf)
-VAR1 (STRSBWBS_P, vstrdq_scatter_base_wb_p_s, v2di)
 VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si)
 VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di)
 VAR1 (LDRGBWBU, vldrwq_gather_base_nowb_u, v4si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 0482f1f8dd1..155e9ef6368 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -2535,10 +2535,9 @@  (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u")
 		       (VLDRWQGSO_S "s") (VLDRWQGSO_U "u")
 		       (VSTRDQSB_S "s") (VSTRDQSB_U "u")
-		       (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u")
 		       (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s")
-		       (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s")
-		       (VSTRDQSBWB_U "u") (VSBCQ_U "u")  (VSBCQ_M_U "u")
+		       (VLDRDQGBWB_U "u") (VADCQ_M_S "s")
+		       (VSBCQ_U "u")  (VSBCQ_M_U "u")
 		       (VSBCQ_S "s")  (VSBCQ_M_S "s") (VSBCIQ_U "u")
 		       (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s")
 		       (VADCQ_U "u")  (VADCQ_M_U "u") (VADCQ_S "s")
@@ -2945,10 +2944,7 @@  (define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U])
 (define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U])
 (define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U])
 (define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U])
-(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U])
-(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U])
 (define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U])
-(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U])
 (define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U])
 (define_int_iterator VxCIQ [VADCIQ_U VADCIQ_S VSBCIQ_U VSBCIQ_S])
 (define_int_iterator VxCIQ_M [VADCIQ_M_U VADCIQ_M_S VSBCIQ_M_U VSBCIQ_M_S])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index a82816e84ee..1963a1ec4f6 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -4293,163 +4293,51 @@  (define_expand "mve_viwdupq_m_wb_u<mode>"
   DONE;
 })
 
+;; Vector scatter stores with vector base, immediate offset and write-back
 ;;
 ;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u]
+;; [vstrwq_scatter_base_wb_f]
+;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
 ;;
-(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si"
+(define_insn "@mve_vstrq_scatter_base_wb_<mode>"
   [(set (mem:BLK (scratch))
 	(unspec:BLK
-		[(match_operand:V4SI 1 "s_register_operand" "0")
+		[(match_operand:<MVE_scatter_offset> 1 "s_register_operand" "0")
 		 (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
-		 (match_operand:V4SI 3 "s_register_operand" "w")]
-	 VSTRWSBWBQ))
-   (set (match_operand:V4SI 0 "s_register_operand" "=w")
-	(unspec:V4SI [(match_dup 1) (match_dup 2)]
-	 VSTRWSBWBQ))
+		 (match_operand:MVE_4 3 "s_register_operand" "w")]
+	 VSTRSBWBQ))
+   (set (match_operand:<MVE_scatter_offset> 0 "s_register_operand" "=w")
+	(unspec:<MVE_scatter_offset> [(match_dup 1) (match_dup 2)]
+	 VSTRSBWBQ))
   ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[1];
-   ops[1] = operands[2];
-   ops[2] = operands[3];
-   output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+  "vstr<MVE_elem_ch>.u<V_sz_elem>\t%q3, [%q1, %2]!"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_wb_<mode>"))
   (set_attr "length" "4")])
 
+;; Predicated scatter stores with vector base, immediate offset, write-back
 ;;
 ;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u]
-;;
-(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si"
- [(set (mem:BLK (scratch))
-       (unspec:BLK
-		[(match_operand:V4SI 1 "s_register_operand" "0")
-		 (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
-		 (match_operand:V4SI 3 "s_register_operand" "w")
-		 (match_operand:V4BI 4 "vpr_register_operand" "Up")]
-	VSTRWSBWBQ))
-   (set (match_operand:V4SI 0 "s_register_operand" "=w")
-	(unspec:V4SI [(match_dup 1) (match_dup 2)]
-	 VSTRWSBWBQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[1];
-   ops[1] = operands[2];
-   ops[2] = operands[3];
-   output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
-  (set_attr "length" "8")])
-
-;;
-;; [vstrwq_scatter_base_wb_f]
-;;
-(define_insn "mve_vstrwq_scatter_base_wb_fv4sf"
- [(set (mem:BLK (scratch))
-       (unspec:BLK
-		[(match_operand:V4SI 1 "s_register_operand" "0")
-		 (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
-		 (match_operand:V4SF 3 "s_register_operand" "w")]
-	 VSTRWQSBWB_F))
-   (set (match_operand:V4SI 0 "s_register_operand" "=w")
-	(unspec:V4SI [(match_dup 1) (match_dup 2)]
-	 VSTRWQSBWB_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[3];
-   ops[0] = operands[1];
-   ops[1] = operands[2];
-   ops[2] = operands[3];
-   output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
-  (set_attr "length" "4")])
-
-;;
 ;; [vstrwq_scatter_base_wb_p_f]
+;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u]
 ;;
-(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf"
+(define_insn "@mve_vstrq_scatter_base_wb_p_<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
-		[(match_operand:V4SI 1 "s_register_operand" "0")
-		 (match_operand:SI 2 "mve_vstrw_immediate" "Rl")
-		 (match_operand:V4SF 3 "s_register_operand" "w")
-		 (match_operand:V4BI 4 "vpr_register_operand" "Up")]
-	VSTRWQSBWB_F))
-   (set (match_operand:V4SI 0 "s_register_operand" "=w")
-	(unspec:V4SI [(match_dup 1) (match_dup 2)]
-	 VSTRWQSBWB_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[3];
-   ops[0] = operands[1];
-   ops[1] = operands[2];
-   ops[2] = operands[3];
-   output_asm_insn ("vpst\;vstrwt.u32\t%q2, [%q0, %1]!",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
-  (set_attr "length" "8")])
-
-;;
-;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
-;;
-(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di"
-  [(set (mem:BLK (scratch))
-	(unspec:BLK
-		[(match_operand:V2DI 1 "s_register_operand" "0")
+		[(match_operand:<MVE_scatter_offset> 1 "s_register_operand" "0")
 		 (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
-		 (match_operand:V2DI 3 "s_register_operand" "w")]
-	 VSTRDSBWBQ))
-   (set (match_operand:V2DI 0 "s_register_operand" "=&w")
-	(unspec:V2DI [(match_dup 1) (match_dup 2)]
-	 VSTRDSBWBQ))
+		 (match_operand:MVE_4 3 "s_register_operand" "w")
+		 (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
+	VSTRSBWBQ_P))
+   (set (match_operand:<MVE_scatter_offset> 0 "s_register_operand" "=w")
+	(unspec:<MVE_scatter_offset> [(match_dup 1) (match_dup 2)]
+	 VSTRSBWBQ_P))
   ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[1];
-   ops[1] = operands[2];
-   ops[2] = operands[3];
-   output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]!",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
-  (set_attr "length" "4")])
-
-;;
-;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u]
-;;
-(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di"
-  [(set (mem:BLK (scratch))
-	(unspec:BLK
-		[(match_operand:V2DI 1 "s_register_operand" "0")
-		 (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
-		 (match_operand:V2DI 3 "s_register_operand" "w")
-		 (match_operand:V2QI 4 "vpr_register_operand" "Up")]
-	 VSTRDSBWBQ))
-   (set (match_operand:V2DI 0 "s_register_operand" "=w")
-	(unspec:V2DI [(match_dup 1) (match_dup 2)]
-	 VSTRDSBWBQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[1];
-   ops[1] = operands[2];
-   ops[2] = operands[3];
-   output_asm_insn ("vpst\;vstrdt.u64\t%q2, [%q0, %1]!",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+  "vpst\;\tvstr<MVE_elem_ch>t.u<V_sz_elem>\t%q3, [%q1, %2]!"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_wb_<mode>"))
   (set_attr "length" "8")])
 
 (define_expand "mve_vldrwq_gather_base_wb_<supf>v4si"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 2116a17f1a1..182908909ab 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -1197,14 +1197,11 @@  (define_c_enum "unspec" [
   VIDUPQ_M
   VIWDUPQ
   VIWDUPQ_M
-  VSTRWQSBWB_S
-  VSTRWQSBWB_U
+  VSTRSBWBQ
+  VSTRSBWBQ_P
   VLDRWQGBWB_S
   VLDRWQGBWB_U
-  VSTRWQSBWB_F
   VLDRWQGBWB_F
-  VSTRDQSBWB_S
-  VSTRDQSBWB_U
   VLDRDQGBWB_S
   VLDRDQGBWB_U
   VADCQ_U