diff mbox series

[10/15] arm: [MVE intrinsics] rework vldr gather_offset

Message ID 20241107091820.2010568-11-christophe.lyon@linaro.org
State New
Headers show
Series arm: [MVE intrinsics] Rework store_scatter and load_gather intrinsics | expand

Commit Message

Christophe Lyon Nov. 7, 2024, 9:18 a.m. UTC
Implement vldr?q_gather_offset using the new MVE builtins framework.

The patch introduces a new attribute iterator (MVE_u_elem) to
accommodate the fact that ACLE's expected output description uses "uNN"
for all modes, except V8HF where it expects ".f16".  Using "V_sz_elem"
would work, but would require updating several testcases.

gcc/ChangeLog:

	* config/arm/arm-mve-builtins-base.cc (class vldrq_gather_impl):
	New.
	(vldrbq_gather, vldrdq_gather, vldrhq_gather, vldrwq_gather): New.
	* config/arm/arm-mve-builtins-base.def (vldrbq_gather)
	(vldrdq_gather, vldrhq_gather, vldrwq_gather): New.
	* config/arm/arm-mve-builtins-base.h (vldrbq_gather)
	(vldrdq_gather, vldrhq_gather, vldrwq_gather): New.
	* config/arm/arm_mve.h (vldrbq_gather_offset): Delete.
	(vldrbq_gather_offset_z): Delete.
	(vldrhq_gather_offset): Delete.
	(vldrhq_gather_offset_z): Delete.
	(vldrdq_gather_offset): Delete.
	(vldrdq_gather_offset_z): Delete.
	(vldrwq_gather_offset): Delete.
	(vldrwq_gather_offset_z): Delete.
	(vldrbq_gather_offset_u8): Delete.
	(vldrbq_gather_offset_s8): Delete.
	(vldrbq_gather_offset_u16): Delete.
	(vldrbq_gather_offset_s16): Delete.
	(vldrbq_gather_offset_u32): Delete.
	(vldrbq_gather_offset_s32): Delete.
	(vldrbq_gather_offset_z_s16): Delete.
	(vldrbq_gather_offset_z_u8): Delete.
	(vldrbq_gather_offset_z_s32): Delete.
	(vldrbq_gather_offset_z_u16): Delete.
	(vldrbq_gather_offset_z_u32): Delete.
	(vldrbq_gather_offset_z_s8): Delete.
	(vldrhq_gather_offset_s32): Delete.
	(vldrhq_gather_offset_s16): Delete.
	(vldrhq_gather_offset_u32): Delete.
	(vldrhq_gather_offset_u16): Delete.
	(vldrhq_gather_offset_z_s32): Delete.
	(vldrhq_gather_offset_z_s16): Delete.
	(vldrhq_gather_offset_z_u32): Delete.
	(vldrhq_gather_offset_z_u16): Delete.
	(vldrdq_gather_offset_s64): Delete.
	(vldrdq_gather_offset_u64): Delete.
	(vldrdq_gather_offset_z_s64): Delete.
	(vldrdq_gather_offset_z_u64): Delete.
	(vldrhq_gather_offset_f16): Delete.
	(vldrhq_gather_offset_z_f16): Delete.
	(vldrwq_gather_offset_f32): Delete.
	(vldrwq_gather_offset_s32): Delete.
	(vldrwq_gather_offset_u32): Delete.
	(vldrwq_gather_offset_z_f32): Delete.
	(vldrwq_gather_offset_z_s32): Delete.
	(vldrwq_gather_offset_z_u32): Delete.
	(__arm_vldrbq_gather_offset_u8): Delete.
	(__arm_vldrbq_gather_offset_s8): Delete.
	(__arm_vldrbq_gather_offset_u16): Delete.
	(__arm_vldrbq_gather_offset_s16): Delete.
	(__arm_vldrbq_gather_offset_u32): Delete.
	(__arm_vldrbq_gather_offset_s32): Delete.
	(__arm_vldrbq_gather_offset_z_s8): Delete.
	(__arm_vldrbq_gather_offset_z_s32): Delete.
	(__arm_vldrbq_gather_offset_z_s16): Delete.
	(__arm_vldrbq_gather_offset_z_u8): Delete.
	(__arm_vldrbq_gather_offset_z_u32): Delete.
	(__arm_vldrbq_gather_offset_z_u16): Delete.
	(__arm_vldrhq_gather_offset_s32): Delete.
	(__arm_vldrhq_gather_offset_s16): Delete.
	(__arm_vldrhq_gather_offset_u32): Delete.
	(__arm_vldrhq_gather_offset_u16): Delete.
	(__arm_vldrhq_gather_offset_z_s32): Delete.
	(__arm_vldrhq_gather_offset_z_s16): Delete.
	(__arm_vldrhq_gather_offset_z_u32): Delete.
	(__arm_vldrhq_gather_offset_z_u16): Delete.
	(__arm_vldrdq_gather_offset_s64): Delete.
	(__arm_vldrdq_gather_offset_u64): Delete.
	(__arm_vldrdq_gather_offset_z_s64): Delete.
	(__arm_vldrdq_gather_offset_z_u64): Delete.
	(__arm_vldrwq_gather_offset_s32): Delete.
	(__arm_vldrwq_gather_offset_u32): Delete.
	(__arm_vldrwq_gather_offset_z_s32): Delete.
	(__arm_vldrwq_gather_offset_z_u32): Delete.
	(__arm_vldrhq_gather_offset_f16): Delete.
	(__arm_vldrhq_gather_offset_z_f16): Delete.
	(__arm_vldrwq_gather_offset_f32): Delete.
	(__arm_vldrwq_gather_offset_z_f32): Delete.
	(__arm_vldrbq_gather_offset): Delete.
	(__arm_vldrbq_gather_offset_z): Delete.
	(__arm_vldrhq_gather_offset): Delete.
	(__arm_vldrhq_gather_offset_z): Delete.
	(__arm_vldrdq_gather_offset): Delete.
	(__arm_vldrdq_gather_offset_z): Delete.
	(__arm_vldrwq_gather_offset): Delete.
	(__arm_vldrwq_gather_offset_z): Delete.
	* config/arm/arm_mve_builtins.def (vldrbq_gather_offset_u)
	(vldrbq_gather_offset_s, vldrbq_gather_offset_z_s)
	(vldrbq_gather_offset_z_u, vldrhq_gather_offset_z_u)
	(vldrhq_gather_offset_u, vldrhq_gather_offset_z_s)
	(vldrhq_gather_offset_s, vldrdq_gather_offset_s)
	(vldrhq_gather_offset_f, vldrwq_gather_offset_f)
	(vldrwq_gather_offset_s, vldrdq_gather_offset_z_s)
	(vldrhq_gather_offset_z_f, vldrwq_gather_offset_z_f)
	(vldrwq_gather_offset_z_s, vldrdq_gather_offset_u)
	(vldrwq_gather_offset_u, vldrdq_gather_offset_z_u)
	(vldrwq_gather_offset_z_u): Delete.
	* config/arm/iterators.md (MVE_u_elem): New.
	(supf): Remove VLDRBQGO_S, VLDRBQGO_U, VLDRHQGO_S, VLDRHQGO_U,
	VLDRDQGO_S, VLDRDQGO_U, VLDRWQGO_S, VLDRWQGO_U.
	(VLDRBGOQ, VLDRHGOQ, VLDRDGOQ, VLDRWGOQ): Delete.
	* config/arm/mve.md (mve_vldrbq_gather_offset_<supf><mode>):
	Delete.
	(mve_vldrbq_gather_offset_z_<supf><mode>): Delete.
	(mve_vldrhq_gather_offset_<supf><mode>): Delete.
	(mve_vldrhq_gather_offset_z_<supf><mode>): Delete.
	(mve_vldrdq_gather_offset_<supf>v2di): Delete.
	(mve_vldrdq_gather_offset_z_<supf>v2di): Delete.
	(mve_vldrhq_gather_offset_fv8hf): Delete.
	(mve_vldrhq_gather_offset_z_fv8hf): Delete.
	(mve_vldrwq_gather_offset_fv4sf): Delete.
	(mve_vldrwq_gather_offset_<supf>v4si): Delete.
	(mve_vldrwq_gather_offset_z_fv4sf): Delete.
	(mve_vldrwq_gather_offset_z_<supf>v4si): Delete.
	(@mve_vldrq_gather_offset_<mode>): New.
	(@mve_vldrq_gather_offset_extend_<mode><US>): New.
	(@mve_vldrq_gather_offset_z_<mode>): New.
	(@mve_vldrq_gather_offset_z_extend_<mode><US>): New.
	* config/arm/unspecs.md (VLDRBQGO_S, VLDRBQGO_U, VLDRHQGO_S)
	(VLDRHQGO_U, VLDRDQGO_S, VLDRDQGO_U, VLDRHQGO_F, VLDRWQGO_F)
	(VLDRWQGO_S, VLDRWQGO_U): Delete.
	(VLDRGOQ, VLDRGOQ_Z, VLDRGOQ_EXT, VLDRGOQ_EXT_Z): New.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  47 ++
 gcc/config/arm/arm-mve-builtins-base.def |   6 +
 gcc/config/arm/arm-mve-builtins-base.h   |   4 +
 gcc/config/arm/arm_mve.h                 | 576 -----------------------
 gcc/config/arm/arm_mve_builtins.def      |  20 -
 gcc/config/arm/iterators.md              |  24 +-
 gcc/config/arm/mve.md                    | 349 ++++----------
 gcc/config/arm/unspecs.md                |  14 +-
 8 files changed, 156 insertions(+), 884 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 27e31d6c8cd..a4d498d534b 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -411,6 +411,49 @@  public:
   }
 };
 
+/* Builds the vldrq_gather*offset intrinsics.  */
+class vldrq_gather_impl : public load_extending
+{
+public:
+  using load_extending::load_extending;
+
+  rtx expand (function_expander &e) const override
+  {
+    insn_code icode;
+    switch (e.pred)
+      {
+      case PRED_none:
+	if (e.vector_mode (0) == e.memory_vector_mode ())
+	  /* Non-extending load case.  */
+	  icode = code_for_mve_vldrq_gather_offset (e.vector_mode (0));
+	else
+	  /* Extending load case.  */
+	  icode = code_for_mve_vldrq_gather_offset_extend
+	    (e.memory_vector_mode (),
+	     e.type_suffix (0).unsigned_p
+	     ? ZERO_EXTEND
+	     : SIGN_EXTEND);
+	break;
+
+      case PRED_z:
+	if (e.vector_mode (0) == e.memory_vector_mode ())
+	  icode = code_for_mve_vldrq_gather_offset_z (e.vector_mode (0));
+	else
+	  icode = code_for_mve_vldrq_gather_offset_z_extend
+	    (e.memory_vector_mode (),
+	     e.type_suffix (0).unsigned_p
+	     ? ZERO_EXTEND
+	     : SIGN_EXTEND);
+	break;
+
+      default:
+	gcc_unreachable ();
+      }
+
+    return e.use_exact_insn (icode);
+  }
+};
+
   /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics.  */
 class vctpq_impl : public function_base
 {
@@ -1208,8 +1251,12 @@  FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
 FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
 FUNCTION (vld1q, vld1_impl,)
 FUNCTION (vldrbq, vldrq_impl, (TYPE_SUFFIX_s8, TYPE_SUFFIX_u8))
+FUNCTION (vldrbq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s8, TYPE_SUFFIX_u8))
+FUNCTION (vldrdq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s64, TYPE_SUFFIX_u64, NUM_TYPE_SUFFIXES))
 FUNCTION (vldrhq, vldrq_impl, (TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16))
+FUNCTION (vldrhq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16))
 FUNCTION (vldrwq, vldrq_impl, (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32))
+FUNCTION (vldrwq_gather, vldrq_gather_impl, (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32))
 FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ)
 FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ)
 FUNCTION_ONLY_F (vmaxnmaq, VMAXNMAQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index a56fae7414e..5e30a27ae74 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -60,8 +60,12 @@  DEF_MVE_FUNCTION (vidupq, viddup, all_unsigned, mx_or_none)
 DEF_MVE_FUNCTION (viwdupq, vidwdup, all_unsigned, mx_or_none)
 DEF_MVE_FUNCTION (vld1q, load, all_integer, z_or_none)
 DEF_MVE_FUNCTION (vldrbq, load_ext, all_integer, z_or_none)
+DEF_MVE_FUNCTION (vldrbq_gather, load_ext_gather_offset, all_integer, z_or_none)
+DEF_MVE_FUNCTION (vldrdq_gather, load_ext_gather_offset, integer_64, z_or_none)
 DEF_MVE_FUNCTION (vldrhq, load_ext, integer_16_32, z_or_none)
+DEF_MVE_FUNCTION (vldrhq_gather, load_ext_gather_offset, integer_16_32, z_or_none)
 DEF_MVE_FUNCTION (vldrwq, load_ext, integer_32, z_or_none)
+DEF_MVE_FUNCTION (vldrwq_gather, load_ext_gather_offset, integer_32, z_or_none)
 DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none)
@@ -226,7 +230,9 @@  DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none)
 DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none)
 DEF_MVE_FUNCTION (vld1q, load, all_float, z_or_none)
 DEF_MVE_FUNCTION (vldrhq, load_ext, float_16, z_or_none)
+DEF_MVE_FUNCTION (vldrhq_gather, load_ext_gather_offset, float_16, z_or_none)
 DEF_MVE_FUNCTION (vldrwq, load_ext, float_32, z_or_none)
+DEF_MVE_FUNCTION (vldrwq_gather, load_ext_gather_offset, float_32, z_or_none)
 DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
 DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none)
 DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 261248086dc..88fcff3d577 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -83,8 +83,12 @@  extern const function_base *const vidupq;
 extern const function_base *const viwdupq;
 extern const function_base *const vld1q;
 extern const function_base *const vldrbq;
+extern const function_base *const vldrbq_gather;
+extern const function_base *const vldrdq_gather;
 extern const function_base *const vldrhq;
+extern const function_base *const vldrhq_gather;
 extern const function_base *const vldrwq;
+extern const function_base *const vldrwq_gather;
 extern const function_base *const vmaxaq;
 extern const function_base *const vmaxavq;
 extern const function_base *const vmaxnmaq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 407907679bc..306735a05ba 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,18 +42,10 @@ 
 
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vldrbq_gather_offset(__base, __offset) __arm_vldrbq_gather_offset(__base, __offset)
-#define vldrbq_gather_offset_z(__base, __offset, __p) __arm_vldrbq_gather_offset_z(__base, __offset, __p)
-#define vldrhq_gather_offset(__base, __offset) __arm_vldrhq_gather_offset(__base, __offset)
-#define vldrhq_gather_offset_z(__base, __offset, __p) __arm_vldrhq_gather_offset_z(__base, __offset, __p)
 #define vldrhq_gather_shifted_offset(__base, __offset) __arm_vldrhq_gather_shifted_offset(__base, __offset)
 #define vldrhq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z(__base, __offset, __p)
-#define vldrdq_gather_offset(__base, __offset) __arm_vldrdq_gather_offset(__base, __offset)
-#define vldrdq_gather_offset_z(__base, __offset, __p) __arm_vldrdq_gather_offset_z(__base, __offset, __p)
 #define vldrdq_gather_shifted_offset(__base, __offset) __arm_vldrdq_gather_shifted_offset(__base, __offset)
 #define vldrdq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z(__base, __offset, __p)
-#define vldrwq_gather_offset(__base, __offset) __arm_vldrwq_gather_offset(__base, __offset)
-#define vldrwq_gather_offset_z(__base, __offset, __p) __arm_vldrwq_gather_offset_z(__base, __offset, __p)
 #define vldrwq_gather_shifted_offset(__base, __offset) __arm_vldrwq_gather_shifted_offset(__base, __offset)
 #define vldrwq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p)
 #define vuninitializedq(__v) __arm_vuninitializedq(__v)
@@ -73,30 +65,10 @@ 
 #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
 #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
 #define vpnot(__a) __arm_vpnot(__a)
-#define vldrbq_gather_offset_u8(__base, __offset) __arm_vldrbq_gather_offset_u8(__base, __offset)
-#define vldrbq_gather_offset_s8(__base, __offset) __arm_vldrbq_gather_offset_s8(__base, __offset)
-#define vldrbq_gather_offset_u16(__base, __offset) __arm_vldrbq_gather_offset_u16(__base, __offset)
-#define vldrbq_gather_offset_s16(__base, __offset) __arm_vldrbq_gather_offset_s16(__base, __offset)
-#define vldrbq_gather_offset_u32(__base, __offset) __arm_vldrbq_gather_offset_u32(__base, __offset)
-#define vldrbq_gather_offset_s32(__base, __offset) __arm_vldrbq_gather_offset_s32(__base, __offset)
 #define vldrwq_gather_base_s32(__addr,  __offset) __arm_vldrwq_gather_base_s32(__addr,  __offset)
 #define vldrwq_gather_base_u32(__addr,  __offset) __arm_vldrwq_gather_base_u32(__addr,  __offset)
-#define vldrbq_gather_offset_z_s16(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s16(__base, __offset, __p)
-#define vldrbq_gather_offset_z_u8(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u8(__base, __offset, __p)
-#define vldrbq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s32(__base, __offset, __p)
-#define vldrbq_gather_offset_z_u16(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u16(__base, __offset, __p)
-#define vldrbq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u32(__base, __offset, __p)
-#define vldrbq_gather_offset_z_s8(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s8(__base, __offset, __p)
 #define vldrwq_gather_base_z_u32(__addr,  __offset, __p) __arm_vldrwq_gather_base_z_u32(__addr,  __offset, __p)
 #define vldrwq_gather_base_z_s32(__addr,  __offset, __p) __arm_vldrwq_gather_base_z_s32(__addr,  __offset, __p)
-#define vldrhq_gather_offset_s32(__base, __offset) __arm_vldrhq_gather_offset_s32(__base, __offset)
-#define vldrhq_gather_offset_s16(__base, __offset) __arm_vldrhq_gather_offset_s16(__base, __offset)
-#define vldrhq_gather_offset_u32(__base, __offset) __arm_vldrhq_gather_offset_u32(__base, __offset)
-#define vldrhq_gather_offset_u16(__base, __offset) __arm_vldrhq_gather_offset_u16(__base, __offset)
-#define vldrhq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrhq_gather_offset_z_s32(__base, __offset, __p)
-#define vldrhq_gather_offset_z_s16(__base, __offset, __p) __arm_vldrhq_gather_offset_z_s16(__base, __offset, __p)
-#define vldrhq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrhq_gather_offset_z_u32(__base, __offset, __p)
-#define vldrhq_gather_offset_z_u16(__base, __offset, __p) __arm_vldrhq_gather_offset_z_u16(__base, __offset, __p)
 #define vldrhq_gather_shifted_offset_s32(__base, __offset) __arm_vldrhq_gather_shifted_offset_s32(__base, __offset)
 #define vldrhq_gather_shifted_offset_s16(__base, __offset) __arm_vldrhq_gather_shifted_offset_s16(__base, __offset)
 #define vldrhq_gather_shifted_offset_u32(__base, __offset) __arm_vldrhq_gather_shifted_offset_u32(__base, __offset)
@@ -109,26 +81,14 @@ 
 #define vldrdq_gather_base_u64(__addr,  __offset) __arm_vldrdq_gather_base_u64(__addr,  __offset)
 #define vldrdq_gather_base_z_s64(__addr,  __offset, __p) __arm_vldrdq_gather_base_z_s64(__addr,  __offset, __p)
 #define vldrdq_gather_base_z_u64(__addr,  __offset, __p) __arm_vldrdq_gather_base_z_u64(__addr,  __offset, __p)
-#define vldrdq_gather_offset_s64(__base, __offset) __arm_vldrdq_gather_offset_s64(__base, __offset)
-#define vldrdq_gather_offset_u64(__base, __offset) __arm_vldrdq_gather_offset_u64(__base, __offset)
-#define vldrdq_gather_offset_z_s64(__base, __offset, __p) __arm_vldrdq_gather_offset_z_s64(__base, __offset, __p)
-#define vldrdq_gather_offset_z_u64(__base, __offset, __p) __arm_vldrdq_gather_offset_z_u64(__base, __offset, __p)
 #define vldrdq_gather_shifted_offset_s64(__base, __offset) __arm_vldrdq_gather_shifted_offset_s64(__base, __offset)
 #define vldrdq_gather_shifted_offset_u64(__base, __offset) __arm_vldrdq_gather_shifted_offset_u64(__base, __offset)
 #define vldrdq_gather_shifted_offset_z_s64(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z_s64(__base, __offset, __p)
 #define vldrdq_gather_shifted_offset_z_u64(__base, __offset, __p) __arm_vldrdq_gather_shifted_offset_z_u64(__base, __offset, __p)
-#define vldrhq_gather_offset_f16(__base, __offset) __arm_vldrhq_gather_offset_f16(__base, __offset)
-#define vldrhq_gather_offset_z_f16(__base, __offset, __p) __arm_vldrhq_gather_offset_z_f16(__base, __offset, __p)
 #define vldrhq_gather_shifted_offset_f16(__base, __offset) __arm_vldrhq_gather_shifted_offset_f16(__base, __offset)
 #define vldrhq_gather_shifted_offset_z_f16(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z_f16(__base, __offset, __p)
 #define vldrwq_gather_base_f32(__addr,  __offset) __arm_vldrwq_gather_base_f32(__addr,  __offset)
 #define vldrwq_gather_base_z_f32(__addr,  __offset, __p) __arm_vldrwq_gather_base_z_f32(__addr,  __offset, __p)
-#define vldrwq_gather_offset_f32(__base, __offset) __arm_vldrwq_gather_offset_f32(__base, __offset)
-#define vldrwq_gather_offset_s32(__base, __offset) __arm_vldrwq_gather_offset_s32(__base, __offset)
-#define vldrwq_gather_offset_u32(__base, __offset) __arm_vldrwq_gather_offset_u32(__base, __offset)
-#define vldrwq_gather_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_f32(__base, __offset, __p)
-#define vldrwq_gather_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_s32(__base, __offset, __p)
-#define vldrwq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_offset_z_u32(__base, __offset, __p)
 #define vldrwq_gather_shifted_offset_f32(__base, __offset) __arm_vldrwq_gather_shifted_offset_f32(__base, __offset)
 #define vldrwq_gather_shifted_offset_s32(__base, __offset) __arm_vldrwq_gather_shifted_offset_s32(__base, __offset)
 #define vldrwq_gather_shifted_offset_u32(__base, __offset) __arm_vldrwq_gather_shifted_offset_u32(__base, __offset)
@@ -290,48 +250,6 @@  __arm_vpnot (mve_pred16_t __a)
   return __builtin_mve_vpnotv16bi (__a);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_u8 (uint8_t const * __base, uint8x16_t __offset)
-{
-  return __builtin_mve_vldrbq_gather_offset_uv16qi ((__builtin_neon_qi *) __base, __offset);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_s8 (int8_t const * __base, uint8x16_t __offset)
-{
-  return __builtin_mve_vldrbq_gather_offset_sv16qi ((__builtin_neon_qi *) __base, __offset);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_u16 (uint8_t const * __base, uint16x8_t __offset)
-{
-  return __builtin_mve_vldrbq_gather_offset_uv8hi ((__builtin_neon_qi *) __base, __offset);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_s16 (int8_t const * __base, uint16x8_t __offset)
-{
-  return __builtin_mve_vldrbq_gather_offset_sv8hi ((__builtin_neon_qi *) __base, __offset);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_u32 (uint8_t const * __base, uint32x4_t __offset)
-{
-  return __builtin_mve_vldrbq_gather_offset_uv4si ((__builtin_neon_qi *) __base, __offset);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_s32 (int8_t const * __base, uint32x4_t __offset)
-{
-  return __builtin_mve_vldrbq_gather_offset_sv4si ((__builtin_neon_qi *) __base, __offset);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrwq_gather_base_s32 (uint32x4_t __addr, const int __offset)
@@ -346,48 +264,6 @@  __arm_vldrwq_gather_base_u32 (uint32x4_t __addr, const int __offset)
   return __builtin_mve_vldrwq_gather_base_uv4si (__addr, __offset);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z_s8 (int8_t const * __base, uint8x16_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_gather_offset_z_sv16qi ((__builtin_neon_qi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z_s32 (int8_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_gather_offset_z_sv4si ((__builtin_neon_qi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z_s16 (int8_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_gather_offset_z_sv8hi ((__builtin_neon_qi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z_u8 (uint8_t const * __base, uint8x16_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_gather_offset_z_uv16qi ((__builtin_neon_qi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z_u32 (uint8_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_gather_offset_z_uv4si ((__builtin_neon_qi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z_u16 (uint8_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_gather_offset_z_uv8hi ((__builtin_neon_qi *) __base, __offset, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrwq_gather_base_z_s32 (uint32x4_t __addr, const int __offset, mve_pred16_t __p)
@@ -402,62 +278,6 @@  __arm_vldrwq_gather_base_z_u32 (uint32x4_t __addr, const int __offset, mve_pred1
   return __builtin_mve_vldrwq_gather_base_z_uv4si (__addr, __offset, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_s32 (int16_t const * __base, uint32x4_t __offset)
-{
-  return __builtin_mve_vldrhq_gather_offset_sv4si ((__builtin_neon_hi *) __base, __offset);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_s16 (int16_t const * __base, uint16x8_t __offset)
-{
-  return __builtin_mve_vldrhq_gather_offset_sv8hi ((__builtin_neon_hi *) __base, __offset);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_u32 (uint16_t const * __base, uint32x4_t __offset)
-{
-  return __builtin_mve_vldrhq_gather_offset_uv4si ((__builtin_neon_hi *) __base, __offset);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_u16 (uint16_t const * __base, uint16x8_t __offset)
-{
-  return __builtin_mve_vldrhq_gather_offset_uv8hi ((__builtin_neon_hi *) __base, __offset);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z_s32 (int16_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_gather_offset_z_sv4si ((__builtin_neon_hi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z_s16 (int16_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_gather_offset_z_sv8hi ((__builtin_neon_hi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z_u32 (uint16_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_gather_offset_z_uv4si ((__builtin_neon_hi *) __base, __offset, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z_u16 (uint16_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_gather_offset_z_uv8hi ((__builtin_neon_hi *) __base, __offset, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrhq_gather_shifted_offset_s32 (int16_t const * __base, uint32x4_t __offset)
@@ -542,35 +362,6 @@  __arm_vldrdq_gather_base_z_u64 (uint64x2_t __addr, const int __offset, mve_pred1
   return __builtin_mve_vldrdq_gather_base_z_uv2di (__addr, __offset, __p);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset_s64 (int64_t const * __base, uint64x2_t __offset)
-{
-  return __builtin_mve_vldrdq_gather_offset_sv2di ((__builtin_neon_di *) __base, __offset);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset_u64 (uint64_t const * __base, uint64x2_t __offset)
-{
-  return __builtin_mve_vldrdq_gather_offset_uv2di ((__builtin_neon_di *) __base, __offset);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset_z_s64 (int64_t const * __base, uint64x2_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrdq_gather_offset_z_sv2di ((__builtin_neon_di *) __base, __offset, __p);
-}
-
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset_z_u64 (uint64_t const * __base, uint64x2_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrdq_gather_offset_z_uv2di ((__builtin_neon_di *) __base, __offset, __p);
-}
-
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrdq_gather_shifted_offset_s64 (int64_t const * __base, uint64x2_t __offset)
@@ -599,34 +390,6 @@  __arm_vldrdq_gather_shifted_offset_z_u64 (uint64_t const * __base, uint64x2_t __
   return __builtin_mve_vldrdq_gather_shifted_offset_z_uv2di ((__builtin_neon_di *) __base, __offset, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_s32 (int32_t const * __base, uint32x4_t __offset)
-{
-  return __builtin_mve_vldrwq_gather_offset_sv4si ((__builtin_neon_si *) __base, __offset);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_u32 (uint32_t const * __base, uint32x4_t __offset)
-{
-  return __builtin_mve_vldrwq_gather_offset_uv4si ((__builtin_neon_si *) __base, __offset);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_z_s32 (int32_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrwq_gather_offset_z_sv4si ((__builtin_neon_si *) __base, __offset, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_z_u32 (uint32_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrwq_gather_offset_z_uv4si ((__builtin_neon_si *) __base, __offset, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrwq_gather_shifted_offset_s32 (int32_t const * __base, uint32x4_t __offset)
@@ -1165,20 +928,6 @@  __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value)
   __builtin_mve_vst4qv4sf (__addr, __rv.__o);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_f16 (float16_t const * __base, uint16x8_t __offset)
-{
-  return __builtin_mve_vldrhq_gather_offset_fv8hf((__builtin_neon_hi *) __base, __offset);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z_f16 (float16_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_gather_offset_z_fv8hf((__builtin_neon_hi *) __base, __offset, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrhq_gather_shifted_offset_f16 (float16_t const * __base, uint16x8_t __offset)
@@ -1207,20 +956,6 @@  __arm_vldrwq_gather_base_z_f32 (uint32x4_t __addr, const int __offset, mve_pred1
   return __builtin_mve_vldrwq_gather_base_z_fv4sf (__addr, __offset, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_f32 (float32_t const * __base, uint32x4_t __offset)
-{
-  return __builtin_mve_vldrwq_gather_offset_fv4sf((__builtin_neon_si *) __base, __offset);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_z_f32 (float32_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrwq_gather_offset_z_fv4sf((__builtin_neon_si *) __base, __offset, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrwq_gather_shifted_offset_f32 (float32_t const * __base, uint32x4_t __offset)
@@ -1387,146 +1122,6 @@  __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
  __arm_vst4q_u32 (__addr, __value);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset (uint8_t const * __base, uint8x16_t __offset)
-{
- return __arm_vldrbq_gather_offset_u8 (__base, __offset);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset (int8_t const * __base, uint8x16_t __offset)
-{
- return __arm_vldrbq_gather_offset_s8 (__base, __offset);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset (uint8_t const * __base, uint16x8_t __offset)
-{
- return __arm_vldrbq_gather_offset_u16 (__base, __offset);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset (int8_t const * __base, uint16x8_t __offset)
-{
- return __arm_vldrbq_gather_offset_s16 (__base, __offset);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset (uint8_t const * __base, uint32x4_t __offset)
-{
- return __arm_vldrbq_gather_offset_u32 (__base, __offset);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset (int8_t const * __base, uint32x4_t __offset)
-{
- return __arm_vldrbq_gather_offset_s32 (__base, __offset);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z (int8_t const * __base, uint8x16_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrbq_gather_offset_z_s8 (__base, __offset, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z (int8_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrbq_gather_offset_z_s32 (__base, __offset, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z (int8_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrbq_gather_offset_z_s16 (__base, __offset, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z (uint8_t const * __base, uint8x16_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrbq_gather_offset_z_u8 (__base, __offset, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z (uint8_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrbq_gather_offset_z_u32 (__base, __offset, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_gather_offset_z (uint8_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrbq_gather_offset_z_u16 (__base, __offset, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset (int16_t const * __base, uint32x4_t __offset)
-{
- return __arm_vldrhq_gather_offset_s32 (__base, __offset);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset (int16_t const * __base, uint16x8_t __offset)
-{
- return __arm_vldrhq_gather_offset_s16 (__base, __offset);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset (uint16_t const * __base, uint32x4_t __offset)
-{
- return __arm_vldrhq_gather_offset_u32 (__base, __offset);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset (uint16_t const * __base, uint16x8_t __offset)
-{
- return __arm_vldrhq_gather_offset_u16 (__base, __offset);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z (int16_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrhq_gather_offset_z_s32 (__base, __offset, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z (int16_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrhq_gather_offset_z_s16 (__base, __offset, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z (uint16_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrhq_gather_offset_z_u32 (__base, __offset, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z (uint16_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrhq_gather_offset_z_u16 (__base, __offset, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrhq_gather_shifted_offset (int16_t const * __base, uint32x4_t __offset)
@@ -1583,34 +1178,6 @@  __arm_vldrhq_gather_shifted_offset_z (uint16_t const * __base, uint16x8_t __offs
  return __arm_vldrhq_gather_shifted_offset_z_u16 (__base, __offset, __p);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset (int64_t const * __base, uint64x2_t __offset)
-{
- return __arm_vldrdq_gather_offset_s64 (__base, __offset);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset (uint64_t const * __base, uint64x2_t __offset)
-{
- return __arm_vldrdq_gather_offset_u64 (__base, __offset);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset_z (int64_t const * __base, uint64x2_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrdq_gather_offset_z_s64 (__base, __offset, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrdq_gather_offset_z (uint64_t const * __base, uint64x2_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrdq_gather_offset_z_u64 (__base, __offset, __p);
-}
-
 __extension__ extern __inline int64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrdq_gather_shifted_offset (int64_t const * __base, uint64x2_t __offset)
@@ -1639,34 +1206,6 @@  __arm_vldrdq_gather_shifted_offset_z (uint64_t const * __base, uint64x2_t __offs
  return __arm_vldrdq_gather_shifted_offset_z_u64 (__base, __offset, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset (int32_t const * __base, uint32x4_t __offset)
-{
- return __arm_vldrwq_gather_offset_s32 (__base, __offset);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset (uint32_t const * __base, uint32x4_t __offset)
-{
- return __arm_vldrwq_gather_offset_u32 (__base, __offset);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_z (int32_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrwq_gather_offset_z_s32 (__base, __offset, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_z (uint32_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrwq_gather_offset_z_u32 (__base, __offset, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrwq_gather_shifted_offset (int32_t const * __base, uint32x4_t __offset)
@@ -1949,20 +1488,6 @@  __arm_vst4q (float32_t * __addr, float32x4x4_t __value)
  __arm_vst4q_f32 (__addr, __value);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset (float16_t const * __base, uint16x8_t __offset)
-{
- return __arm_vldrhq_gather_offset_f16 (__base, __offset);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_gather_offset_z (float16_t const * __base, uint16x8_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrhq_gather_offset_z_f16 (__base, __offset, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrhq_gather_shifted_offset (float16_t const * __base, uint16x8_t __offset)
@@ -1977,20 +1502,6 @@  __arm_vldrhq_gather_shifted_offset_z (float16_t const * __base, uint16x8_t __off
  return __arm_vldrhq_gather_shifted_offset_z_f16 (__base, __offset, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset (float32_t const * __base, uint32x4_t __offset)
-{
- return __arm_vldrwq_gather_offset_f32 (__base, __offset);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_gather_offset_z (float32_t const * __base, uint32x4_t __offset, mve_pred16_t __p)
-{
- return __arm_vldrwq_gather_offset_z_f32 (__base, __offset, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vldrwq_gather_shifted_offset (float32_t const * __base, uint32x4_t __offset)
@@ -2420,22 +1931,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld4q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *)), \
   int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld4q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *))))
 
-#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t)));})
-
-#define __arm_vldrhq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
 #define __arm_vldrhq_gather_shifted_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
@@ -2452,18 +1947,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
   int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
 
-#define __arm_vldrwq_gather_offset(p0,p1) ( \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
-  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \
-  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1), \
-  int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vldrwq_gather_offset_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), p1)))
-
-#define __arm_vldrwq_gather_offset_z(p0,p1,p2) ( \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
-  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_z_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1, p2), \
-  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_z_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1, p2), \
-  int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vldrwq_gather_offset_z_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), p1, p2)))
-
 #define __arm_vldrwq_gather_shifted_offset(p0,p1) ( \
   _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
   int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_shifted_offset_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \
@@ -2538,29 +2021,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
 
-#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vldrhq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_offset_z_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vldrhq_gather_shifted_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
@@ -2575,16 +2035,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_shifted_offset_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrhq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vldrwq_gather_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1), \
-  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));})
-
-#define __arm_vldrwq_gather_offset_z(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_offset_z_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1, p2), \
-  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_offset_z_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
-
 #define __arm_vldrwq_gather_shifted_offset(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_shifted_offset_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1), \
@@ -2658,14 +2108,6 @@  extern void *__ARM_undef;
 
 #endif /* MVE Integer.  */
 
-#define __arm_vldrdq_gather_offset(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
-  int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_offset_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1), \
-  int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_offset_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1)))
-
-#define __arm_vldrdq_gather_offset_z(p0,p1,p2) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
-  int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_offset_z_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1, p2), \
-  int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_offset_z_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1, p2)))
-
 #define __arm_vldrdq_gather_shifted_offset(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
   int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_shifted_offset_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1), \
   int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_shifted_offset_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1)))
@@ -2674,24 +2116,6 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_shifted_offset_z_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1, p2), \
   int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_shifted_offset_z_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1, p2)))
 
-#define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_s16 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_s32 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_s16(__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_s32(__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_u8(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #endif /* __cplusplus  */
 #endif /* __ARM_FEATURE_MVE  */
 #endif /* _GCC_ARM_MVE_H.  */
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 07f5a59b248..5f328f5e630 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -663,51 +663,31 @@  VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf)
 VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf)
 VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf)
 VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf)
-VAR3 (LDRGU, vldrbq_gather_offset_u, v16qi, v8hi, v4si)
-VAR3 (LDRGS, vldrbq_gather_offset_s, v16qi, v8hi, v4si)
 VAR1 (LDRGBS, vldrwq_gather_base_s, v4si)
 VAR1 (LDRGBU, vldrwq_gather_base_u, v4si)
 VAR1 (LDRGBS_Z, vldrwq_gather_base_z_s, v4si)
 VAR1 (LDRGBU_Z, vldrwq_gather_base_z_u, v4si)
-VAR3 (LDRGS_Z, vldrbq_gather_offset_z_s, v16qi, v8hi, v4si)
-VAR3 (LDRGU_Z, vldrbq_gather_offset_z_u, v16qi, v8hi, v4si)
 VAR2 (LDRGU_Z, vldrhq_gather_shifted_offset_z_u, v8hi, v4si)
-VAR2 (LDRGU_Z, vldrhq_gather_offset_z_u, v8hi, v4si)
 VAR2 (LDRGU, vldrhq_gather_shifted_offset_u, v8hi, v4si)
-VAR2 (LDRGU, vldrhq_gather_offset_u, v8hi, v4si)
 VAR2 (LDRGS_Z, vldrhq_gather_shifted_offset_z_s, v8hi, v4si)
-VAR2 (LDRGS_Z, vldrhq_gather_offset_z_s, v8hi, v4si)
 VAR2 (LDRGS, vldrhq_gather_shifted_offset_s, v8hi, v4si)
-VAR2 (LDRGS, vldrhq_gather_offset_s, v8hi, v4si)
 VAR1 (LDRGBS, vldrdq_gather_base_s, v2di)
 VAR1 (LDRGBS, vldrwq_gather_base_f, v4sf)
 VAR1 (LDRGBS_Z, vldrdq_gather_base_z_s, v2di)
 VAR1 (LDRGBS_Z, vldrwq_gather_base_z_f, v4sf)
 VAR1 (LDRGBU, vldrdq_gather_base_u, v2di)
 VAR1 (LDRGBU_Z, vldrdq_gather_base_z_u, v2di)
-VAR1 (LDRGS, vldrdq_gather_offset_s, v2di)
 VAR1 (LDRGS, vldrdq_gather_shifted_offset_s, v2di)
-VAR1 (LDRGS, vldrhq_gather_offset_f, v8hf)
 VAR1 (LDRGS, vldrhq_gather_shifted_offset_f, v8hf)
-VAR1 (LDRGS, vldrwq_gather_offset_f, v4sf)
-VAR1 (LDRGS, vldrwq_gather_offset_s, v4si)
 VAR1 (LDRGS, vldrwq_gather_shifted_offset_f, v4sf)
 VAR1 (LDRGS, vldrwq_gather_shifted_offset_s, v4si)
-VAR1 (LDRGS_Z, vldrdq_gather_offset_z_s, v2di)
 VAR1 (LDRGS_Z, vldrdq_gather_shifted_offset_z_s, v2di)
-VAR1 (LDRGS_Z, vldrhq_gather_offset_z_f, v8hf)
 VAR1 (LDRGS_Z, vldrhq_gather_shifted_offset_z_f, v8hf)
-VAR1 (LDRGS_Z, vldrwq_gather_offset_z_f, v4sf)
-VAR1 (LDRGS_Z, vldrwq_gather_offset_z_s, v4si)
 VAR1 (LDRGS_Z, vldrwq_gather_shifted_offset_z_f, v4sf)
 VAR1 (LDRGS_Z, vldrwq_gather_shifted_offset_z_s, v4si)
-VAR1 (LDRGU, vldrdq_gather_offset_u, v2di)
 VAR1 (LDRGU, vldrdq_gather_shifted_offset_u, v2di)
-VAR1 (LDRGU, vldrwq_gather_offset_u, v4si)
 VAR1 (LDRGU, vldrwq_gather_shifted_offset_u, v4si)
-VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di)
 VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di)
-VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si)
 VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si)
 VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si)
 VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 155e9ef6368..4c0aac53c0d 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1797,6 +1797,16 @@  (define_mode_attr V_u_elem [(V8QI "u8")  (V16QI "u8")
                             (DI   "u64") (V2DI  "u64")
                             (V2SF "f32") (V4SF  "f32")])
 
+;; Like V_u_elem, but for MVE gather loads.
+;; Using "uNN" (or plain "NN") for every mode would work too; V8HF maps
+;; to "f16" only to match the expected output described in ACLE.
+(define_mode_attr MVE_u_elem [(V16QI "u8")
+                              (V8HI  "u16")
+                              (V4SI  "u32")
+                              (V2DI  "u64")
+                              (V8HF  "f16")
+                              (V4SF  "u32")])
+
 ;; Element types for extraction of unsigned scalars.
 (define_mode_attr V_uf_sclr [(V8QI "u8")  (V16QI "u8")
                  (V4HI "u16") (V8HI "u16")
@@ -2527,12 +2537,12 @@  (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
 		       (VMLALDAVAXQ_P_S "s")
 		       (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
-		       (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRWQGB_S "s")
-		       (VLDRWQGB_U "u") (VLDRHQGO_S "s")
-		       (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u")
+		       (VLDRWQGB_S "s")
+		       (VLDRWQGB_U "u")
+		       (VLDRHQGSO_S "s") (VLDRHQGSO_U "u")
 		       (VLDRDQGB_S "s") (VLDRDQGB_U "u")
-		       (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s")
-		       (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u")
+		       (VLDRDQGSO_S "s")
+		       (VLDRDQGSO_U "u")
 		       (VLDRWQGSO_S "s") (VLDRWQGSO_U "u")
 		       (VSTRDQSB_S "s") (VSTRDQSB_U "u")
 		       (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s")
@@ -2935,14 +2945,10 @@  (define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S])
 (define_int_iterator VSHLLxQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S])
 (define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U])
 (define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U])
-(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U])
 (define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U])
-(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U])
 (define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U])
 (define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U])
-(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U])
 (define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U])
-(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U])
 (define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U])
 (define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U])
 (define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 1963a1ec4f6..b437fc9883f 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -3416,30 +3416,92 @@  (define_insn "@mve_vstrq_scatter_base_<mode>"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_<mode>"))
   (set_attr "length" "4")])
 
+;; Vector gather loads with offset
 ;;
 ;; [vldrbq_gather_offset_s vldrbq_gather_offset_u]
+;; [vldrhq_gather_offset_s vldrhq_gather_offset_u]
+;; [vldrhq_gather_offset_f]
+;; [vldrwq_gather_offset_s vldrwq_gather_offset_u]
+;; [vldrwq_gather_offset_f]
+;; [vldrdq_gather_offset_s vldrdq_gather_offset_u]
 ;;
-(define_insn "mve_vldrbq_gather_offset_<supf><mode>"
-  [(set (match_operand:MVE_2 0 "s_register_operand" "=&w")
-	(unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "memory_operand" "Us")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VLDRBGOQ))
+(define_insn "@mve_vldrq_gather_offset_<mode>"
+  [(set (match_operand:MVE_VLD_ST_scatter 0 "s_register_operand" "=&w")
+	(unspec:MVE_VLD_ST_scatter
+	    [(match_operand:SI 1 "register_operand" "r")
+	     (match_operand:<MVE_scatter_offset> 2 "s_register_operand" "w")
+	     (mem:BLK (scratch))]
+	 VLDRGOQ))
   ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   if (!strcmp ("<supf>","s") && <V_sz_elem> == 8)
-     output_asm_insn ("vldrb.u8\t%q0, [%m1, %q2]",ops);
-   else
-     output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+  "vldr<MVE_elem_ch>.<MVE_u_elem>\t%q0, [%1, %q2]"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_<mode>"))
+  (set_attr "length" "4")])
+
+;; Extending vector gather loads with offset
+;;
+;; [vldrbq_gather_offset_s vldrbq_gather_offset_u]
+;; [vldrhq_gather_offset_s vldrhq_gather_offset_u]
+;;
+(define_insn "@mve_vldrq_gather_offset_extend_<mode><US>"
+  [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=&w")
+	(SE:<MVE_wide_n_TYPE>
+	  (unspec:MVE_w_narrow_TYPE
+	    [(match_operand:SI 1 "register_operand" "r")
+	     (match_operand:<MVE_wide_n_TYPE> 2 "s_register_operand" "w")
+	     (mem:BLK (scratch))]
+	   VLDRGOQ_EXT)))
+  ]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MVE_wide_n_TYPE>mode))"
+  "vldr<MVE_elem_ch>.<US><MVE_wide_n_sz_elem>\t%q0, [%1, %q2]"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_extend_<mode><US>"))
   (set_attr "length" "4")])
 
+;; Predicated gather loads with offset
+;;
+;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u]
+;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u]
+;; [vldrhq_gather_offset_z_f]
+;; [vldrwq_gather_offset_z_s vldrwq_gather_offset_z_u]
+;; [vldrwq_gather_offset_z_f]
+;; [vldrdq_gather_offset_z_s vldrdq_gather_offset_z_u]
+;;
+(define_insn "@mve_vldrq_gather_offset_z_<mode>"
+  [(set (match_operand:MVE_VLD_ST_scatter 0 "s_register_operand" "=&w")
+	(unspec:MVE_VLD_ST_scatter
+	    [(match_operand:SI 1 "register_operand" "r")
+	     (match_operand:<MVE_scatter_offset> 2 "s_register_operand" "w")
+	     (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")
+	     (mem:BLK (scratch))]
+	 VLDRGOQ_Z))
+  ]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+  "vpst\n\tvldr<MVE_elem_ch>t.<MVE_u_elem>\t%q0, [%1, %q2]"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_<mode>"))
+  (set_attr "length" "8")])
+
+;; Predicated extending gather loads with offset
+;;
+;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u]
+;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u]
+;;
+(define_insn "@mve_vldrq_gather_offset_z_extend_<mode><US>"
+  [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=&w")
+	(SE:<MVE_wide_n_TYPE>
+	   (unspec:MVE_w_narrow_TYPE
+	     [(match_operand:SI 1 "register_operand" "r")
+	      (match_operand:<MVE_wide_n_TYPE> 2 "s_register_operand" "w")
+	      (match_operand:<MVE_wide_n_VPRED> 3 "vpr_register_operand" "Up")
+	      (mem:BLK (scratch))]
+	VLDRGOQ_EXT_Z)))
+  ]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MVE_wide_n_TYPE>mode))"
+  "vpst\n\tvldr<MVE_elem_ch>t.<US><MVE_wide_n_sz_elem>\t%q0, [%1, %q2]"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_gather_offset_extend_<mode><US>"))
+  (set_attr "length" "8")])
+
 ;;
 ;; [vldrwq_gather_base_s vldrwq_gather_base_u]
 ;;
@@ -3482,32 +3544,6 @@  (define_insn "@mve_vstrq_scatter_base_p_<mode>"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_<mode>"))
   (set_attr "length" "8")])
 
-;;
-;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u]
-;;
-(define_insn "mve_vldrbq_gather_offset_z_<supf><mode>"
-  [(set (match_operand:MVE_2 0 "s_register_operand" "=&w")
-	(unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "memory_operand" "Us")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VLDRBGOQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[4];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   ops[3] = operands[3];
-   if (!strcmp ("<supf>","s") && <V_sz_elem> == 8)
-     output_asm_insn ("vpst\n\tvldrbt.u8\t%q0, [%m1, %q2]",ops);
-   else
-     output_asm_insn ("vpst\n\tvldrbt.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
-  (set_attr "length" "8")])
-
 ;;
 ;; [vldrwq_gather_base_z_s vldrwq_gather_base_z_u]
 ;;
@@ -3530,56 +3566,6 @@  (define_insn "mve_vldrwq_gather_base_z_<supf>v4si"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_<supf>v4si"))
   (set_attr "length" "8")])
 
-;;
-;; [vldrhq_gather_offset_s vldrhq_gather_offset_u]
-;;
-(define_insn "mve_vldrhq_gather_offset_<supf><mode>"
-  [(set (match_operand:MVE_5 0 "s_register_operand" "=&w")
-	(unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "memory_operand" "Us")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")]
-	VLDRHGOQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   if (!strcmp ("<supf>","s") && <V_sz_elem> == 16)
-     output_asm_insn ("vldrh.u16\t%q0, [%m1, %q2]",ops);
-   else
-     output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrhq_gather_offset_z_s vldrhq_gather_offset_z_u]
-;;
-(define_insn "mve_vldrhq_gather_offset_z_<supf><mode>"
-  [(set (match_operand:MVE_5 0 "s_register_operand" "=&w")
-	(unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "memory_operand" "Us")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")
-	]VLDRHGOQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[4];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   ops[3] = operands[3];
-   if (!strcmp ("<supf>","s") && <V_sz_elem> == 16)
-     output_asm_insn ("vpst\n\tvldrht.u16\t%q0, [%m1, %q2]",ops);
-   else
-     output_asm_insn ("vpst\n\tvldrht.<supf><V_sz_elem>\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_<supf><mode>"))
-  (set_attr "length" "8")])
-
 ;;
 ;; [vldrhq_gather_shifted_offset_s vldrhq_gather_shifted_offset_u]
 ;;
@@ -3673,49 +3659,6 @@  (define_insn "mve_vldrdq_gather_base_z_<supf>v2di"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_<supf>v2di"))
   (set_attr "length" "8")])
 
-;;
-;; [vldrdq_gather_offset_s vldrdq_gather_offset_u]
-;;
-(define_insn "mve_vldrdq_gather_offset_<supf>v2di"
- [(set (match_operand:V2DI 0 "s_register_operand" "=&w")
-       (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us")
-		     (match_operand:V2DI 2 "s_register_operand" "w")]
-	VLDRDGOQ))
- ]
- "TARGET_HAVE_MVE"
-{
-  rtx ops[3];
-  ops[0] = operands[0];
-  ops[1] = operands[1];
-  ops[2] = operands[2];
-  output_asm_insn ("vldrd.u64\t%q0, [%m1, %q2]",ops);
-  return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrdq_gather_offset_z_s vldrdq_gather_offset_z_u]
-;;
-(define_insn "mve_vldrdq_gather_offset_z_<supf>v2di"
- [(set (match_operand:V2DI 0 "s_register_operand" "=&w")
-       (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "Us")
-		     (match_operand:V2DI 2 "s_register_operand" "w")
-		     (match_operand:V2QI 3 "vpr_register_operand" "Up")]
-	VLDRDGOQ))
- ]
- "TARGET_HAVE_MVE"
-{
-  rtx ops[3];
-  ops[0] = operands[0];
-  ops[1] = operands[1];
-  ops[2] = operands[2];
-  output_asm_insn ("vpst\n\tvldrdt.u64\t%q0, [%m1, %q2]",ops);
-  return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_offset_<supf>v2di"))
-  (set_attr "length" "8")])
-
 ;;
 ;; [vldrdq_gather_shifted_offset_s vldrdq_gather_shifted_offset_u]
 ;;
@@ -3759,50 +3702,6 @@  (define_insn "mve_vldrdq_gather_shifted_offset_z_<supf>v2di"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_shifted_offset_<supf>v2di"))
   (set_attr "length" "8")])
 
-;;
-;; [vldrhq_gather_offset_f]
-;;
-(define_insn "mve_vldrhq_gather_offset_fv8hf"
-  [(set (match_operand:V8HF 0 "s_register_operand" "=&w")
-	(unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us")
-		      (match_operand:V8HI 2 "s_register_operand" "w")]
-	 VLDRHQGO_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vldrh.f16\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrhq_gather_offset_z_f]
-;;
-(define_insn "mve_vldrhq_gather_offset_z_fv8hf"
-  [(set (match_operand:V8HF 0 "s_register_operand" "=&w")
-	(unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us")
-		      (match_operand:V8HI 2 "s_register_operand" "w")
-		      (match_operand:V8BI 3 "vpr_register_operand" "Up")]
-	 VLDRHQGO_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[4];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   ops[3] = operands[3];
-   output_asm_insn ("vpst\n\tvldrht.f16\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_gather_offset_fv8hf"))
-  (set_attr "length" "8")])
-
 ;;
 ;; [vldrhq_gather_shifted_offset_f]
 ;;
@@ -3890,94 +3789,6 @@  (define_insn "mve_vldrwq_gather_base_z_fv4sf"
  [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_base_fv4sf"))
   (set_attr "length" "8")])
 
-;;
-;; [vldrwq_gather_offset_f]
-;;
-(define_insn "mve_vldrwq_gather_offset_fv4sf"
-  [(set (match_operand:V4SF 0 "s_register_operand" "=&w")
-	(unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us")
-		       (match_operand:V4SI 2 "s_register_operand" "w")]
-	 VLDRWQGO_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrwq_gather_offset_s vldrwq_gather_offset_u]
-;;
-(define_insn "mve_vldrwq_gather_offset_<supf>v4si"
-  [(set (match_operand:V4SI 0 "s_register_operand" "=&w")
-	(unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us")
-		       (match_operand:V4SI 2 "s_register_operand" "w")]
-	 VLDRWGOQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[3];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   output_asm_insn ("vldrw.u32\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrwq_gather_offset_z_f]
-;;
-(define_insn "mve_vldrwq_gather_offset_z_fv4sf"
-  [(set (match_operand:V4SF 0 "s_register_operand" "=&w")
-	(unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us")
-		      (match_operand:V4SI 2 "s_register_operand" "w")
-		      (match_operand:V4BI 3 "vpr_register_operand" "Up")]
-	 VLDRWQGO_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[4];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   ops[3] = operands[3];
-   output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_fv4sf"))
-  (set_attr "length" "8")])
-
-;;
-;; [vldrwq_gather_offset_z_s vldrwq_gather_offset_z_u]
-;;
-(define_insn "mve_vldrwq_gather_offset_z_<supf>v4si"
-  [(set (match_operand:V4SI 0 "s_register_operand" "=&w")
-	(unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us")
-		      (match_operand:V4SI 2 "s_register_operand" "w")
-		      (match_operand:V4BI 3 "vpr_register_operand" "Up")]
-	 VLDRWGOQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[4];
-   ops[0] = operands[0];
-   ops[1] = operands[1];
-   ops[2] = operands[2];
-   ops[3] = operands[3];
-   output_asm_insn ("vpst\n\tvldrwt.u32\t%q0, [%m1, %q2]",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_gather_offset_<supf>v4si"))
-  (set_attr "length" "8")])
-
 ;;
 ;; [vldrwq_gather_shifted_offset_f]
 ;;
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 182908909ab..cdad4ed8a7a 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -1156,26 +1156,20 @@  (define_c_enum "unspec" [
   VLDRQ_Z
   VLDRQ_EXT
   VLDRQ_EXT_Z
-  VLDRBQGO_S
-  VLDRBQGO_U
+  VLDRGOQ
+  VLDRGOQ_Z
+  VLDRGOQ_EXT
+  VLDRGOQ_EXT_Z
   VLDRWQGB_S
   VLDRWQGB_U
-  VLDRHQGO_S
-  VLDRHQGO_U
   VLDRHQGSO_S
   VLDRHQGSO_U
   VLDRDQGB_S
   VLDRDQGB_U
-  VLDRDQGO_S
-  VLDRDQGO_U
   VLDRDQGSO_S
   VLDRDQGSO_U
-  VLDRHQGO_F
   VLDRHQGSO_F
   VLDRWQGB_F
-  VLDRWQGO_F
-  VLDRWQGO_S
-  VLDRWQGO_U
   VLDRWQGSO_F
   VLDRWQGSO_S
   VLDRWQGSO_U