@@ -687,28 +687,6 @@ arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
qualifier_predicate};
#define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers)
-static enum arm_type_qualifiers
-arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_none};
-#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_unsigned};
-#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_const,
- qualifier_none, qualifier_predicate};
-#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers)
-
-static enum arm_type_qualifiers
-arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_const,
- qualifier_unsigned, qualifier_predicate};
-#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers)
-
static enum arm_type_qualifiers
arm_lsll_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_none};
@@ -304,23 +304,59 @@ public:
return CP_WRITE_MEMORY;
}
+ machine_mode memory_vector_mode (const function_instance &fi) const override
+ {
+ poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
+ return arm_mve_data_mode (m_to_int_mode, nunits).require ();
+ }
+
rtx expand (function_expander &e) const override
{
insn_code icode;
+ rtx insns, base_ptr, new_base;
+ machine_mode base_mode;
+
+ if ((e.mode_suffix_id != MODE_none)
+ && (e.mode_suffix_id != MODE_wb))
+ gcc_unreachable ();
+
+      /* In _wb mode, the vector of base addresses is passed via a
+	 pointer; dereference it. */
+ if (e.mode_suffix_id == MODE_wb)
+ {
+ base_mode = e.memory_vector_mode ();
+ rtx base = gen_reg_rtx (base_mode);
+ base_ptr = e.args[0];
+ emit_insn (gen_rtx_SET (base, gen_rtx_MEM (base_mode, base_ptr)));
+ e.args[0] = base;
+ new_base = gen_reg_rtx (base_mode);
+ e.args.quick_insert (0, new_base);
+ }
+
switch (e.pred)
{
case PRED_none:
- icode = code_for_mve_vstrq_scatter_base (e.vector_mode (0));
+ icode = (e.mode_suffix_id == MODE_none)
+ ? code_for_mve_vstrq_scatter_base (e.vector_mode (0))
+ : code_for_mve_vstrq_scatter_base_wb (e.vector_mode (0));
break;
case PRED_p:
- icode = code_for_mve_vstrq_scatter_base_p (e.vector_mode (0));
+ icode = (e.mode_suffix_id == MODE_none)
+ ? code_for_mve_vstrq_scatter_base_p (e.vector_mode (0))
+ : code_for_mve_vstrq_scatter_base_wb_p (e.vector_mode (0));
break;
default:
gcc_unreachable ();
}
- return e.use_exact_insn (icode);
+ insns = e.use_exact_insn (icode);
+
+    /* Write the updated base addresses back through the pointer. */
+ if (e.mode_suffix_id == MODE_wb)
+ emit_insn (gen_rtx_SET (gen_rtx_MEM (base_mode, base_ptr), new_base));
+
+ return insns;
}
/* The mode of a single memory element. */
@@ -150,6 +150,7 @@ parse_element_type (const function_instance &instance, const char *&format)
_ - void
al - array pointer for loads
as - array pointer for stores
+ b - pointer to vector of unsigned, width given by the first type suffix
p - predicates with type mve_pred16_t
s<elt> - a scalar type with the given element suffix
t<elt> - a vector or tuple type with given element suffix [*1]
@@ -181,6 +182,15 @@ parse_type (const function_instance &instance, const char *&format)
gcc_unreachable ();
}
+ if (ch == 'b')
+ {
+ type_class_index tclass = TYPE_unsigned;
+ unsigned int bits = instance.type_suffix (0).element_bits;
+ type_suffix_index suffix = find_type_suffix (tclass, bits);
+ tree acle_type = acle_vector_types[0][type_suffixes[suffix].vector_type];
+ return build_pointer_type (acle_type);
+ }
+
if (ch == 'p')
return get_mve_pred16_t ();
@@ -1684,7 +1694,8 @@ SHAPE (store_scatter_offset)
Example: vstrbq_scatter_base
void [__arm_]vstrwq_scatter_base[_s32](uint32x4_t addr, const int offset, int32x4_t value)
- void [__arm_]vstrwq_scatter_base_p[_s32](uint32x4_t addr, const int offset, int32x4_t value, mve_pred16_t p) */
+ void [__arm_]vstrwq_scatter_base_p[_s32](uint32x4_t addr, const int offset, int32x4_t value, mve_pred16_t p)
+ void [__arm_]vstrdq_scatter_base_wb[_s64](uint64x2_t *addr, const int offset, int64x2_t value) */
struct store_scatter_base_def : public store_scatter
{
void
@@ -1692,12 +1703,17 @@ struct store_scatter_base_def : public store_scatter
bool preserve_user_namespace) const override
{
b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+ b.add_overloaded_functions (group, MODE_wb, preserve_user_namespace);
build_all (b, "_,vu0,ss64,v0", group, MODE_none, preserve_user_namespace);
+ build_all (b, "_,b,ss64,v0", group, MODE_wb, preserve_user_namespace);
}
tree
resolve (function_resolver &r) const override
{
+ gcc_assert ((r.mode_suffix_id == MODE_none)
+ || (r.mode_suffix_id == MODE_wb));
+
unsigned int i, nargs;
type_suffix_index type;
if (!r.check_gp_argument (3, i, nargs)
@@ -1708,10 +1724,20 @@ struct store_scatter_base_def : public store_scatter
type_suffix_index base_type
= find_type_suffix (TYPE_unsigned, type_suffixes[type].element_bits);
- /* Base (arg 0) should be a vector of unsigned with same width as value
- (arg 2). */
- if (!r.require_matching_vector_type (0, base_type))
- return error_mark_node;
+ if (r.mode_suffix_id == MODE_none)
+ {
+	/* Base (arg 0) should be a vector of unsigned with the same
+	   width as value (arg 2). */
+ if (!r.require_matching_vector_type (0, base_type))
+ return error_mark_node;
+ }
+ else
+ {
+ /* Base (arg 0) should be a pointer to a vector of unsigned with the
+ same width as value (arg 2). */
+ if (!r.require_pointer_to_type (0, r.get_vector_type (base_type)))
+ return error_mark_node;
+ }
return r.resolve_to (r.mode_suffix_id, type);
}
@@ -1655,6 +1655,31 @@ function_resolver::require_pointer_type (unsigned int argno)
return true;
}
+/* Require argument ARGNO to be a pointer to EXPECTED type. Return true on
+ success, otherwise report an error and return false. */
+bool
+function_resolver::require_pointer_to_type (unsigned int argno, tree expected)
+{
+ tree actual = get_argument_type (argno);
+ if (TREE_CODE (actual) != POINTER_TYPE)
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a pointer type", actual, argno + 1, fndecl);
+ return false;
+ }
+
+ tree target = TREE_TYPE (actual);
+ if (target != expected)
+ {
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a pointer to %qT", actual, argno + 1, fndecl,
+ expected);
+ return false;
+ }
+
+ return true;
+}
+
/* Require the function to have exactly EXPECTED arguments. Return true
if it does, otherwise report an appropriate error. */
bool
@@ -398,6 +398,7 @@ public:
unsigned int = SAME_SIZE);
bool require_scalar_type (unsigned int, const char *);
bool require_pointer_type (unsigned int);
+ bool require_pointer_to_type (unsigned int, tree);
bool require_integer_immediate (unsigned int);
bool require_derived_scalar_type (unsigned int, type_class_index,
unsigned int = SAME_SIZE);
@@ -57,10 +57,6 @@
#define vldrwq_gather_shifted_offset(__base, __offset) __arm_vldrwq_gather_shifted_offset(__base, __offset)
#define vldrwq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p)
#define vuninitializedq(__v) __arm_vuninitializedq(__v)
-#define vstrdq_scatter_base_wb(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb(__addr, __offset, __value)
-#define vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb(__addr, __offset, __value)
#define vst2q(__addr, __value) __arm_vst2q(__addr, __value)
#define vld2q(__addr) __arm_vld2q(__addr)
#define vld4q(__addr) __arm_vld4q(__addr)
@@ -159,16 +155,6 @@
#define vldrwq_gather_base_wb_z_f32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_f32(__addr, __offset, __p)
#define vldrwq_gather_base_wb_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_s32(__addr, __offset, __p)
#define vldrwq_gather_base_wb_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_wb_z_u32(__addr, __offset, __p)
-#define vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_s64(__addr, __offset, __value, __p)
-#define vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p_u64(__addr, __offset, __value, __p)
-#define vstrdq_scatter_base_wb_s64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_s64(__addr, __offset, __value)
-#define vstrdq_scatter_base_wb_u64(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb_u64(__addr, __offset, __value)
-#define vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_s32(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_f32(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p_u32(__addr, __offset, __value, __p)
-#define vstrwq_scatter_base_wb_s32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_s32(__addr, __offset, __value)
-#define vstrwq_scatter_base_wb_u32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_u32(__addr, __offset, __value)
-#define vstrwq_scatter_base_wb_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_f32(__addr, __offset, __value)
#define vst2q_s8(__addr, __value) __arm_vst2q_s8(__addr, __value)
#define vst2q_u8(__addr, __value) __arm_vst2q_u8(__addr, __value)
#define vld2q_s8(__addr) __arm_vld2q_s8(__addr)
@@ -749,62 +735,6 @@ __arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_
return result;
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value)
-{
- *__addr = __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value)
-{
- *__addr = __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
-{
- *__addr = __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
-{
- *__addr = __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
-{
- *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
-{
- *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value)
-{
- *__addr = __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value)
-{
- *__addr = __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q_s8 (int8_t * __addr, int8x16x2_t __value)
@@ -1325,20 +1255,6 @@ __arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_
return result;
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value)
-{
- *__addr = __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
-{
- *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
-}
-
__extension__ extern __inline float16x8x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld4q_f16 (float16_t const * __addr)
@@ -1779,62 +1695,6 @@ __arm_vldrwq_gather_shifted_offset_z (uint32_t const * __base, uint32x4_t __offs
return __arm_vldrwq_gather_shifted_offset_z_u32 (__base, __offset, __p);
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb (uint64x2_t * __addr, const int __offset, int64x2_t __value)
-{
- __arm_vstrdq_scatter_base_wb_s64 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb (uint64x2_t * __addr, const int __offset, uint64x2_t __value)
-{
- __arm_vstrdq_scatter_base_wb_u64 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
-{
- __arm_vstrdq_scatter_base_wb_p_s64 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrdq_scatter_base_wb_p (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
-{
- __arm_vstrdq_scatter_base_wb_p_u64 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_wb_p_s32 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_wb_p_u32 (__addr, __offset, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, int32x4_t __value)
-{
- __arm_vstrwq_scatter_base_wb_s32 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, uint32x4_t __value)
-{
- __arm_vstrwq_scatter_base_wb_u32 (__addr, __offset, __value);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q (int8_t * __addr, int8x16x2_t __value)
@@ -2145,20 +2005,6 @@ __arm_vldrwq_gather_shifted_offset_z (float32_t const * __base, uint32x4_t __off
return __arm_vldrwq_gather_shifted_offset_z_f32 (__base, __offset, __p);
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, float32x4_t __value)
-{
- __arm_vstrwq_scatter_base_wb_f32 (__addr, __offset, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_scatter_base_wb_p (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_scatter_base_wb_p_f32 (__addr, __offset, __value, __p);
-}
-
__extension__ extern __inline float16x8x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld4q (float16_t const * __addr)
@@ -2654,18 +2500,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_float16x8_t]: __arm_vuninitializedq_f16 (), \
int (*)[__ARM_mve_type_float32x4_t]: __arm_vuninitializedq_f32 ());})
-#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)), \
- int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
int (*)[__ARM_mve_type_int8x16_t]: __arm_vgetq_lane_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -2695,16 +2529,6 @@ extern void *__ARM_undef;
#else /* MVE Integer. */
-#define __arm_vstrwq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vstrwq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_wb_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define __arm_vst4q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x4_t]: __arm_vst4q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16x4_t)), \
@@ -2834,17 +2658,6 @@ extern void *__ARM_undef;
#endif /* MVE Integer. */
-
-#define __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
- int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
-
-#define __arm_vstrdq_scatter_base_wb(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
- int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_wb_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t)));})
-
#define __arm_vldrdq_gather_offset(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_offset_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1), \
int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_offset_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1)))
@@ -709,16 +709,6 @@ VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di)
VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di)
VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si)
VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
-VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
-VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di)
-VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si)
-VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf)
-VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di)
-VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_s, v4si)
-VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_f, v4sf)
-VAR1 (STRSBWBS_P, vstrdq_scatter_base_wb_p_s, v2di)
VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si)
VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di)
VAR1 (LDRGBWBU, vldrwq_gather_base_nowb_u, v4si)
@@ -2535,10 +2535,9 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
(VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u")
(VLDRWQGSO_S "s") (VLDRWQGSO_U "u")
(VSTRDQSB_S "s") (VSTRDQSB_U "u")
- (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u")
(VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s")
- (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s")
- (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u")
+ (VLDRDQGBWB_U "u") (VADCQ_M_S "s")
+ (VSBCQ_U "u") (VSBCQ_M_U "u")
(VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u")
(VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s")
(VADCQ_U "u") (VADCQ_M_U "u") (VADCQ_S "s")
@@ -2945,10 +2944,7 @@ (define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U])
(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U])
(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U])
(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U])
-(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U])
-(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U])
(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U])
-(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U])
(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U])
(define_int_iterator VxCIQ [VADCIQ_U VADCIQ_S VSBCIQ_U VSBCIQ_S])
(define_int_iterator VxCIQ_M [VADCIQ_M_U VADCIQ_M_S VSBCIQ_M_U VSBCIQ_M_S])
@@ -4293,163 +4293,51 @@ (define_expand "mve_viwdupq_m_wb_u<mode>"
DONE;
})
+;; Vector scatter stores with base and write-back
;;
;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u]
+;; [vstrwq_scatter_base_wb_f]
+;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
;;
-(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si"
+(define_insn "@mve_vstrq_scatter_base_wb_<mode>"
[(set (mem:BLK (scratch))
(unspec:BLK
- [(match_operand:V4SI 1 "s_register_operand" "0")
+ [(match_operand:<MVE_scatter_offset> 1 "s_register_operand" "0")
(match_operand:SI 2 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 3 "s_register_operand" "w")]
- VSTRWSBWBQ))
- (set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_dup 1) (match_dup 2)]
- VSTRWSBWBQ))
+ (match_operand:MVE_4 3 "s_register_operand" "w")]
+ VSTRSBWBQ))
+ (set (match_operand:<MVE_scatter_offset> 0 "s_register_operand" "=w")
+ (unspec:<MVE_scatter_offset> [(match_dup 1) (match_dup 2)]
+ VSTRSBWBQ))
]
- "TARGET_HAVE_MVE"
-{
- rtx ops[3];
- ops[0] = operands[1];
- ops[1] = operands[2];
- ops[2] = operands[3];
- output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+ "vstr<MVE_elem_ch>.u<V_sz_elem>\t%q3, [%q1, %2]!"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_wb_<mode>"))
(set_attr "length" "4")])
+;; Predicated vector scatter stores with base and write-back
;;
;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u]
-;;
-(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:V4SI 1 "s_register_operand" "0")
- (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SI 3 "s_register_operand" "w")
- (match_operand:V4BI 4 "vpr_register_operand" "Up")]
- VSTRWSBWBQ))
- (set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_dup 1) (match_dup 2)]
- VSTRWSBWBQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[3];
- ops[0] = operands[1];
- ops[1] = operands[2];
- ops[2] = operands[3];
- output_asm_insn ("vpst\;\tvstrwt.u32\t%q2, [%q0, %1]!",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_<supf>v4si"))
- (set_attr "length" "8")])
-
-;;
-;; [vstrwq_scatter_base_wb_f]
-;;
-(define_insn "mve_vstrwq_scatter_base_wb_fv4sf"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:V4SI 1 "s_register_operand" "0")
- (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
- (match_operand:V4SF 3 "s_register_operand" "w")]
- VSTRWQSBWB_F))
- (set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_dup 1) (match_dup 2)]
- VSTRWQSBWB_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[3];
- ops[0] = operands[1];
- ops[1] = operands[2];
- ops[2] = operands[3];
- output_asm_insn ("vstrw.u32\t%q2, [%q0, %1]!",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
- (set_attr "length" "4")])
-
-;;
;; [vstrwq_scatter_base_wb_p_f]
+;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u]
;;
-(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf"
+(define_insn "@mve_vstrq_scatter_base_wb_p_<mode>"
[(set (mem:BLK (scratch))
(unspec:BLK
- [(match_operand:V4SI 1 "s_register_operand" "0")
- (match_operand:SI 2 "mve_vstrw_immediate" "Rl")
- (match_operand:V4SF 3 "s_register_operand" "w")
- (match_operand:V4BI 4 "vpr_register_operand" "Up")]
- VSTRWQSBWB_F))
- (set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_dup 1) (match_dup 2)]
- VSTRWQSBWB_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[3];
- ops[0] = operands[1];
- ops[1] = operands[2];
- ops[2] = operands[3];
- output_asm_insn ("vpst\;vstrwt.u32\t%q2, [%q0, %1]!",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_wb_fv4sf"))
- (set_attr "length" "8")])
-
-;;
-;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u]
-;;
-(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:V2DI 1 "s_register_operand" "0")
+ [(match_operand:<MVE_scatter_offset> 1 "s_register_operand" "0")
(match_operand:SI 2 "mve_vldrd_immediate" "Ri")
- (match_operand:V2DI 3 "s_register_operand" "w")]
- VSTRDSBWBQ))
- (set (match_operand:V2DI 0 "s_register_operand" "=&w")
- (unspec:V2DI [(match_dup 1) (match_dup 2)]
- VSTRDSBWBQ))
+ (match_operand:MVE_4 3 "s_register_operand" "w")
+ (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
+ VSTRSBWBQ_P))
+ (set (match_operand:<MVE_scatter_offset> 0 "s_register_operand" "=w")
+ (unspec:<MVE_scatter_offset> [(match_dup 1) (match_dup 2)]
+ VSTRSBWBQ_P))
]
- "TARGET_HAVE_MVE"
-{
- rtx ops[3];
- ops[0] = operands[1];
- ops[1] = operands[2];
- ops[2] = operands[3];
- output_asm_insn ("vstrd.u64\t%q2, [%q0, %1]!",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
- (set_attr "length" "4")])
-
-;;
-;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u]
-;;
-(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:V2DI 1 "s_register_operand" "0")
- (match_operand:SI 2 "mve_vldrd_immediate" "Ri")
- (match_operand:V2DI 3 "s_register_operand" "w")
- (match_operand:V2QI 4 "vpr_register_operand" "Up")]
- VSTRDSBWBQ))
- (set (match_operand:V2DI 0 "s_register_operand" "=w")
- (unspec:V2DI [(match_dup 1) (match_dup 2)]
- VSTRDSBWBQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[3];
- ops[0] = operands[1];
- ops[1] = operands[2];
- ops[2] = operands[3];
- output_asm_insn ("vpst\;vstrdt.u64\t%q2, [%q0, %1]!",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrdq_scatter_base_wb_<supf>v2di"))
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+ "vpst\;\tvstr<MVE_elem_ch>t.u<V_sz_elem>\t%q3, [%q1, %2]!"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_scatter_base_wb_<mode>"))
(set_attr "length" "8")])
(define_expand "mve_vldrwq_gather_base_wb_<supf>v4si"
@@ -1197,14 +1197,11 @@ (define_c_enum "unspec" [
VIDUPQ_M
VIWDUPQ
VIWDUPQ_M
- VSTRWQSBWB_S
- VSTRWQSBWB_U
+ VSTRSBWBQ
+ VSTRSBWBQ_P
VLDRWQGBWB_S
VLDRWQGBWB_U
- VSTRWQSBWB_F
VLDRWQGBWB_F
- VSTRDQSBWB_S
- VSTRDQSBWB_U
VLDRDQGBWB_S
VLDRDQGBWB_U
VADCQ_U