@@ -323,11 +323,11 @@ m32c*-*-*)
;;
aarch64*-*-*)
cpu_type=aarch64
- extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h"
+ extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h"
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
d_target_objs="aarch64-d.o"
- extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o"
+ extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o"
target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.cc \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc"
target_has_targetm_common=yes
;;
@@ -288,6 +288,8 @@ aarch64_pragma_aarch64 (cpp_reader *)
const char *name = TREE_STRING_POINTER (x);
if (strcmp (name, "arm_sve.h") == 0)
aarch64_sve::handle_arm_sve_h ();
+ else if (strcmp (name, "arm_sme.h") == 0)
+ aarch64_sve::handle_arm_sme_h ();
else if (strcmp (name, "arm_neon.h") == 0)
handle_arm_neon_h ();
else if (strcmp (name, "arm_acle.h") == 0)
@@ -131,6 +131,10 @@ AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (),
AARCH64_OPT_EXTENSION("sme", SME, (SVE2), (), (), "sme")
+AARCH64_OPT_EXTENSION("sme-i16i64", SME_I16I64, (SME), (), (), "")
+
+AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "")
+
AARCH64_OPT_EXTENSION("tme", TME, (), (), (), "")
AARCH64_OPT_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm")
@@ -808,7 +808,11 @@ bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
int aarch64_add_offset_temporaries (rtx);
void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
bool aarch64_rdsvl_immediate_p (const_rtx);
+rtx aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT,
+ aarch64_feature_flags);
char *aarch64_output_rdsvl (const_rtx);
+bool aarch64_addsvl_addspl_immediate_p (const_rtx);
+char *aarch64_output_addsvl_addspl (rtx);
bool aarch64_mov_operand_p (rtx, machine_mode);
rtx aarch64_reverse_mask (machine_mode, unsigned int);
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
@@ -851,6 +855,7 @@ bool aarch64_uimm12_shift (HOST_WIDE_INT);
int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
bool aarch64_use_return_insn_p (void);
const char *aarch64_output_casesi (rtx *);
+const char *aarch64_output_sme_zero (rtx);
arm_pcs aarch64_tlsdesc_abi_id ();
enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT);
@@ -865,7 +870,6 @@ int aarch64_uxt_size (int, HOST_WIDE_INT);
int aarch64_vec_fpconst_pow_of_2 (rtx);
rtx aarch64_eh_return_handler_rtx (void);
rtx aarch64_mask_from_zextract_ops (rtx, rtx);
-const char *aarch64_output_move_struct (rtx *operands);
rtx aarch64_return_addr_rtx (void);
rtx aarch64_return_addr (int, rtx);
rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
@@ -879,6 +883,7 @@ bool aarch64_sve_ldnf1_operand_p (rtx);
bool aarch64_sve_ldr_operand_p (rtx);
bool aarch64_sve_prefetch_operand_p (rtx, machine_mode);
bool aarch64_sve_struct_memory_operand_p (rtx);
+bool aarch64_sme_ldr_vnum_offset_p (rtx, rtx);
rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int);
bool aarch64_stepped_int_parallel_p (rtx, int);
@@ -996,6 +1001,7 @@ void handle_arm_neon_h (void);
namespace aarch64_sve {
void init_builtins ();
void handle_arm_sve_h ();
+ void handle_arm_sme_h ();
tree builtin_decl (unsigned, bool);
bool builtin_type_p (const_tree);
bool builtin_type_p (const_tree, unsigned int *, unsigned int *);
@@ -24,6 +24,18 @@
;; ---- Test current state
;; ---- PSTATE.SM management
;; ---- PSTATE.ZA management
+;;
+;; == Loads, stores and moves
+;; ---- Single-vector loads
+;; ---- Single-vector stores
+;; ---- Single-vector moves
+;; ---- Zeroing
+;;
+;; == Unary operations
+;; ---- Single vector input
+;;
+;; == Binary operations
+;; ---- Sum of outer products
;; =========================================================================
;; == State management
@@ -269,3 +281,326 @@ (define_insn_and_split "aarch64_restore_za"
DONE;
}
)
+;; =========================================================================
+;; == Loads, stores and moves
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Single-vector loads
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LD1
+;; - LDR
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [
+ UNSPEC_SME_LDR
+])
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (reg:SME_ZA_I ZA_REGNUM)
+ (unspec:SME_ZA_I
+ [(reg:SME_ZA_I ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:SI 1 "register_operand" "Uci")
+ (match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")]
+ SME_LD1))]
+ "TARGET_STREAMING_SME"
+ "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, 0] }, %2/z, %3"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+ [(set (reg:SME_ZA_I ZA_REGNUM)
+ (unspec:SME_ZA_I
+ [(reg:SME_ZA_I ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (plus:SI (match_operand:SI 1 "register_operand" "Uci")
+ (match_operand:SI 2 "const_int_operand"))
+ (match_operand:<VPRED> 3 "register_operand" "Upl")
+ (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")]
+ SME_LD1))]
+ "TARGET_STREAMING_SME
+ && IN_RANGE (UINTVAL (operands[2]), 0,
+ 15 / GET_MODE_UNIT_SIZE (<MODE>mode))"
+ "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, %2] }, %3/z, %4"
+)
+
+(define_insn "aarch64_sme_ldr0"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI
+ [(reg:VNx16QI ZA_REGNUM)
+ (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:VNx16QI 1 "aarch64_sync_memory_operand" "Q")]
+ UNSPEC_SME_LDR))]
+ "TARGET_SME"
+ "ldr\tza[%w0, 0], %1"
+)
+
+(define_insn "*aarch64_sme_ldrn<mode>"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI
+ [(reg:VNx16QI ZA_REGNUM)
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_int_operand"))
+ (mem:VNx16QI
+ (plus:P (match_operand:P 2 "register_operand" "rk")
+ (match_operand 3)))]
+ UNSPEC_SME_LDR))]
+ "TARGET_SME
+ && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
+ "ldr\tza[%w0, %1], [%2, #%1, mul vl]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Single-vector stores
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ST1
+;; - STR
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [
+ UNSPEC_SME_STR
+])
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
+ (unspec:SME_ZA_I
+ [(match_dup 0)
+ (match_operand:DI 1 "const_int_operand")
+ (match_operand:SI 2 "register_operand" "Uci")
+ (match_operand:<VPRED> 3 "register_operand" "Upl")
+ (reg:SME_ZA_I ZA_REGNUM)]
+ SME_ST1))]
+ "TARGET_STREAMING_SME"
+  "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, 0] }, %3, %0"
+)
+
+(define_insn "*aarch64_sme_<optab><mode>_plus"
+ [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
+ (unspec:SME_ZA_I
+ [(match_dup 0)
+ (match_operand:DI 1 "const_int_operand")
+ (plus:SI (match_operand:SI 2 "register_operand" "Uci")
+ (match_operand:SI 3 "const_int_operand"))
+ (match_operand:<VPRED> 4 "register_operand" "Upl")
+ (reg:SME_ZA_I ZA_REGNUM)]
+ SME_ST1))]
+ "TARGET_STREAMING_SME
+ && IN_RANGE (UINTVAL (operands[3]), 0,
+ 15 / GET_MODE_UNIT_SIZE (<MODE>mode))"
+  "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, %3] }, %4, %0"
+)
+
+(define_insn "aarch64_sme_str0"
+ [(set (match_operand:VNx16QI 0 "aarch64_sync_memory_operand" "+Q")
+ (unspec:VNx16QI
+ [(match_dup 0)
+ (match_operand:SI 1 "register_operand" "Uci")
+ (reg:VNx16QI ZA_REGNUM)]
+ UNSPEC_SME_STR))]
+ "TARGET_SME"
+ "str\tza[%w1, 0], %0"
+)
+
+(define_insn "*aarch64_sme_strn<mode>"
+ [(set (mem:VNx16QI
+ (plus:P (match_operand:P 2 "register_operand" "rk")
+ (match_operand 3)))
+ (unspec:VNx16QI
+ [(mem:VNx16QI (plus:P (match_dup 2) (match_dup 3)))
+ (plus:SI (match_operand:SI 0 "register_operand" "Uci")
+ (match_operand:SI 1 "const_int_operand"))
+ (reg:VNx16QI ZA_REGNUM)]
+ UNSPEC_SME_STR))]
+ "TARGET_SME
+ && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
+ "str\tza[%w0, %1], [%2, #%1, mul vl]"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Single-vector moves
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOVA
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "register_operand" "0")
+ (match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:SI 4 "register_operand" "Uci")
+ (reg:<V_INT_CONTAINER> ZA_REGNUM)]
+ SME_READ))]
+ "TARGET_STREAMING_SME"
+ "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, 0]"
+)
+
+(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "register_operand" "0")
+ (match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:DI 3 "const_int_operand")
+ (plus:SI (match_operand:SI 4 "register_operand" "Uci")
+ (match_operand:SI 5 "const_int_operand"))
+ (reg:<V_INT_CONTAINER> ZA_REGNUM)]
+ SME_READ))]
+ "TARGET_STREAMING_SME
+ && IN_RANGE (UINTVAL (operands[5]), 0,
+ 15 / GET_MODE_UNIT_SIZE (<MODE>mode))"
+ "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, %5]"
+)
+
+(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "register_operand" "0")
+ (match_operand:VNx2BI 2 "register_operand" "Upl")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:SI 4 "register_operand" "Uci")
+ (reg:VNx1TI_ONLY ZA_REGNUM)]
+ SME_READ))]
+ "TARGET_STREAMING_SME"
+ "mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]"
+)
+
+(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
+ [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (unspec:<V_INT_CONTAINER>
+ [(reg:SVE_FULL ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:SI 1 "register_operand" "Uci")
+ (match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_FULL 3 "register_operand" "w")]
+ SME_WRITE))]
+ "TARGET_STREAMING_SME"
+ "mova\tza%0<hv>.<Vetype>[%w1, 0], %2/m, %3.<Vetype>"
+)
+
+(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
+ [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
+ (unspec:<V_INT_CONTAINER>
+ [(reg:SVE_FULL ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (plus:SI (match_operand:SI 1 "register_operand" "Uci")
+ (match_operand:SI 2 "const_int_operand"))
+ (match_operand:<VPRED> 3 "register_operand" "Upl")
+ (match_operand:SVE_FULL 4 "register_operand" "w")]
+ SME_WRITE))]
+ "TARGET_STREAMING_SME
+ && IN_RANGE (UINTVAL (operands[2]), 0,
+ 15 / GET_MODE_UNIT_SIZE (<MODE>mode))"
+ "mova\tza%0<hv>.<Vetype>[%w1, %2], %3/m, %4.<Vetype>"
+)
+
+(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
+ [(set (reg:VNx1TI_ONLY ZA_REGNUM)
+ (unspec:VNx1TI_ONLY
+ [(reg:VNx1TI_ONLY ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:SI 1 "register_operand" "Uci")
+ (match_operand:VNx2BI 2 "register_operand" "Upl")
+ (match_operand:SVE_FULL 3 "register_operand" "w")]
+ SME_WRITE))]
+ "TARGET_STREAMING_SME"
+ "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Zeroing
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ZERO
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [UNSPEC_SME_ZERO])
+
+(define_insn "aarch64_sme_zero"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI [(reg:VNx16QI ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")]
+ UNSPEC_SME_ZERO))]
+ "TARGET_SME"
+ {
+ return aarch64_output_sme_zero (operands[0]);
+ }
+)
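
The mask operand is an 8-bit selection of 64-bit tiles. As a hedged usage sketch (assuming the caller has ZA state available), the two arm_sme.h entry points that expand to this pattern use it as follows:

#include <arm_sve.h>
#include <arm_sme.h>

void
clear_za_sketch (void)
{
  svzero_za ();            /* whole of ZA; expands with mask 0xff  */
  svzero_mask_za (0x01);   /* ZA0.D only  */
  svzero_mask_za (0x21);   /* ZA0.D and ZA5.D  */
}
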
+
+;; =========================================================================
+;; == Unary operations
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Single vector input
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADDHA
+;; - ADDVA
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (reg:SME_ZA_SDI ZA_REGNUM)
+ (unspec:SME_ZA_SDI
+ [(reg:SME_ZA_SDI ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SME_ZA_SDI 3 "register_operand" "w")]
+ SME_UNARY_SDI))]
+ "TARGET_STREAMING_SME"
+ "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>"
+)
+
+;; =========================================================================
+;; == Binary operations
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Sum of outer products
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFMOPA
+;; - BFMOPS
+;; - FMOPA
+;; - FMOPS
+;; - SMOPA
+;; - SMOPS
+;; - SUMOPA
+;; - SUMOPS
+;; - UMOPA
+;; - UMOPS
+;; - USMOPA
+;; - USMOPS
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (reg:<V4xWIDE> ZA_REGNUM)
+ (unspec:<V4xWIDE>
+ [(reg:<V4xWIDE> ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:<V4xWIDE_PRED> 1 "register_operand" "Upl")
+ (match_operand:<V4xWIDE_PRED> 2 "register_operand" "Upl")
+ (match_operand:SME_MOP_BHI 3 "register_operand" "w")
+ (match_operand:SME_MOP_BHI 4 "register_operand" "w")]
+ SME_INT_MOP))]
+ "TARGET_STREAMING_SME"
+ "<optab>\tza%0.<V4xwetype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>"
+)
+
+(define_insn "@aarch64_sme_<optab><mode>"
+ [(set (reg:<SME_FMOP_WIDE> ZA_REGNUM)
+ (unspec:<SME_FMOP_WIDE>
+ [(reg:<SME_FMOP_WIDE> ZA_REGNUM)
+ (match_operand:DI 0 "const_int_operand")
+ (match_operand:<SME_FMOP_WIDE_PRED> 1 "register_operand" "Upl")
+ (match_operand:<SME_FMOP_WIDE_PRED> 2 "register_operand" "Upl")
+ (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
+ (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
+ SME_FP_MOP))]
+ "TARGET_STREAMING_SME"
+ "<b><optab>\tza%0.<sme_fmop_wide_etype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>"
+)
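
As a hedged reference model (not GCC code), the non-widening single-precision FMOPA/FMOPS case accumulates a predicated outer product into the tile; the widening integer and bfloat forms instead accumulate sums of two or four such products per tile element:

/* Reference semantics for "fmopa za<n>.s, pn/m, pm/m, zn.s, zm.s":
   PN predicates the rows (elements of ZN), PM predicates the columns
   (elements of ZM); tile elements with an inactive row or column are
   left unchanged.  FMOPS subtracts the product instead of adding.  */
static void
fmopa_ref (float *za, const _Bool *pn, const _Bool *pm,
           const float *zn, const float *zm, int svl_s)
{
  for (int row = 0; row < svl_s; ++row)
    for (int col = 0; col < svl_s; ++col)
      if (pn[row] && pm[col])
        za[row * svl_s + col] += zn[row] * zm[col];
}
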
@@ -2332,10 +2332,21 @@ class svundef_impl : public quiet<multi_vector_function>
public:
using quiet<multi_vector_function>::quiet;
+ unsigned int
+ call_properties (const function_instance &fi) const override
+ {
+ auto base = quiet<multi_vector_function>::call_properties (fi);
+ if (fi.type_suffix (0).za_p)
+ base |= CP_WRITE_ZA;
+ return base;
+ }
+
rtx
expand (function_expander &e) const override
{
- rtx target = e.get_reg_target ();
+ rtx target = (e.type_suffix (0).za_p
+ ? gen_rtx_REG (VNx16QImode, ZA_REGNUM)
+ : e.get_reg_target ());
emit_clobber (copy_rtx (target));
return target;
}
@@ -39,6 +39,36 @@ public:
}
};
+/* Wrap T, which is derived from function_base, and indicate that the
+ function reads from and writes to ZA. */
+template<typename T>
+class read_write_za : public T
+{
+public:
+ using T::T;
+
+ unsigned int
+ call_properties (const function_instance &fi) const override
+ {
+ return T::call_properties (fi) | CP_READ_ZA | CP_WRITE_ZA;
+ }
+};
+
+/* Wrap T, which is derived from function_base, and indicate that the
+ function writes to ZA (but does not read from it). */
+template<typename T>
+class write_za : public T
+{
+public:
+ using T::T;
+
+ unsigned int
+ call_properties (const function_instance &fi) const override
+ {
+ return T::call_properties (fi) | CP_WRITE_ZA;
+ }
+};
+
/* A function_base that sometimes or always operates on tuples of
vectors. */
class multi_vector_function : public function_base
@@ -348,6 +378,15 @@ typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
unspec_based_sub_lane_function;
+/* General SME unspec-based functions. */
+typedef unspec_based_function_exact_insn<code_for_aarch64_sme>
+ unspec_based_sme_function;
+
+/* SME functions that read from and write to ZA. */
+typedef read_write_za<unspec_based_sme_function> za_arith_function;
+typedef read_write_za<quiet<unspec_based_sme_function>>
+ quiet_za_arith_function;
+
/* A function that acts like unspec_based_function_exact_insn<INT_CODE>
when operating on integers, but that expands to an (fma ...)-style
aarch64_sve* operation when applied to floats. */
@@ -59,7 +59,10 @@ static void
apply_predication (const function_instance &instance, tree return_type,
vec<tree> &argument_types)
{
- if (instance.pred != PRED_none)
+ /* There are currently no SME ZA instructions that have both merging and
+ unpredicated forms, so for simplicity, the predicates are always included
+ in the original format string. */
+ if (instance.pred != PRED_none && instance.pred != PRED_za_m)
{
argument_types.quick_insert (0, get_svbool_t ());
/* For unary merge operations, the first argument is a vector with
@@ -584,6 +587,32 @@ struct binary_imm_long_base : public overloaded_base<0>
}
};
+template<type_class_index TCLASS = function_resolver::SAME_TYPE_CLASS,
+ unsigned int BITS = function_resolver::SAME_SIZE>
+struct binary_za_m_base : public overloaded_base<1>
+{
+ tree
+ resolve (function_resolver &r) const override
+ {
+ type_suffix_index type;
+ if (!r.check_num_arguments (5)
+ || !r.require_integer_immediate (0)
+ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
+ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
+ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES
+ || !r.require_derived_vector_type (4, 3, type, TCLASS, BITS))
+ return error_mark_node;
+
+ return r.resolve_to (type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+ }
+};
+
/* Base class for inc_dec and inc_dec_pat. */
struct inc_dec_base : public overloaded_base<0>
{
@@ -1571,6 +1600,61 @@ struct binary_wide_opt_n_def : public overloaded_base<0>
};
SHAPE (binary_wide_opt_n)
+/* void svfoo_t0[_t1](uint64_t, svbool_t, svbool_t, sv<t1>_t,
+ sv<t1:int>_t). */
+struct binary_za_int_m_def : public binary_za_m_base<TYPE_signed>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su64,vp,vp,t1,ts1", group, MODE_none);
+ }
+};
+SHAPE (binary_za_int_m)
+
+/* void svfoo_t0[_t1](uint64_t, svbool_t, svbool_t, sv<t1>_t, sv<t1>_t). */
+struct binary_za_m_def : public binary_za_m_base<>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+    /* Allow the overloaded form to be specified separately, with just
+ a single suffix. This is necessary for the 64-bit SME MOP intrinsics,
+ which have some forms dependent on FEAT_SME_I16I64 and some forms
+ dependent on FEAT_SME_F64F64. The resolver needs to be defined
+ for base SME. */
+ if (group.types[0][1] != NUM_TYPE_SUFFIXES)
+ build_all (b, "_,su64,vp,vp,t1,t1", group, MODE_none);
+ }
+};
+SHAPE (binary_za_m)
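
With arm_sve.h and arm_sme.h in scope, the prototypes this shape produces for svmopa look roughly as follows (a sketch derived from the "_,su64,vp,vp,t1,t1" signature string):

void svmopa_za32_f32_m (uint64_t tile, svbool_t pn, svbool_t pm,
                        svfloat32_t zn, svfloat32_t zm);
void svmopa_za32_s8_m (uint64_t tile, svbool_t pn, svbool_t pm,
                       svint8_t zn, svint8_t zm);
/* Overloaded form; the element type is inferred from the vector
   arguments by binary_za_m_base::resolve.  */
void svmopa_za32_m (uint64_t tile, svbool_t pn, svbool_t pm,
                    svfloat32_t zn, svfloat32_t zm);
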
+
+/* void svfoo_t0[_t1](uint64_t, svbool_t, svbool_t, sv<t1>_t,
+ sv<t1:uint>_t). */
+struct binary_za_uint_m_def : public binary_za_m_base<TYPE_unsigned>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su64,vp,vp,t1,tu1", group, MODE_none);
+ }
+};
+SHAPE (binary_za_uint_m)
+
+/* bool svfoo(). */
+struct bool_inherent_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "sp", group, MODE_none);
+ }
+};
+SHAPE (bool_inherent)
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0>_t)
<t0>_t svfoo[_n_t0](<t0>_t, sv<t0>_t). */
struct clast_def : public overloaded_base<0>
@@ -2050,6 +2134,41 @@ struct inherent_b_def : public overloaded_base<0>
};
SHAPE (inherent_b)
+/* void svfoo_t0(). */
+struct inherent_za_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_", group, MODE_none);
+ }
+};
+SHAPE (inherent_za)
+
+/* void svfoo_t0(uint64_t). */
+struct inherent_mask_za_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su64", group, MODE_none);
+ }
+};
+SHAPE (inherent_mask_za)
+
+/* void svfoo_t0(uint32_t, const void *)
+ void svfoo_vnum_t0(uint32_t, const void *, int64_t). */
+struct ldr_za_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su32,al", group, MODE_none);
+ build_all (b, "_,su32,al,ss64", group, MODE_vnum);
+ }
+};
+SHAPE (ldr_za)
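
In prototype terms, the two build_all calls above correspond to (sketch, with arm_sme.h in scope):

void svldr_za (uint32_t slice, const void *ptr);
void svldr_vnum_za (uint32_t slice, const void *ptr, int64_t vnum);
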
+
/* sv<t0>[xN]_t svfoo[_t0](const <t0>_t *)
sv<t0>[xN]_t svfoo_vnum[_t0](const <t0>_t *, int64_t). */
struct load_def : public load_contiguous_base
@@ -2260,6 +2379,27 @@ struct load_replicate_def : public load_contiguous_base
};
SHAPE (load_replicate)
+/* void svfoo_t0(uint64_t, uint32_t, svbool_t, const void *)
+ void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, const void *, int64_t)
+
+   where the first two arguments form a (ZA tile, slice) pair. */
+struct load_za_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su64,su32,vp,al", group, MODE_none);
+ build_all (b, "_,su64,su32,vp,al,ss64", group, MODE_vnum);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+ }
+};
+SHAPE (load_za)
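
The resulting prototypes look like the following sketch (with arm_sve.h and arm_sme.h in scope); for _za8 the only valid tile index is 0, which is what the min == max case added to report_out_of_range further down caters for:

void svld1_hor_za8 (uint64_t tile, uint32_t slice, svbool_t pg,
                    const void *ptr);
void svld1_hor_vnum_za8 (uint64_t tile, uint32_t slice, svbool_t pg,
                         const void *ptr, int64_t vnum);
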
+
/* svbool_t svfoo(enum svpattern). */
struct pattern_pred_def : public nonoverloaded_base
{
@@ -2354,6 +2494,46 @@ struct rdffr_def : public nonoverloaded_base
};
SHAPE (rdffr)
+/* sv<t1>_t svfoo_t0[_t1](uint64_t, uint32_t). */
+struct read_za_def : public overloaded_base<1>
+{
+ bool
+ has_merge_argument_p (const function_instance &, unsigned int) const override
+ {
+ return true;
+ }
+
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "t1,su64,su32", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ gcc_assert (r.pred == PRED_m);
+ type_suffix_index type;
+ if (!r.check_num_arguments (4)
+ || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES
+ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
+ || !r.require_integer_immediate (2)
+ || !r.require_scalar_type (3, "uint32_t"))
+ return error_mark_node;
+
+ return r.resolve_to (type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ gcc_assert (c.pred == PRED_m);
+ return c.require_immediate_range (1, 0, c.num_za_tiles () - 1);
+ }
+};
+SHAPE (read_za)
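
Because the shape reports an implicit merge argument and is restricted to _m predication, the generated prototypes take the merge vector and governing predicate before the (tile, slice) pair, for example (sketch):

svint8_t svread_hor_za8_s8_m (svint8_t zd, svbool_t pg,
                              uint64_t tile, uint32_t slice);
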
+
/* <t0>_t svfoo[_t0](sv<t0>_t). */
struct reduction_def : public overloaded_base<0>
{
@@ -2694,6 +2874,40 @@ struct store_scatter_offset_restricted_def : public store_scatter_base
};
SHAPE (store_scatter_offset_restricted)
+/* void svfoo_t0(uint64_t, uint32_t, svbool_t, void *)
+ void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, void *, int64_t)
+
+   where the first two arguments form a (ZA tile, slice) pair. */
+struct store_za_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su64,su32,vp,as", group, MODE_none);
+ build_all (b, "_,su64,su32,vp,as,ss64", group, MODE_vnum);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+ }
+};
+SHAPE (store_za)
+
+/* void svfoo_t0(uint32_t, void *)
+ void svfoo_vnum_t0(uint32_t, void *, int64_t). */
+struct str_za_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ build_all (b, "_,su32,as", group, MODE_none);
+ build_all (b, "_,su32,as,ss64", group, MODE_vnum);
+ }
+};
+SHAPE (str_za)
+
/* sv<t0>_t svfoo[_t0](sv<t0>xN_t, sv<t0:uint>_t). */
struct tbl_tuple_def : public overloaded_base<0>
{
@@ -3454,4 +3668,68 @@ struct unary_widen_def : public overloaded_base<0>
};
SHAPE (unary_widen)
+/* void svfoo_t0[_t1](uint64_t, svbool_t, svbool_t, sv<t1>_t). */
+struct unary_za_m_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su64,vp,vp,t1", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ type_suffix_index type;
+ if (!r.check_num_arguments (4)
+ || !r.require_integer_immediate (0)
+ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t)
+ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
+ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ return r.resolve_to (type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+ }
+};
+SHAPE (unary_za_m)
+
+/* void svfoo_t0[_t1](uint64_t, uint32_t, svbool_t, sv<t1>_t). */
+struct write_za_def : public overloaded_base<1>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "_,su64,su32,vp,t1", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ type_suffix_index type;
+ if (!r.check_num_arguments (4)
+ || !r.require_integer_immediate (0)
+ || !r.require_scalar_type (1, "uint32_t")
+ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t)
+ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ return r.resolve_to (type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1);
+ }
+};
+SHAPE (write_za)
+
}
@@ -93,6 +93,10 @@ namespace aarch64_sve
extern const function_shape *const binary_uint64_opt_n;
extern const function_shape *const binary_wide;
extern const function_shape *const binary_wide_opt_n;
+ extern const function_shape *const binary_za_int_m;
+ extern const function_shape *const binary_za_m;
+ extern const function_shape *const binary_za_uint_m;
+ extern const function_shape *const bool_inherent;
extern const function_shape *const clast;
extern const function_shape *const compare;
extern const function_shape *const compare_opt_n;
@@ -114,6 +118,9 @@ namespace aarch64_sve
extern const function_shape *const inc_dec_pred_scalar;
extern const function_shape *const inherent;
extern const function_shape *const inherent_b;
+ extern const function_shape *const inherent_za;
+ extern const function_shape *const inherent_mask_za;
+ extern const function_shape *const ldr_za;
extern const function_shape *const load;
extern const function_shape *const load_ext;
extern const function_shape *const load_ext_gather_index;
@@ -124,6 +131,7 @@ namespace aarch64_sve
extern const function_shape *const load_gather_sv_restricted;
extern const function_shape *const load_gather_vs;
extern const function_shape *const load_replicate;
+ extern const function_shape *const load_za;
extern const function_shape *const mmla;
extern const function_shape *const pattern_pred;
extern const function_shape *const prefetch;
@@ -131,6 +139,7 @@ namespace aarch64_sve
extern const function_shape *const prefetch_gather_offset;
extern const function_shape *const ptest;
extern const function_shape *const rdffr;
+ extern const function_shape *const read_za;
extern const function_shape *const reduction;
extern const function_shape *const reduction_wide;
extern const function_shape *const set;
@@ -147,6 +156,8 @@ namespace aarch64_sve
extern const function_shape *const store_scatter_index_restricted;
extern const function_shape *const store_scatter_offset;
extern const function_shape *const store_scatter_offset_restricted;
+ extern const function_shape *const store_za;
+ extern const function_shape *const str_za;
extern const function_shape *const tbl_tuple;
extern const function_shape *const ternary_bfloat;
extern const function_shape *const ternary_bfloat_lane;
@@ -185,6 +196,8 @@ namespace aarch64_sve
extern const function_shape *const unary_to_uint;
extern const function_shape *const unary_uint;
extern const function_shape *const unary_widen;
+ extern const function_shape *const unary_za_m;
+ extern const function_shape *const write_za;
}
}
new file mode 100644
@@ -0,0 +1,351 @@
+/* ACLE support for AArch64 SME (__ARM_FEATURE_SME intrinsics)
+ Copyright (C) 2020-2022 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "memmodel.h"
+#include "insn-codes.h"
+#include "optabs.h"
+#include "recog.h"
+#include "expr.h"
+#include "basic-block.h"
+#include "function.h"
+#include "fold-const.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimplify.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "aarch64-sve-builtins.h"
+#include "aarch64-sve-builtins-shapes.h"
+#include "aarch64-sve-builtins-base.h"
+#include "aarch64-sve-builtins-sme.h"
+#include "aarch64-sve-builtins-functions.h"
+
+using namespace aarch64_sve;
+
+namespace {
+
+class load_store_za_base : public function_base
+{
+public:
+ tree
+ memory_scalar_type (const function_instance &) const override
+ {
+ return void_type_node;
+ }
+};
+
+class read_write_za_base : public function_base
+{
+public:
+ constexpr read_write_za_base (int unspec) : m_unspec (unspec) {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ auto za_mode = e.vector_mode (0);
+ auto z_mode = e.vector_mode (1);
+ auto icode = (za_mode == VNx1TImode
+ ? code_for_aarch64_sme (m_unspec, za_mode, z_mode)
+ : code_for_aarch64_sme (m_unspec, z_mode, z_mode));
+ return e.use_exact_insn (icode);
+ }
+
+ int m_unspec;
+};
+
+class load_za_base : public load_store_za_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_READ_MEMORY | CP_WRITE_ZA;
+ }
+};
+
+class store_za_base : public load_store_za_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_MEMORY | CP_READ_ZA;
+ }
+};
+
+static void
+add_load_store_operand (function_expander &e, unsigned int base_argno)
+{
+ auto mode = e.vector_mode (0);
+ rtx base = e.get_contiguous_base (mode, base_argno, base_argno + 1,
+ AARCH64_FL_SM_ON);
+ auto mem = gen_rtx_MEM (mode, force_reg (Pmode, base));
+ set_mem_align (mem, BITS_PER_UNIT);
+ e.add_fixed_operand (mem);
+}
+
+class arm_has_sme_impl : public function_base
+{
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ if (TARGET_SME)
+ return f.fold_to_cstu (1);
+ return nullptr;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ if (TARGET_SME)
+ return const1_rtx;
+ emit_insn (gen_aarch64_get_sme_state ());
+ return expand_simple_binop (DImode, LSHIFTRT,
+ gen_rtx_REG (DImode, R0_REGNUM),
+ gen_int_mode (63, QImode),
+ e.possible_target, true, OPTAB_LIB_WIDEN);
+ }
+};
+
+class arm_in_streaming_mode_impl : public function_base
+{
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ if (TARGET_STREAMING)
+ return f.fold_to_cstu (1);
+ if (TARGET_NON_STREAMING)
+ return f.fold_to_cstu (0);
+ return nullptr;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ if (TARGET_STREAMING)
+ return const1_rtx;
+
+ if (TARGET_NON_STREAMING)
+ return const0_rtx;
+
+ rtx reg;
+ if (TARGET_SME)
+ {
+ reg = gen_reg_rtx (DImode);
+ emit_insn (gen_aarch64_read_svcr (reg));
+ }
+ else
+ {
+ emit_insn (gen_aarch64_get_sme_state ());
+ reg = gen_rtx_REG (DImode, R0_REGNUM);
+ }
+ return expand_simple_binop (DImode, AND, reg, gen_int_mode (1, DImode),
+ e.possible_target, true, OPTAB_LIB_WIDEN);
+ }
+};
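
A hedged usage sketch of the two query functions implemented above; the leading double underscore in the source-level names comes from the get_name change below, which prefixes "__" to arm_* base names:

#include <stdbool.h>
#include <arm_sve.h>
#include <arm_sme.h>

static bool
can_use_streaming_kernels (void)
{
  /* Each call folds to a compile-time constant when the streaming
     state is known; otherwise it reads PSTATE.SM via SVCR or the
     runtime helper.  */
  return __arm_has_sme () && __arm_in_streaming_mode ();
}
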
+
+/* Implements svcnts[bhwd]. */
+class svcnts_bhwd_impl : public function_base
+{
+public:
+ constexpr svcnts_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}
+
+ unsigned int
+ get_shift () const
+ {
+ return exact_log2 (GET_MODE_UNIT_SIZE (m_ref_mode));
+ }
+
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ if (TARGET_STREAMING)
+ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode));
+ return nullptr;
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ rtx cntsb = aarch64_sme_vq_immediate (DImode, 16, AARCH64_ISA_MODE);
+ auto shift = get_shift ();
+ if (!shift)
+ return cntsb;
+
+ return expand_simple_binop (DImode, LSHIFTRT, cntsb,
+ gen_int_mode (shift, QImode),
+ e.possible_target, true, OPTAB_LIB_WIDEN);
+ }
+
+ /* The mode of the vector associated with the [bhwd] suffix. */
+ machine_mode m_ref_mode;
+};
+
+class svld1_impl : public load_za_base
+{
+public:
+ constexpr svld1_impl (int unspec) : m_unspec (unspec) {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ auto icode = code_for_aarch64_sme (m_unspec, e.vector_mode (0));
+ for (int i = 0; i < 3; ++i)
+ e.add_input_operand (icode, e.args[i]);
+ add_load_store_operand (e, 3);
+ return e.generate_insn (icode);
+ }
+
+ int m_unspec;
+};
+
+class svldr_impl : public load_za_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ auto icode = CODE_FOR_aarch64_sme_ldr0;
+ e.add_input_operand (icode, e.args[0]);
+ add_load_store_operand (e, 1);
+ return e.generate_insn (icode);
+ }
+};
+
+class svread_impl : public read_write_za_base
+{
+public:
+ using read_write_za_base::read_write_za_base;
+
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_READ_ZA;
+ }
+};
+
+class svst1_impl : public store_za_base
+{
+public:
+ constexpr svst1_impl (int unspec) : m_unspec (unspec) {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ auto icode = code_for_aarch64_sme (m_unspec, e.vector_mode (0));
+ add_load_store_operand (e, 3);
+ for (int i = 0; i < 3; ++i)
+ e.add_input_operand (icode, e.args[i]);
+ return e.generate_insn (icode);
+ }
+
+ int m_unspec;
+};
+
+class svstr_impl : public store_za_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ auto icode = CODE_FOR_aarch64_sme_str0;
+ add_load_store_operand (e, 1);
+ e.add_input_operand (icode, e.args[0]);
+ return e.generate_insn (icode);
+ }
+};
+
+class svwrite_impl : public read_write_za_base
+{
+public:
+ using read_write_za_base::read_write_za_base;
+
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_ZA;
+ }
+};
+
+class svzero_impl : public write_za<function_base>
+{
+public:
+ rtx
+ expand (function_expander &) const override
+ {
+ emit_insn (gen_aarch64_sme_zero (gen_int_mode (0xff, SImode)));
+ return const0_rtx;
+ }
+};
+
+class svzero_mask_impl : public write_za<function_base>
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ return e.use_exact_insn (CODE_FOR_aarch64_sme_zero);
+ }
+};
+
+} /* end anonymous namespace */
+
+namespace aarch64_sve {
+
+FUNCTION (arm_has_sme, arm_has_sme_impl, )
+FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, )
+FUNCTION (svaddha, za_arith_function, (UNSPEC_SME_ADDHA,
+ UNSPEC_SME_ADDHA, -1, 1))
+FUNCTION (svaddva, za_arith_function, (UNSPEC_SME_ADDVA,
+ UNSPEC_SME_ADDVA, -1, 1))
+FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode))
+FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode))
+FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode))
+FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode))
+FUNCTION (svld1_hor, svld1_impl, (UNSPEC_SME_LD1_HOR))
+FUNCTION (svld1_ver, svld1_impl, (UNSPEC_SME_LD1_VER))
+FUNCTION (svldr, svldr_impl, )
+FUNCTION (svmopa, quiet_za_arith_function, (UNSPEC_SME_SMOPA,
+ UNSPEC_SME_UMOPA,
+ UNSPEC_SME_FMOPA, 1))
+FUNCTION (svmops, quiet_za_arith_function, (UNSPEC_SME_SMOPS,
+ UNSPEC_SME_UMOPS,
+ UNSPEC_SME_FMOPS, 1))
+FUNCTION (svread_hor, svread_impl, (UNSPEC_SME_READ_HOR))
+FUNCTION (svread_ver, svread_impl, (UNSPEC_SME_READ_VER))
+FUNCTION (svst1_hor, svst1_impl, (UNSPEC_SME_ST1_HOR))
+FUNCTION (svst1_ver, svst1_impl, (UNSPEC_SME_ST1_VER))
+FUNCTION (svstr, svstr_impl, )
+FUNCTION (svsumopa, quiet_za_arith_function, (UNSPEC_SME_SUMOPA, -1, -1, 1))
+FUNCTION (svsumops, quiet_za_arith_function, (UNSPEC_SME_SUMOPS, -1, -1, 1))
+FUNCTION (svusmopa, quiet_za_arith_function, (-1, UNSPEC_SME_USMOPA, -1, 1))
+FUNCTION (svusmops, quiet_za_arith_function, (-1, UNSPEC_SME_USMOPS, -1, 1))
+FUNCTION (svwrite_hor, svwrite_impl, (UNSPEC_SME_WRITE_HOR))
+FUNCTION (svwrite_ver, svwrite_impl, (UNSPEC_SME_WRITE_VER))
+FUNCTION (svzero, svzero_impl, )
+FUNCTION (svzero_mask, svzero_mask_impl, )
+
+} /* end namespace aarch64_sve */
new file mode 100644
@@ -0,0 +1,83 @@
+/* ACLE support for AArch64 SME (__ARM_FEATURE_SME intrinsics)
+ Copyright (C) 2022 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define REQUIRED_EXTENSIONS 0
+DEF_SVE_FUNCTION (arm_has_sme, bool_inherent, none, none)
+DEF_SVE_FUNCTION (arm_in_streaming_mode, bool_inherent, none, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_SME
+DEF_SVE_FUNCTION (svcntsb, count_inherent, none, none)
+DEF_SVE_FUNCTION (svcntsd, count_inherent, none, none)
+DEF_SVE_FUNCTION (svcntsh, count_inherent, none, none)
+DEF_SVE_FUNCTION (svcntsw, count_inherent, none, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_ZA_ON
+DEF_SVE_FUNCTION (svldr, ldr_za, za, none)
+DEF_SVE_FUNCTION (svstr, str_za, za, none)
+DEF_SVE_FUNCTION (svundef, inherent_za, za, none)
+DEF_SVE_FUNCTION (svzero, inherent_za, za, none)
+DEF_SVE_FUNCTION (svzero_mask, inherent_mask_za, za, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \
+ | AARCH64_FL_SM_ON \
+ | AARCH64_FL_ZA_ON)
+DEF_SVE_FUNCTION (svaddha, unary_za_m, s_za_integer, za_m)
+DEF_SVE_FUNCTION (svaddva, unary_za_m, s_za_integer, za_m)
+DEF_SVE_FUNCTION (svld1_hor, load_za, all_za, none)
+DEF_SVE_FUNCTION (svld1_ver, load_za, all_za, none)
+DEF_SVE_FUNCTION (svmopa, binary_za_m, mop_base, za_m)
+DEF_SVE_FUNCTION (svmopa, binary_za_m, d_za, za_m)
+DEF_SVE_FUNCTION (svmops, binary_za_m, mop_base, za_m)
+DEF_SVE_FUNCTION (svmops, binary_za_m, d_za, za_m)
+DEF_SVE_FUNCTION (svread_hor, read_za, all_za_data, m)
+DEF_SVE_FUNCTION (svread_ver, read_za, all_za_data, m)
+DEF_SVE_FUNCTION (svst1_hor, store_za, all_za, none)
+DEF_SVE_FUNCTION (svst1_ver, store_za, all_za, none)
+DEF_SVE_FUNCTION (svsumopa, binary_za_uint_m, mop_base_signed, za_m)
+DEF_SVE_FUNCTION (svsumops, binary_za_uint_m, mop_base_signed, za_m)
+DEF_SVE_FUNCTION (svusmopa, binary_za_int_m, mop_base_unsigned, za_m)
+DEF_SVE_FUNCTION (svusmops, binary_za_int_m, mop_base_unsigned, za_m)
+DEF_SVE_FUNCTION (svwrite_hor, write_za, all_za_data, za_m)
+DEF_SVE_FUNCTION (svwrite_ver, write_za, all_za_data, za_m)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \
+ | AARCH64_FL_SME_I16I64 \
+ | AARCH64_FL_SM_ON \
+ | AARCH64_FL_ZA_ON)
+DEF_SVE_FUNCTION (svaddha, unary_za_m, d_za_integer, za_m)
+DEF_SVE_FUNCTION (svaddva, unary_za_m, d_za_integer, za_m)
+DEF_SVE_FUNCTION (svmopa, binary_za_m, mop_i16i64, za_m)
+DEF_SVE_FUNCTION (svmops, binary_za_m, mop_i16i64, za_m)
+DEF_SVE_FUNCTION (svsumopa, binary_za_uint_m, mop_i16i64_signed, za_m)
+DEF_SVE_FUNCTION (svsumops, binary_za_uint_m, mop_i16i64_signed, za_m)
+DEF_SVE_FUNCTION (svusmopa, binary_za_int_m, mop_i16i64_unsigned, za_m)
+DEF_SVE_FUNCTION (svusmops, binary_za_int_m, mop_i16i64_unsigned, za_m)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \
+ | AARCH64_FL_SME_F64F64 \
+ | AARCH64_FL_SM_ON \
+ | AARCH64_FL_ZA_ON)
+DEF_SVE_FUNCTION (svmopa, binary_za_m, mop_f64f64, za_m)
+DEF_SVE_FUNCTION (svmops, binary_za_m, mop_f64f64, za_m)
+#undef REQUIRED_EXTENSIONS
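
Taken together, the groups above are intended to support code along the following lines (a sketch only: it assumes the caller arranges for streaming mode and ZA state, which this patch series handles through separate function attributes not shown in this hunk):

#include <arm_sve.h>
#include <arm_sme.h>

/* Accumulate the outer product of two float vectors into ZA tile 0,
   then store the tile back out row by row.  */
void
outer_product_sketch (float *out, const float *x, const float *y)
{
  svbool_t all = svptrue_b32 ();
  uint64_t svl_s = svcntsw ();   /* rows (and columns) per 32-bit tile  */

  svzero_za ();
  svmopa_za32_m (0, all, all, svld1 (all, x), svld1 (all, y));

  for (uint32_t row = 0; row < svl_s; ++row)
    svst1_hor_za32 (0, row, all, out + row * svl_s);
}
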
new file mode 100644
@@ -0,0 +1,56 @@
+/* ACLE support for AArch64 SME (__ARM_FEATURE_SME intrinsics)
+ Copyright (C) 2022 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_SVE_BUILTINS_SME_H
+#define GCC_AARCH64_SVE_BUILTINS_SME_H
+
+namespace aarch64_sve
+{
+ namespace functions
+ {
+ extern const function_base *const arm_has_sme;
+ extern const function_base *const arm_in_streaming_mode;
+ extern const function_base *const svaddha;
+ extern const function_base *const svaddva;
+ extern const function_base *const svcntsb;
+ extern const function_base *const svcntsd;
+ extern const function_base *const svcntsh;
+ extern const function_base *const svcntsw;
+ extern const function_base *const svld1_hor;
+ extern const function_base *const svld1_ver;
+ extern const function_base *const svldr;
+ extern const function_base *const svmopa;
+ extern const function_base *const svmops;
+ extern const function_base *const svread_hor;
+ extern const function_base *const svread_ver;
+ extern const function_base *const svst1_hor;
+ extern const function_base *const svst1_ver;
+ extern const function_base *const svstr;
+ extern const function_base *const svsumopa;
+ extern const function_base *const svsumops;
+ extern const function_base *const svusmopa;
+ extern const function_base *const svusmops;
+ extern const function_base *const svwrite_hor;
+ extern const function_base *const svwrite_ver;
+ extern const function_base *const svzero;
+ extern const function_base *const svzero_mask;
+ }
+}
+
+#endif
@@ -51,6 +51,7 @@
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-sve2.h"
+#include "aarch64-sve-builtins-sme.h"
#include "aarch64-sve-builtins-shapes.h"
namespace aarch64_sve {
@@ -112,6 +113,7 @@ static const char *const pred_suffixes[NUM_PREDS + 1] = {
"_m",
"_x",
"_z",
+ "_m",
""
};
@@ -136,12 +138,28 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \
TYPE_##CLASS == TYPE_unsigned, \
TYPE_##CLASS == TYPE_float, \
+ TYPE_##CLASS != TYPE_bool, \
TYPE_##CLASS == TYPE_bool, \
+ false, \
+ 0, \
+ MODE },
+#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \
+ { "_" #NAME, \
+ NUM_VECTOR_TYPES, \
+ NUM_TYPE_CLASSES, \
+ BITS, \
+ BITS / BITS_PER_UNIT, \
+ false, \
+ false, \
+ false, \
+ false, \
+ false, \
+ true, \
0, \
MODE },
#include "aarch64-sve-builtins.def"
{ "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false,
- 0, VOIDmode }
+ false, false, 0, VOIDmode }
};
/* Define a TYPES_<combination> macro for each combination of type
@@ -415,6 +433,73 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
TYPES_while1 (D, b32), \
TYPES_while1 (D, b64)
+/* _za8 _za16 _za32 _za64 _za128. */
+#define TYPES_all_za(S, D) \
+ S (za8), S (za16), S (za32), S (za64), S (za128)
+
+/* _za64. */
+#define TYPES_d_za(S, D) \
+ S (za64)
+
+/* { _za8 } x { _s8 _u8 }
+
+ { _za16 } x { _bf16 _f16 _s16 _u16 }
+
+ { _za32 } x { _f32 _s32 _u32 }
+
+ { _za64 } x { _f64 _s64 _u64 }
+
+ { _za128 } x { _bf16 }
+ { _f16 _f32 _f64 }
+ { _s8 _s16 _s32 _s64 }
+ { _u8 _u16 _u32 _u64 }. */
+#define TYPES_all_za_data(S, D) \
+ D (za8, s8), D (za8, u8), \
+ D (za16, bf16), D (za16, f16), D (za16, s16), D (za16, u16), \
+ D (za32, f32), D (za32, s32), D (za32, u32), \
+ D (za64, f64), D (za64, s64), D (za64, u64), \
+ TYPES_reinterpret1 (D, za128)
+
+/* _za32 x { _s32 _u32 }. */
+#define TYPES_s_za_integer(S, D) \
+ D (za32, s32), D (za32, u32)
+
+/* _za64 x { _s64 _u64 }. */
+#define TYPES_d_za_integer(S, D) \
+ D (za64, s64), D (za64, u64)
+
+/* _za32 x { _s8 _u8 _bf16 _f16 _f32 }. */
+#define TYPES_mop_base(S, D) \
+ D (za32, s8), D (za32, u8), D (za32, bf16), D (za32, f16), D (za32, f32)
+
+/* _za32_s8. */
+#define TYPES_mop_base_signed(S, D) \
+ D (za32, s8)
+
+/* _za32_u8. */
+#define TYPES_mop_base_unsigned(S, D) \
+ D (za32, u8)
+
+/* _za64 x { _s16 _u16 }. */
+#define TYPES_mop_i16i64(S, D) \
+ D (za64, s16), D (za64, u16)
+
+/* _za64_s16. */
+#define TYPES_mop_i16i64_signed(S, D) \
+ D (za64, s16)
+
+/* _za64_u16. */
+#define TYPES_mop_i16i64_unsigned(S, D) \
+ D (za64, u16)
+
+/* _za64_f64. */
+#define TYPES_mop_f64f64(S, D) \
+ D (za64, f64)
+
+/* _za. */
+#define TYPES_za(S, D) \
+ S (za)
+
/* Describe a pair of type suffixes in which only the first is used. */
#define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES }
@@ -482,6 +567,19 @@ DEF_SVE_TYPES_ARRAY (cvt_narrow);
DEF_SVE_TYPES_ARRAY (inc_dec_n);
DEF_SVE_TYPES_ARRAY (reinterpret);
DEF_SVE_TYPES_ARRAY (while);
+DEF_SVE_TYPES_ARRAY (all_za);
+DEF_SVE_TYPES_ARRAY (d_za);
+DEF_SVE_TYPES_ARRAY (all_za_data);
+DEF_SVE_TYPES_ARRAY (s_za_integer);
+DEF_SVE_TYPES_ARRAY (d_za_integer);
+DEF_SVE_TYPES_ARRAY (mop_base);
+DEF_SVE_TYPES_ARRAY (mop_base_signed);
+DEF_SVE_TYPES_ARRAY (mop_base_unsigned);
+DEF_SVE_TYPES_ARRAY (mop_i16i64);
+DEF_SVE_TYPES_ARRAY (mop_i16i64_signed);
+DEF_SVE_TYPES_ARRAY (mop_i16i64_unsigned);
+DEF_SVE_TYPES_ARRAY (mop_f64f64);
+DEF_SVE_TYPES_ARRAY (za);
/* Used by functions that have no governing predicate. */
static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
@@ -490,6 +588,9 @@ static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
explicit suffix. */
static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS };
+/* Used by functions that only support "_m" predication. */
+static const predication_index preds_m[] = { PRED_m, NUM_PREDS };
+
/* Used by functions that allow merging and "don't care" predication,
but are not suitable for predicated MOVPRFX. */
static const predication_index preds_mx[] = {
@@ -521,6 +622,9 @@ static const predication_index preds_z_or_none[] = {
/* Used by (mostly predicate) functions that only support "_z" predication. */
static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
+/* Used by SME instructions that always merge into ZA. */
+static const predication_index preds_za_m[] = { PRED_za_m, NUM_PREDS };
+
/* A list of all SVE ACLE functions. */
static CONSTEXPR const function_group_info function_groups[] = {
#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
@@ -530,8 +634,8 @@ static CONSTEXPR const function_group_info function_groups[] = {
};
/* The scalar type associated with each vector type. */
-extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES];
-tree scalar_types[NUM_VECTOR_TYPES];
+extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES + 1];
+tree scalar_types[NUM_VECTOR_TYPES + 1];
/* The single-predicate and single-vector types, with their built-in
"__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always
@@ -639,7 +743,7 @@ find_type_suffix_for_scalar_type (const_tree type)
/* A linear search should be OK here, since the code isn't hot and
the number of types is only small. */
for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
- if (!type_suffixes[suffix_i].bool_p)
+ if (type_suffixes[suffix_i].vector_p)
{
vector_type_index vector_i = type_suffixes[suffix_i].vector_type;
if (matches_type_p (scalar_types[vector_i], type))
@@ -707,6 +811,20 @@ check_required_extensions (location_t location, tree fndecl,
return false;
}
+ if (missing_extensions & AARCH64_FL_SM_ON)
+ {
+ error_at (location, "ACLE function %qD can only be called when"
+ " SME streaming mode is enabled", fndecl);
+ return false;
+ }
+
+ if (missing_extensions & AARCH64_FL_ZA_ON)
+ {
+ error_at (location, "ACLE function %qD can only be called from"
+ " a function that has ZA state", fndecl);
+ return false;
+ }
+
static const struct {
aarch64_feature_flags flag;
const char *name;
@@ -742,9 +860,13 @@ report_out_of_range (location_t location, tree fndecl, unsigned int argno,
HOST_WIDE_INT actual, HOST_WIDE_INT min,
HOST_WIDE_INT max)
{
- error_at (location, "passing %wd to argument %d of %qE, which expects"
- " a value in the range [%wd, %wd]", actual, argno + 1, fndecl,
- min, max);
+ if (min == max)
+ error_at (location, "passing %wd to argument %d of %qE, which expects"
+ " the value %wd", actual, argno + 1, fndecl, min);
+ else
+ error_at (location, "passing %wd to argument %d of %qE, which expects"
+ " a value in the range [%wd, %wd]", actual, argno + 1, fndecl,
+ min, max);
}
/* Report that LOCATION has a call to FNDECL in which argument ARGNO has
@@ -830,7 +952,7 @@ function_instance::reads_global_state_p () const
return true;
/* Handle direct reads of global state. */
- return flags & (CP_READ_MEMORY | CP_READ_FFR);
+ return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA);
}
/* Return true if calls to the function could modify some form of
@@ -851,7 +973,7 @@ function_instance::modifies_global_state_p () const
return true;
/* Handle direct modifications of global state. */
- return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR);
+ return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA);
}
/* Return true if calls to the function could raise a signal. */
@@ -871,6 +993,20 @@ function_instance::could_trap_p () const
return false;
}
+/* Return true if the function shares ZA state with its caller. */
+bool
+function_instance::shared_za_p () const
+{
+ return (call_properties () & (CP_READ_ZA | CP_WRITE_ZA)) != 0;
+}
+
+/* Return true if the function preserves ZA. */
+bool
+function_instance::preserves_za_p () const
+{
+ return (call_properties () & CP_WRITE_ZA) == 0;
+}
+
inline hashval_t
registered_function_hasher::hash (value_type value)
{
@@ -883,8 +1019,8 @@ registered_function_hasher::equal (value_type value, const compare_type &key)
return value->instance == key;
}
-sve_switcher::sve_switcher ()
- : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE)
+sve_switcher::sve_switcher (aarch64_feature_flags flags)
+ : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE | flags)
{
/* Changing the ISA flags and have_regs_of_mode should be enough here.
We shouldn't need to pay the compile-time cost of a full target
@@ -940,6 +1076,10 @@ char *
function_builder::get_name (const function_instance &instance,
bool overloaded_p)
{
+ /* __arm_* functions are listed as arm_*, so that the associated GCC
+ code is not in the implementation namespace. */
+ if (strncmp (instance.base_name, "arm_", 4) == 0)
+ append_name ("__");
append_name (instance.base_name);
if (overloaded_p)
switch (instance.displacement_units ())
@@ -981,6 +1121,11 @@ function_builder::get_attributes (const function_instance &instance)
{
tree attrs = NULL_TREE;
+ if (instance.shared_za_p ())
+ attrs = add_attribute ("arm_shared_za", attrs);
+ if (instance.preserves_za_p ())
+ attrs = add_attribute ("arm_preserves_za", attrs);
+
if (!instance.modifies_global_state_p ())
{
if (instance.reads_global_state_p ())
@@ -1236,12 +1381,24 @@ function_resolver::lookup_form (mode_suffix_index mode,
/* Resolve the function to one with the mode suffix given by MODE and the
type suffixes given by TYPE0 and TYPE1. Return its function decl on
- success, otherwise report an error and return error_mark_node. */
+ success, otherwise report an error and return error_mark_node.
+
+ As a convenience, resolve_to (MODE, TYPE0) can be used for functions
+ whose first type suffix is explicit, with TYPE0 then describing the
+ second type suffix rather than the first. */
tree
function_resolver::resolve_to (mode_suffix_index mode,
type_suffix_index type0,
type_suffix_index type1)
{
+ /* Handle convert-like functions in which the first type suffix is
+ explicit. */
+ if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES && type0 != type_suffix_ids[0])
+ {
+ type1 = type0;
+ type0 = type_suffix_ids[0];
+ }
+
tree res = lookup_form (mode, type0, type1);
if (!res)
{
@@ -2167,6 +2324,7 @@ bool
function_resolver::check_gp_argument (unsigned int nops,
unsigned int &i, unsigned int &nargs)
{
+ gcc_assert (pred != PRED_za_m);
i = 0;
if (pred != PRED_none)
{
@@ -2367,9 +2525,7 @@ function_checker::function_checker (location_t location,
unsigned int nargs, tree *args)
: function_call_info (location, instance, fndecl),
m_fntype (fntype), m_nargs (nargs), m_args (args),
- /* We don't have to worry about unary _m operations here, since they
- never have arguments that need checking. */
- m_base_arg (pred != PRED_none ? 1 : 0)
+ m_base_arg (pred != PRED_none && pred != PRED_za_m ? 1 : 0)
{
}
@@ -2762,21 +2918,51 @@ function_expander::convert_to_pmode (rtx x)
}
/* Return the base address for a contiguous load or store function.
- MEM_MODE is the mode of the addressed memory. */
+ MEM_MODE is the mode of the addressed memory, BASE_ARGNO is
+ the index of the base argument, and VNUM_ARGNO is the index of
+ the vnum offset argument (if any). VL_ISA_MODE is AARCH64_FL_SM_ON
+ if the vnum argument is a factor of the SME vector length, 0 if it
+ is a factor of the current prevailing vector length. */
rtx
-function_expander::get_contiguous_base (machine_mode mem_mode)
+function_expander::get_contiguous_base (machine_mode mem_mode,
+ unsigned int base_argno,
+ unsigned int vnum_argno,
+ aarch64_feature_flags vl_isa_mode)
{
- rtx base = convert_to_pmode (args[1]);
+ rtx base = convert_to_pmode (args[base_argno]);
if (mode_suffix_id == MODE_vnum)
{
- /* Use the size of the memory mode for extending loads and truncating
- stores. Use the size of a full vector for non-extending loads
- and non-truncating stores (including svld[234] and svst[234]). */
- poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode),
- BYTES_PER_SVE_VECTOR);
- rtx offset = gen_int_mode (size, Pmode);
- offset = simplify_gen_binary (MULT, Pmode, args[2], offset);
- base = simplify_gen_binary (PLUS, Pmode, base, offset);
+ rtx vnum = args[vnum_argno];
+ if (vnum != const0_rtx)
+ {
+ /* Use the size of the memory mode for extending loads and truncating
+ stores. Use the size of a full vector for non-extending loads
+ and non-truncating stores (including svld[234] and svst[234]). */
+ poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode),
+ BYTES_PER_SVE_VECTOR);
+ rtx offset;
+ if ((vl_isa_mode & AARCH64_FL_SM_ON)
+ && !TARGET_STREAMING
+ && !size.is_constant ())
+ {
+ gcc_assert (known_eq (size, BYTES_PER_SVE_VECTOR));
+ if (CONST_INT_P (vnum) && IN_RANGE (INTVAL (vnum), -32, 31))
+ offset = aarch64_sme_vq_immediate (Pmode, INTVAL (vnum) * 16,
+ AARCH64_ISA_MODE);
+ else
+ {
+ offset = aarch64_sme_vq_immediate (Pmode, 16,
+ AARCH64_ISA_MODE);
+ offset = simplify_gen_binary (MULT, Pmode, vnum, offset);
+ }
+ }
+ else
+ {
+ offset = gen_int_mode (size, Pmode);
+ offset = simplify_gen_binary (MULT, Pmode, vnum, offset);
+ }
+ base = simplify_gen_binary (PLUS, Pmode, base, offset);
+ }
}
return base;
}
@@ -2883,7 +3069,7 @@ function_expander::add_input_operand (insn_code icode, rtx x)
/* Add an integer operand with value X to the instruction. */
void
-function_expander::add_integer_operand (HOST_WIDE_INT x)
+function_expander::add_integer_operand (poly_int64 x)
{
m_ops.safe_grow (m_ops.length () + 1, true);
create_integer_operand (&m_ops.last (), x);
@@ -3428,7 +3614,10 @@ init_builtins ()
sve_switcher sve;
register_builtin_types ();
if (in_lto_p)
- handle_arm_sve_h ();
+ {
+ handle_arm_sve_h ();
+ handle_arm_sme_h ();
+ }
}
/* Register vector type TYPE under its arm_sve.h name. */
@@ -3578,7 +3767,8 @@ handle_arm_sve_h ()
function_table = new hash_table<registered_function_hasher> (1023);
function_builder builder;
for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i)
- builder.register_function_group (function_groups[i]);
+ if (!(function_groups[i].required_extensions & AARCH64_FL_SME))
+ builder.register_function_group (function_groups[i]);
}
/* Return the function decl with SVE function subcode CODE, or error_mark_node
@@ -3591,6 +3781,33 @@ builtin_decl (unsigned int code, bool)
return (*registered_functions)[code]->decl;
}
+/* Implement #pragma GCC aarch64 "arm_sme.h". */
+void
+handle_arm_sme_h ()
+{
+ if (!function_table)
+ {
+ error ("%qs defined without first defining %qs",
+ "arm_sme.h", "arm_sve.h");
+ return;
+ }
+
+ static bool initialized_p;
+ if (initialized_p)
+ {
+ error ("duplicate definition of %qs", "arm_sme.h");
+ return;
+ }
+ initialized_p = true;
+
+ sme_switcher sme;
+
+ function_builder builder;
+ for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i)
+ if (function_groups[i].required_extensions & AARCH64_FL_SME)
+ builder.register_function_group (function_groups[i]);
+}
+
/* If we're implementing manual overloading, check whether the SVE
function with subcode CODE is overloaded, and if so attempt to
determine the corresponding non-overloaded function. The call
@@ -29,6 +29,10 @@
#define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E)
#endif
+#ifndef DEF_SME_ZA_SUFFIX
+#define DEF_SME_ZA_SUFFIX(A, B, C)
+#endif
+
#ifndef DEF_SVE_FUNCTION
#define DEF_SVE_FUNCTION(A, B, C, D)
#endif
@@ -95,10 +99,21 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode)
DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode)
DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode)
+/* Arbitrarily associate _za with bytes (by analogy with char's role in C). */
+DEF_SME_ZA_SUFFIX (za, 8, VNx16QImode)
+
+DEF_SME_ZA_SUFFIX (za8, 8, VNx16QImode)
+DEF_SME_ZA_SUFFIX (za16, 16, VNx8HImode)
+DEF_SME_ZA_SUFFIX (za32, 32, VNx4SImode)
+DEF_SME_ZA_SUFFIX (za64, 64, VNx2DImode)
+DEF_SME_ZA_SUFFIX (za128, 128, VNx1TImode)
+
#include "aarch64-sve-builtins-base.def"
#include "aarch64-sve-builtins-sve2.def"
+#include "aarch64-sve-builtins-sme.def"
#undef DEF_SVE_FUNCTION
+#undef DEF_SME_ZA_SUFFIX
#undef DEF_SVE_TYPE_SUFFIX
#undef DEF_SVE_TYPE
#undef DEF_SVE_MODE
@@ -97,6 +97,8 @@ const unsigned int CP_PREFETCH_MEMORY = 1U << 3;
const unsigned int CP_WRITE_MEMORY = 1U << 4;
const unsigned int CP_READ_FFR = 1U << 5;
const unsigned int CP_WRITE_FFR = 1U << 6;
+const unsigned int CP_READ_ZA = 1U << 7;
+const unsigned int CP_WRITE_ZA = 1U << 8;
/* Enumerates the SVE predicate and (data) vector types, together called
"vector types" for brevity. */
@@ -142,6 +144,10 @@ enum predication_index
/* Zero predication: set inactive lanes of the vector result to zero. */
PRED_z,
+ /* Merging predication for SME's ZA: merge into slices of the array
+ instead of overwriting the whole slices. */
+ PRED_za_m,
+
NUM_PREDS
};
@@ -176,6 +182,8 @@ enum type_suffix_index
{
#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \
TYPE_SUFFIX_ ## NAME,
+#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \
+ TYPE_SUFFIX_ ## NAME,
#include "aarch64-sve-builtins.def"
NUM_TYPE_SUFFIXES
};
@@ -229,9 +237,13 @@ struct type_suffix_info
unsigned int unsigned_p : 1;
/* True if the suffix is for a floating-point type. */
unsigned int float_p : 1;
+ /* True if the suffix is for a vector type (integer or float). */
+ unsigned int vector_p : 1;
/* True if the suffix is for a boolean type. */
unsigned int bool_p : 1;
- unsigned int spare : 12;
+ /* True if the suffix is for SME's ZA. */
+ unsigned int za_p : 1;
+ unsigned int spare : 10;
/* The associated vector or predicate mode. */
machine_mode vector_mode : 16;
@@ -283,6 +295,8 @@ public:
bool reads_global_state_p () const;
bool modifies_global_state_p () const;
bool could_trap_p () const;
+ bool shared_za_p () const;
+ bool preserves_za_p () const;
unsigned int vectors_per_tuple () const;
tree memory_scalar_type () const;
@@ -293,11 +307,13 @@ public:
tree displacement_vector_type () const;
units_index displacement_units () const;
+ unsigned int num_za_tiles () const;
+
const type_suffix_info &type_suffix (unsigned int) const;
tree scalar_type (unsigned int) const;
tree vector_type (unsigned int) const;
tree tuple_type (unsigned int) const;
- unsigned int elements_per_vq (unsigned int i) const;
+ unsigned int elements_per_vq (unsigned int) const;
machine_mode vector_mode (unsigned int) const;
machine_mode gp_mode (unsigned int) const;
@@ -532,7 +548,8 @@ public:
bool overlaps_input_p (rtx);
rtx convert_to_pmode (rtx);
- rtx get_contiguous_base (machine_mode);
+ rtx get_contiguous_base (machine_mode, unsigned int = 1, unsigned int = 2,
+ aarch64_feature_flags = 0);
rtx get_fallback_value (machine_mode, unsigned int,
unsigned int, unsigned int &);
rtx get_reg_target ();
@@ -540,7 +557,7 @@ public:
void add_output_operand (insn_code);
void add_input_operand (insn_code, rtx);
- void add_integer_operand (HOST_WIDE_INT);
+ void add_integer_operand (poly_int64);
void add_mem_operand (machine_mode, rtx);
void add_address_operand (rtx);
void add_fixed_operand (rtx);
@@ -660,7 +677,7 @@ public:
class sve_switcher : public aarch64_simd_switcher
{
public:
- sve_switcher ();
+ sve_switcher (aarch64_feature_flags = 0);
~sve_switcher ();
private:
@@ -668,10 +685,17 @@ private:
bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
};
+/* Extends sve_switcher enough for defining arm_sme.h. */
+class sme_switcher : public sve_switcher
+{
+public:
+ sme_switcher () : sve_switcher (AARCH64_FL_SME) {}
+};
+
extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1];
extern const mode_suffix_info mode_suffixes[MODE_none + 1];
-extern tree scalar_types[NUM_VECTOR_TYPES];
+extern tree scalar_types[NUM_VECTOR_TYPES + 1];
extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
extern tree acle_svpattern;
extern tree acle_svprfop;
@@ -801,6 +825,16 @@ function_instance::displacement_vector_type () const
return acle_vector_types[0][mode_suffix ().displacement_vector_type];
}
+/* Return the number of ZA tiles associated with the _za<N> suffix
+ (which is always the first type suffix). */
+inline unsigned int
+function_instance::num_za_tiles () const
+{
+ auto &suffix = type_suffix (0);
+ gcc_checking_assert (suffix.za_p);
+ return suffix.element_bytes;
+}
+
/* If the function takes a vector or scalar displacement, return the units
in which the displacement is measured, otherwise return UNITS_none. */
inline units_index
@@ -5643,15 +5643,26 @@ aarch64_output_sve_scalar_inc_dec (rtx offset)
}
/* Return true if a single RDVL instruction can multiply FACTOR by the
- number of 128-bit quadwords in an SVE vector. */
+ number of 128-bit quadwords in an SVE vector. This is also the
+ range of ADDVL. */
static bool
-aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor)
+aarch64_sve_rdvl_addvl_factor_p (HOST_WIDE_INT factor)
{
return (multiple_p (factor, 16)
&& IN_RANGE (factor, -32 * 16, 31 * 16));
}
+/* Return true if ADDPL can be used to add FACTOR multiplied by the number
+ of quadwords in an SVE vector. */
+
+static bool
+aarch64_sve_addpl_factor_p (HOST_WIDE_INT factor)
+{
+ return (multiple_p (factor, 2)
+ && IN_RANGE (factor, -32 * 2, 31 * 2));
+}
+
/* Return true if we can move VALUE into a register using a single
RDVL instruction. */
@@ -5659,7 +5670,7 @@ static bool
aarch64_sve_rdvl_immediate_p (poly_int64 value)
{
HOST_WIDE_INT factor = value.coeffs[0];
- return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor);
+ return value.coeffs[1] == factor && aarch64_sve_rdvl_addvl_factor_p (factor);
}
/* Likewise for rtx X. */
@@ -5695,10 +5706,8 @@ aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
HOST_WIDE_INT factor = value.coeffs[0];
if (factor == 0 || value.coeffs[1] != factor)
return false;
- /* FACTOR counts VG / 2, so a value of 2 is one predicate width
- and a value of 16 is one vector width. */
- return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
- || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
+ return (aarch64_sve_rdvl_addvl_factor_p (factor)
+ || aarch64_sve_addpl_factor_p (factor));
}
/* Likewise for rtx X. */
@@ -5798,11 +5807,11 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
number of 128-bit quadwords in an SME vector. ISA_MODE is the
ISA mode in which the calculation is being performed. */
-static rtx
+rtx
aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor,
aarch64_feature_flags isa_mode)
{
- gcc_assert (aarch64_sve_rdvl_factor_p (factor));
+ gcc_assert (aarch64_sve_rdvl_addvl_factor_p (factor));
if (isa_mode & AARCH64_FL_SM_ON)
/* We're in streaming mode, so we can use normal poly-int values. */
return gen_int_mode ({ factor, factor }, mode);
@@ -5845,7 +5854,7 @@ aarch64_rdsvl_immediate_p (const_rtx x)
{
HOST_WIDE_INT factor;
return (aarch64_sme_vq_unspec_p (x, &factor)
- && aarch64_sve_rdvl_factor_p (factor));
+ && aarch64_sve_rdvl_addvl_factor_p (factor));
}
/* Return the asm string for an RDSVL instruction that calculates X,
@@ -5862,6 +5871,38 @@ aarch64_output_rdsvl (const_rtx x)
return buffer;
}
+/* Return true if X is a constant that can be added using ADDSVL or ADDSPL. */
+
+bool
+aarch64_addsvl_addspl_immediate_p (const_rtx x)
+{
+ HOST_WIDE_INT factor;
+ return (aarch64_sme_vq_unspec_p (x, &factor)
+ && (aarch64_sve_rdvl_addvl_factor_p (factor)
+ || aarch64_sve_addpl_factor_p (factor)));
+}
+
+/* X is a constant that satisfies aarch64_addsvl_addspl_immediate_p.
+ Return the asm string for the associated instruction. */
+
+char *
+aarch64_output_addsvl_addspl (rtx x)
+{
+ static char buffer[sizeof ("addspl\t%x0, %x1, #-") + 3 * sizeof (int)];
+ HOST_WIDE_INT factor;
+ if (!aarch64_sme_vq_unspec_p (x, &factor))
+ gcc_unreachable ();
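+ /* FACTOR counts VG / 2, so a value of 2 is one predicate width
+ and a value of 16 is one vector width.  */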
+ if (aarch64_sve_rdvl_addvl_factor_p (factor))
+ snprintf (buffer, sizeof (buffer), "addsvl\t%%x0, %%x1, #%d",
+ (int) factor / 16);
+ else if (aarch64_sve_addpl_factor_p (factor))
+ snprintf (buffer, sizeof (buffer), "addspl\t%%x0, %%x1, #%d",
+ (int) factor / 2);
+ else
+ gcc_unreachable ();
+ return buffer;
+}
+
/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
@@ -6471,7 +6512,7 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
shift = 0;
}
/* Try to use an unshifted RDVL. */
- else if (aarch64_sve_rdvl_factor_p (factor))
+ else if (aarch64_sve_rdvl_addvl_factor_p (factor))
{
val = gen_int_mode (poly_int64 (factor, factor), mode);
shift = 0;
@@ -11354,6 +11395,9 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
if (GET_CODE (x) == HIGH)
return true;
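+ /* RDSVL-based constants measure the streaming vector length, which
+ cannot be represented in the constant pool or via relocations.  */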
+ if (aarch64_rdsvl_immediate_p (x))
+ return true;
+
/* There's no way to calculate VL-based values using relocations. */
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, x, ALL)
@@ -11569,7 +11613,7 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
&& contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
index = SUBREG_REG (index);
- if (aarch64_sve_data_mode_p (mode))
+ if (aarch64_sve_data_mode_p (mode) || mode == VNx1TImode)
{
if (type != ADDRESS_REG_REG
|| (1 << shift) != GET_MODE_UNIT_SIZE (mode))
@@ -11672,7 +11716,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
&& ((vec_flags == 0
&& known_lt (GET_MODE_SIZE (mode), 16))
|| vec_flags == VEC_ADVSIMD
- || vec_flags & VEC_SVE_DATA));
+ || vec_flags & VEC_SVE_DATA
+ || mode == VNx1TImode));
/* For SVE, only accept [Rn], [Rn, #offset, MUL VL] and [Rn, Rm, LSL #shift].
The latter is not valid for SVE predicates, and that's rejected through
@@ -11791,7 +11836,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
/* Make "m" use the LD1 offset range for SVE data modes, so
that pre-RTL optimizers like ivopts will work to that
instead of the wider LDR/STR range. */
- if (vec_flags == VEC_SVE_DATA)
+ if (vec_flags == VEC_SVE_DATA || mode == VNx1TImode)
return (type == ADDR_QUERY_M
? offset_4bit_signed_scaled_p (mode, offset)
: offset_9bit_signed_scaled_p (mode, offset));
@@ -14090,6 +14135,51 @@ aarch64_output_casesi (rtx *operands)
return "";
}
+/* Return the asm string for an SME ZERO instruction whose 8-bit mask
+ operand is MASK.  */
+const char *
+aarch64_output_sme_zero (rtx mask)
+{
+ auto mask_val = UINTVAL (mask);
+ if (mask_val == 0)
+ return "zero\t{}";
+
+ if (mask_val == 0xff)
+ return "zero\t{ za }";
+
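+ /* Each entry gives the ZERO mask for tile 0 of one element size;
+ shifting the mask left by one gives the mask for the next tile of
+ that size.  Coarser groupings come first so that the output uses
+ as few tile names as possible.  */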
+ static constexpr std::pair<unsigned int, char> tiles[] = {
+ { 0xff, 'b' },
+ { 0x55, 'h' },
+ { 0x11, 's' },
+ { 0x01, 'd' }
+ };
+ /* The last entry in the list has the form "za7.d }", but that's the
+ same length as "za7.d, ". */
+ static char buffer[sizeof ("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1];
+ unsigned int i = 0;
+ i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t");
+ const char *prefix = "{ ";
+ for (auto &tile : tiles)
+ {
+ auto tile_mask = tile.first;
+ unsigned int tile_index = 0;
+ while (tile_mask < 0x100)
+ {
+ if ((mask_val & tile_mask) == tile_mask)
+ {
+ i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c",
+ prefix, tile_index, tile.second);
+ prefix = ", ";
+ mask_val &= ~tile_mask;
+ }
+ tile_mask <<= 1;
+ tile_index += 1;
+ }
+ }
+ gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer));
+ snprintf (buffer + i, sizeof (buffer) - i, " }");
+ return buffer;
+}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
masked such that it is suitable for a UXTB, UXTH, or UXTW extend
@@ -23015,6 +23105,31 @@ aarch64_sve_struct_memory_operand_p (rtx op)
&& offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
}
+/* Return true if OFFSET is a constant integer and if VNUM is
+ OFFSET * the number of bytes in an SVE vector. This is the requirement
+ that exists in SME LDR and STR instructions, where the VL offset must
+ equal the ZA slice offset. */
+bool
+aarch64_sme_ldr_vnum_offset_p (rtx offset, rtx vnum)
+{
+ if (!CONST_INT_P (offset) || !IN_RANGE (INTVAL (offset), 0, 15))
+ return false;
+
+ if (TARGET_STREAMING)
+ {
+ poly_int64 const_vnum;
+ return (poly_int_rtx_p (vnum, &const_vnum)
+ && known_eq (const_vnum,
+ INTVAL (offset) * BYTES_PER_SVE_VECTOR));
+ }
+ else
+ {
+ HOST_WIDE_INT factor;
+ return (aarch64_sme_vq_unspec_p (vnum, &factor)
+ && factor == INTVAL (offset) * 16);
+ }
+}
+
/* Emit a register copy from operand to operand, taking care not to
early-clobber source registers in the process.
@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
/* Macros to test ISA flags. */
#define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF)
+#define AARCH64_ISA_SM_ON (aarch64_isa_flags & AARCH64_FL_SM_ON)
#define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON)
#define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES)
#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC)
@@ -224,6 +225,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
#define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME)
+#define AARCH64_ISA_SME_I16I64 (aarch64_isa_flags & AARCH64_FL_SME_I16I64)
+#define AARCH64_ISA_SME_F64F64 (aarch64_isa_flags & AARCH64_FL_SME_F64F64)
#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A)
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
@@ -256,6 +259,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
/* The current function is a normal non-streaming function. */
#define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF)
+/* The current function has a streaming body. */
+#define TARGET_STREAMING (AARCH64_ISA_SM_ON)
+
/* The current function has a streaming-compatible body. */
#define TARGET_STREAMING_COMPATIBLE \
((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)
@@ -316,6 +322,15 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
imply anything about the state of PSTATE.SM. */
#define TARGET_SME (AARCH64_ISA_SME)
+/* Streaming-mode SME instructions. */
+#define TARGET_STREAMING_SME (TARGET_STREAMING && TARGET_SME)
+
+/* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64. */
+#define TARGET_SME_I16I64 (AARCH64_ISA_SME_I16I64)
+
+/* The FEAT_SME_F64F64 extension to SME, enabled through +sme-f64f64. */
+#define TARGET_SME_F64F64 (AARCH64_ISA_SME_F64F64)
+
/* ARMv8.3-A features. */
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
@@ -2097,10 +2097,10 @@ (define_expand "add<mode>3"
(define_insn "*add<mode>3_aarch64"
[(set
- (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk")
+ (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk,rk")
(plus:GPI
- (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk")
- (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))]
+ (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk,rk")
+ (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav,UaV")))]
""
"@
add\\t%<w>0, %<w>1, %2
@@ -2109,10 +2109,12 @@ (define_insn "*add<mode>3_aarch64"
sub\\t%<w>0, %<w>1, #%n2
#
* return aarch64_output_sve_scalar_inc_dec (operands[2]);
- * return aarch64_output_sve_addvl_addpl (operands[2]);"
+ * return aarch64_output_sve_addvl_addpl (operands[2]);
+ * return aarch64_output_addsvl_addspl (operands[2]);"
;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
- [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm")
- (set_attr "arch" "*,*,simd,*,*,sve,sve")]
+ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,
+ alu_imm,alu_imm")
+ (set_attr "arch" "*,*,simd,*,*,sve,sve,sme")]
)
;; zero_extend version of above
new file mode 100644
@@ -0,0 +1,46 @@
+/* AArch64 SME intrinsics include file.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _ARM_SME_H_
+#define _ARM_SME_H_
+
+#include <arm_sve.h>
+#pragma GCC aarch64 "arm_sme.h"
+
+__attribute__((arm_streaming_compatible))
+void __arm_za_disable(void);
+
+__attribute__((arm_streaming_compatible, arm_preserves_za))
+void *__arm_sc_memcpy(void *, const void *, __SIZE_TYPE__);
+
+__attribute__((arm_streaming_compatible, arm_preserves_za))
+void *__arm_sc_memmove(void *, const void *, __SIZE_TYPE__);
+
+__attribute__((arm_streaming_compatible, arm_preserves_za))
+void *__arm_sc_memset(void *, int, __SIZE_TYPE__);
+
+__attribute__((arm_streaming_compatible, arm_preserves_za))
+void *__arm_sc_memchr(void *, int, __SIZE_TYPE__);
+
+#endif
@@ -21,6 +21,9 @@
(define_register_constraint "k" "STACK_REG"
"@internal The stack register.")
+(define_register_constraint "Uci" "ZA_INDEX_REGS"
+ "@internal r12-r15, which can be used to index ZA.")
+
(define_register_constraint "Ucs" "TAILCALL_ADDR_REGS"
"@internal Registers suitable for an indirect tail call")
@@ -74,6 +77,12 @@ (define_constraint "Uav"
a single ADDVL or ADDPL."
(match_operand 0 "aarch64_sve_addvl_addpl_immediate"))
+(define_constraint "UaV"
+ "@internal
+ A constraint that matches a VG-based constant that can be added by
+ a single ADDSVL or ADDSPL."
+ (match_operand 0 "aarch64_addsvl_addspl_immediate"))
+
(define_constraint "Uat"
"@internal
A constraint that matches a VG-based constant that can be added by
@@ -450,6 +450,7 @@ (define_mode_iterator VNx4SI_ONLY [VNx4SI])
(define_mode_iterator VNx4SF_ONLY [VNx4SF])
(define_mode_iterator VNx2DI_ONLY [VNx2DI])
(define_mode_iterator VNx2DF_ONLY [VNx2DF])
+(define_mode_iterator VNx1TI_ONLY [VNx1TI])
;; All SVE vector structure modes.
(define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI
@@ -598,6 +599,15 @@ (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
;; Bfloat16 modes to which V4SF can be converted
(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
+;; The modes used to represent different ZA access sizes.
+(define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI])
+(define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")])
+
+;; The modes for which outer product instructions are supported.
+(define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
+(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF
+ (VNx2DF "TARGET_SME_F64F64")])
+
;; ------------------------------------------------------------------
;; Unspec enumerations for Advance SIMD. These could well go into
;; aarch64.md but for their use in int_iterators here.
@@ -976,6 +986,28 @@ (define_c_enum "unspec"
UNSPEC_BFCVTN2 ; Used in aarch64-simd.md.
UNSPEC_BFCVT ; Used in aarch64-simd.md.
UNSPEC_FCVTXN ; Used in aarch64-simd.md.
+
+ ;; All used in aarch64-sme.md
+ UNSPEC_SME_ADDHA
+ UNSPEC_SME_ADDVA
+ UNSPEC_SME_FMOPA
+ UNSPEC_SME_FMOPS
+ UNSPEC_SME_LD1_HOR
+ UNSPEC_SME_LD1_VER
+ UNSPEC_SME_READ_HOR
+ UNSPEC_SME_READ_VER
+ UNSPEC_SME_SMOPA
+ UNSPEC_SME_SMOPS
+ UNSPEC_SME_ST1_HOR
+ UNSPEC_SME_ST1_VER
+ UNSPEC_SME_SUMOPA
+ UNSPEC_SME_SUMOPS
+ UNSPEC_SME_UMOPA
+ UNSPEC_SME_UMOPS
+ UNSPEC_SME_USMOPA
+ UNSPEC_SME_USMOPS
+ UNSPEC_SME_WRITE_HOR
+ UNSPEC_SME_WRITE_VER
])
;; ------------------------------------------------------------------
@@ -1232,6 +1264,7 @@ (define_mode_attr Vetype [(V8QI "b") (V16QI "b")
(VNx4SF "s") (VNx2SF "s")
(VNx2DI "d")
(VNx2DF "d")
+ (VNx1TI "q")
(BF "h") (V4BF "h") (V8BF "h")
(HF "h")
(SF "s") (DF "d")
@@ -1250,6 +1283,7 @@ (define_mode_attr Vesize [(VNx16QI "b") (VNx8QI "b") (VNx4QI "b") (VNx2QI "b")
(VNx4SF "w") (VNx2SF "w")
(VNx2DI "d")
(VNx2DF "d")
+ (VNx1TI "q")
(VNx32QI "b") (VNx48QI "b") (VNx64QI "b")
(VNx16HI "h") (VNx24HI "h") (VNx32HI "h")
(VNx16HF "h") (VNx24HF "h") (VNx32HF "h")
@@ -1574,6 +1608,15 @@ (define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s")
(V4HF ".4s") (V2SF ".2d")
(SI "") (HI "")])
+;; Vector modes whose elements are four times wider.
+(define_mode_attr V4xWIDE [(VNx16QI "VNx4SI") (VNx8HI "VNx2DI")])
+
+;; Predicate modes for V4xWIDE.
+(define_mode_attr V4xWIDE_PRED [(VNx16QI "VNx4BI") (VNx8HI "VNx2BI")])
+
+;; Element suffix for V4xWIDE.
+(define_mode_attr V4xwetype [(VNx16QI "s") (VNx8HI "d")])
+
;; Lower part register suffixes for VQW/VQ_HSF.
(define_mode_attr Vhalftype [(V16QI "8b") (V8HI "4h")
(V4SI "2s") (V8HF "4h")
@@ -2046,6 +2089,7 @@ (define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI "VNx8BI")
(VNx4SF "VNx4BI") (VNx2SF "VNx2BI")
(VNx2DI "VNx2BI")
(VNx2DF "VNx2BI")
+ (VNx1TI "VNx2BI")
(VNx32QI "VNx16BI")
(VNx16HI "VNx8BI") (VNx16HF "VNx8BI")
(VNx16BF "VNx8BI")
@@ -2126,6 +2170,17 @@ (define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
;; The constraint to use for an SVE FCMLA lane index.
(define_mode_attr sve_lane_pair_con [(VNx8HF "y") (VNx4SF "x")])
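+
+;; The mode of the ZA tile accumulated by an SME floating-point outer
+;; product with this input mode, plus its predicate mode and element suffix.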
+(define_mode_attr SME_FMOP_WIDE [(VNx8BF "VNx4SF") (VNx8HF "VNx4SF")
+ (VNx4SF "VNx4SF") (VNx2DF "VNx2DF")])
+
+(define_mode_attr SME_FMOP_WIDE_PRED [(VNx8BF "VNx4BI") (VNx8HF "VNx4BI")
+ (VNx4SF "VNx4BI") (VNx2DF "VNx2BI")])
+
+(define_mode_attr sme_fmop_wide_etype [(VNx8BF "s") (VNx8HF "s")
+ (VNx4SF "s") (VNx2DF "d")])
+
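+;; "b" for bfloat16 modes and empty otherwise, used to select the
+;; "bf"-prefixed mnemonics (e.g. bfmopa) for bfloat16 inputs.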
+(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")])
+
;; -------------------------------------------------------------------
;; Code Iterators
;; -------------------------------------------------------------------
@@ -3160,6 +3215,20 @@ (define_int_iterator FCMLA_OP [UNSPEC_FCMLA
(define_int_iterator FCMUL_OP [UNSPEC_FCMUL
UNSPEC_FCMUL_CONJ])
+(define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER])
+(define_int_iterator SME_READ [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER])
+(define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER])
+(define_int_iterator SME_WRITE [UNSPEC_SME_WRITE_HOR UNSPEC_SME_WRITE_VER])
+
+(define_int_iterator SME_UNARY_SDI [UNSPEC_SME_ADDHA UNSPEC_SME_ADDVA])
+
+(define_int_iterator SME_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS
+ UNSPEC_SME_SUMOPA UNSPEC_SME_SUMOPS
+ UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS
+ UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS])
+
+(define_int_iterator SME_FP_MOP [UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS])
+
;; Iterators for atomic operations.
(define_int_iterator ATOMIC_LDOP
@@ -3232,6 +3301,26 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
(UNSPEC_PMULLT "pmullt")
(UNSPEC_PMULLT_PAIR "pmullt_pair")
(UNSPEC_SMATMUL "smatmul")
+ (UNSPEC_SME_ADDHA "addha")
+ (UNSPEC_SME_ADDVA "addva")
+ (UNSPEC_SME_FMOPA "fmopa")
+ (UNSPEC_SME_FMOPS "fmops")
+ (UNSPEC_SME_LD1_HOR "ld1_hor")
+ (UNSPEC_SME_LD1_VER "ld1_ver")
+ (UNSPEC_SME_READ_HOR "read_hor")
+ (UNSPEC_SME_READ_VER "read_ver")
+ (UNSPEC_SME_SMOPA "smopa")
+ (UNSPEC_SME_SMOPS "smops")
+ (UNSPEC_SME_ST1_HOR "st1_hor")
+ (UNSPEC_SME_ST1_VER "st1_ver")
+ (UNSPEC_SME_SUMOPA "sumopa")
+ (UNSPEC_SME_SUMOPS "sumops")
+ (UNSPEC_SME_UMOPA "umopa")
+ (UNSPEC_SME_UMOPS "umops")
+ (UNSPEC_SME_USMOPA "usmopa")
+ (UNSPEC_SME_USMOPS "usmops")
+ (UNSPEC_SME_WRITE_HOR "write_hor")
+ (UNSPEC_SME_WRITE_VER "write_ver")
(UNSPEC_SQCADD90 "sqcadd90")
(UNSPEC_SQCADD270 "sqcadd270")
(UNSPEC_SQRDCMLAH "sqrdcmlah")
@@ -4001,6 +4090,15 @@ (define_int_attr min_elem_bits [(UNSPEC_RBIT "8")
(define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW")
(UNSPEC_WHILEWR "UNSPEC_WHILEWR")])
+(define_int_attr hv [(UNSPEC_SME_LD1_HOR "h")
+ (UNSPEC_SME_LD1_VER "v")
+ (UNSPEC_SME_READ_HOR "h")
+ (UNSPEC_SME_READ_VER "v")
+ (UNSPEC_SME_ST1_HOR "h")
+ (UNSPEC_SME_ST1_VER "v")
+ (UNSPEC_SME_WRITE_HOR "h")
+ (UNSPEC_SME_WRITE_VER "v")])
+
;; Iterators and attributes for fpcr fpsr getter setters
(define_int_iterator GET_FPSCR
@@ -168,11 +168,17 @@ (define_predicate "aarch64_split_add_offset_immediate"
(and (match_code "const_poly_int")
(match_test "aarch64_add_offset_temporaries (op) == 1")))
+(define_predicate "aarch64_addsvl_addspl_immediate"
+ (and (match_code "const")
+ (match_test "aarch64_addsvl_addspl_immediate_p (op)")))
+
(define_predicate "aarch64_pluslong_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_pluslong_immediate")
(and (match_test "TARGET_SVE")
- (match_operand 0 "aarch64_sve_plus_immediate"))))
+ (match_operand 0 "aarch64_sve_plus_immediate"))
+ (and (match_test "TARGET_SME")
+ (match_operand 0 "aarch64_addsvl_addspl_immediate"))))
(define_predicate "aarch64_pluslong_or_poly_operand"
(ior (match_operand 0 "aarch64_pluslong_operand")
@@ -63,6 +63,7 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \
$(srcdir)/config/aarch64/aarch64-sve-builtins.def \
$(srcdir)/config/aarch64/aarch64-sve-builtins-base.def \
$(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.def \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.def \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \
$(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) \
$(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \
@@ -72,7 +73,8 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \
$(srcdir)/config/aarch64/aarch64-sve-builtins.h \
$(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \
$(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \
- $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/aarch64/aarch64-sve-builtins.cc
@@ -113,6 +115,19 @@ aarch64-sve-builtins-sve2.o: \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc
+aarch64-sve-builtins-sme.o: \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \
+ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \
+ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \
+ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc
+
aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \
$(srcdir)/config/aarch64/iterators.md
$(SHELL) $(srcdir)/config/aarch64/geniterators.sh \
@@ -547,6 +547,12 @@ the following and their inverses no :samp:`{feature}` :
:samp:`sme`
Enable the Scalable Matrix Extension.
+:samp:`sme-i16i64`
+ Enable the FEAT_SME_I16I64 extension to SME.
+
+:samp:`sme-f64f64`
+ Enable the FEAT_SME_F64F64 extension to SME.
+
Feature ``crypto`` implies ``aes``, ``sha2``, and ``simd``,
which implies ``fp``.
Conversely, ``nofp`` implies ``nosimd``, which implies
new file mode 100644
@@ -0,0 +1,86 @@
+# Assembly-based regression-test driver for the SME ACLE
+# Copyright (C) 2009-2022 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } {
+ return
+}
+
+# Load support procs.
+load_lib g++-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Force SME if we're not testing it already.
+if { [check_effective_target_aarch64_sme] } {
+ set sme_flags ""
+} else {
+ set sme_flags "-march=armv8.2-a+sme"
+}
+
+# Turn off any codegen tweaks by default that may affect expected assembly.
+# Tests relying on those should turn them on explicitly.
+set sme_flags "$sme_flags -mtune=generic -moverride=tune=none"
+
+global gcc_runtest_parallelize_limit_minor
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set old_limit_minor $gcc_runtest_parallelize_limit_minor
+ set gcc_runtest_parallelize_limit_minor 1
+}
+
+torture-init
+set-torture-options {
+ "-std=c++98 -O0 -g"
+ "-std=c++98 -O1 -g"
+ "-std=c++11 -O2 -g"
+ "-std=c++14 -O3 -g"
+ "-std=c++17 -Og -g"
+ "-std=c++2a -Os -g"
+ "-std=gnu++98 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps"
+ "-std=gnu++11 -Ofast -g"
+ "-std=gnu++17 -O3 -g"
+ "-std=gnu++2a -O0 -g"
+} {
+ "-DTEST_FULL"
+ "-DTEST_OVERLOADS"
+}
+
+# Main loop.
+set gcc_subdir [string replace $subdir 0 2 gcc]
+set files [glob -nocomplain $srcdir/$gcc_subdir/acle-asm/*.c]
+set save-dg-do-what-default ${dg-do-what-default}
+if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } {
+ set dg-do-what-default assemble
+} else {
+ set dg-do-what-default compile
+}
+gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf"
+set dg-do-what-default ${save-dg-do-what-default}
+
+torture-finish
+
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set gcc_runtest_parallelize_limit_minor $old_limit_minor
+}
+
+# All done.
+dg-finish
@@ -4,6 +4,6 @@
to be diagnosed. Any attempt to call the function before including
arm_sve.h will lead to a link failure. (Same for taking its address,
etc.) */
-extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t);
+extern __attribute__((arm_preserves_za)) __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t);
#pragma GCC aarch64 "arm_sve.h"
@@ -1,6 +1,6 @@
/* { dg-do compile } */
-__SVUint8_t
+__SVUint8_t __attribute__((arm_preserves_za))
svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y)
{
return x;
@@ -1,6 +1,6 @@
/* { dg-do compile } */
-__SVUint8_t
+__SVUint8_t __attribute__((arm_preserves_za))
svadd_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y)
{
return x;
new file mode 100644
@@ -0,0 +1,82 @@
+# Assembly-based regression-test driver for the SME ACLE
+# Copyright (C) 2009-2022 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } {
+ return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Force SME if we're not testing it already.
+if { [check_effective_target_aarch64_sme] } {
+ set sme_flags ""
+} else {
+ set sme_flags "-march=armv8.2-a+sme"
+}
+
+# Turn off any codegen tweaks by default that may affect expected assembly.
+# Tests relying on those should turn them on explicitly.
+set sme_flags "$sme_flags -mtune=generic -moverride=tune=none"
+
+global gcc_runtest_parallelize_limit_minor
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set old_limit_minor $gcc_runtest_parallelize_limit_minor
+ set gcc_runtest_parallelize_limit_minor 1
+}
+
+torture-init
+set-torture-options {
+ "-std=c90 -O0 -g"
+ "-std=c90 -O1 -g"
+ "-std=c99 -O2 -g"
+ "-std=c11 -O3 -g"
+ "-std=gnu90 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps"
+ "-std=gnu99 -Ofast -g"
+ "-std=gnu11 -Os -g"
+} {
+ "-DTEST_FULL"
+ "-DTEST_OVERLOADS"
+}
+
+# Main loop.
+set files [glob -nocomplain $srcdir/$subdir/acle-asm/*.c]
+set save-dg-do-what-default ${dg-do-what-default}
+if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } {
+ set dg-do-what-default assemble
+} else {
+ set dg-do-what-default compile
+}
+gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf"
+set dg-do-what-default ${save-dg-do-what-default}
+
+torture-finish
+
+if { [info exists gcc_runtest_parallelize_limit_minor] } {
+ set gcc_runtest_parallelize_limit_minor $old_limit_minor
+}
+
+# All done.
+dg-finish
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** addha_za32_s32_0_p0_p1_z0:
+** addha za0\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za32_s32_0_p0_p1_z0, svint32_t,
+ svaddha_za32_s32_m (0, p0, p1, z0),
+ svaddha_za32_m (0, p0, p1, z0))
+
+/*
+** addha_za32_s32_0_p1_p0_z1:
+** addha za0\.s, p1/m, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za32_s32_0_p1_p0_z1, svint32_t,
+ svaddha_za32_s32_m (0, p1, p0, z1),
+ svaddha_za32_m (0, p1, p0, z1))
+
+/*
+** addha_za32_s32_1_p0_p1_z0:
+** addha za1\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za32_s32_1_p0_p1_z0, svint32_t,
+ svaddha_za32_s32_m (1, p0, p1, z0),
+ svaddha_za32_m (1, p0, p1, z0))
+
+/*
+** addha_za32_s32_3_p0_p1_z0:
+** addha za3\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za32_s32_3_p0_p1_z0, svint32_t,
+ svaddha_za32_s32_m (3, p0, p1, z0),
+ svaddha_za32_m (3, p0, p1, z0))
+
+/*
+** addha_za32_u32_0_p0_p1_z0:
+** addha za0\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za32_u32_0_p0_p1_z0, svuint32_t,
+ svaddha_za32_u32_m (0, p0, p1, z0),
+ svaddha_za32_m (0, p0, p1, z0))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** addha_za64_s64_0_p0_p1_z0:
+** addha za0\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za64_s64_0_p0_p1_z0, svint64_t,
+ svaddha_za64_s64_m (0, p0, p1, z0),
+ svaddha_za64_m (0, p0, p1, z0))
+
+/*
+** addha_za64_s64_0_p1_p0_z1:
+** addha za0\.d, p1/m, p0/m, z1\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za64_s64_0_p1_p0_z1, svint64_t,
+ svaddha_za64_s64_m (0, p1, p0, z1),
+ svaddha_za64_m (0, p1, p0, z1))
+
+/*
+** addha_za64_s64_1_p0_p1_z0:
+** addha za1\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za64_s64_1_p0_p1_z0, svint64_t,
+ svaddha_za64_s64_m (1, p0, p1, z0),
+ svaddha_za64_m (1, p0, p1, z0))
+
+/*
+** addha_za64_s64_7_p0_p1_z0:
+** addha za7\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za64_s64_7_p0_p1_z0, svint64_t,
+ svaddha_za64_s64_m (7, p0, p1, z0),
+ svaddha_za64_m (7, p0, p1, z0))
+
+/*
+** addha_za64_u64_0_p0_p1_z0:
+** addha za0\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addha_za64_u64_0_p0_p1_z0, svuint64_t,
+ svaddha_za64_u64_m (0, p0, p1, z0),
+ svaddha_za64_m (0, p0, p1, z0))
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** addva_za32_s32_0_p0_p1_z0:
+** addva za0\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za32_s32_0_p0_p1_z0, svint32_t,
+ svaddva_za32_s32_m (0, p0, p1, z0),
+ svaddva_za32_m (0, p0, p1, z0))
+
+/*
+** addva_za32_s32_0_p1_p0_z1:
+** addva za0\.s, p1/m, p0/m, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za32_s32_0_p1_p0_z1, svint32_t,
+ svaddva_za32_s32_m (0, p1, p0, z1),
+ svaddva_za32_m (0, p1, p0, z1))
+
+/*
+** addva_za32_s32_1_p0_p1_z0:
+** addva za1\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za32_s32_1_p0_p1_z0, svint32_t,
+ svaddva_za32_s32_m (1, p0, p1, z0),
+ svaddva_za32_m (1, p0, p1, z0))
+
+/*
+** addva_za32_s32_3_p0_p1_z0:
+** addva za3\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za32_s32_3_p0_p1_z0, svint32_t,
+ svaddva_za32_s32_m (3, p0, p1, z0),
+ svaddva_za32_m (3, p0, p1, z0))
+
+/*
+** addva_za32_u32_0_p0_p1_z0:
+** addva za0\.s, p0/m, p1/m, z0\.s
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za32_u32_0_p0_p1_z0, svuint32_t,
+ svaddva_za32_u32_m (0, p0, p1, z0),
+ svaddva_za32_m (0, p0, p1, z0))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** addva_za64_s64_0_p0_p1_z0:
+** addva za0\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za64_s64_0_p0_p1_z0, svint64_t,
+ svaddva_za64_s64_m (0, p0, p1, z0),
+ svaddva_za64_m (0, p0, p1, z0))
+
+/*
+** addva_za64_s64_0_p1_p0_z1:
+** addva za0\.d, p1/m, p0/m, z1\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za64_s64_0_p1_p0_z1, svint64_t,
+ svaddva_za64_s64_m (0, p1, p0, z1),
+ svaddva_za64_m (0, p1, p0, z1))
+
+/*
+** addva_za64_s64_1_p0_p1_z0:
+** addva za1\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za64_s64_1_p0_p1_z0, svint64_t,
+ svaddva_za64_s64_m (1, p0, p1, z0),
+ svaddva_za64_m (1, p0, p1, z0))
+
+/*
+** addva_za64_s64_7_p0_p1_z0:
+** addva za7\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za64_s64_7_p0_p1_z0, svint64_t,
+ svaddva_za64_s64_m (7, p0, p1, z0),
+ svaddva_za64_m (7, p0, p1, z0))
+
+/*
+** addva_za64_u64_0_p0_p1_z0:
+** addva za0\.d, p0/m, p1/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_ZA (addva_za64_u64_0_p0_p1_z0, svuint64_t,
+ svaddva_za64_u64_m (0, p0, p1, z0),
+ svaddva_za64_m (0, p0, p1, z0))
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+#pragma GCC target "+nosme"
+
+/*
+** test_nosme:
+** ...
+** bl __arm_sme_state
+** lsr x0, x0, #?63
+** ...
+*/
+PROTO (test_nosme, int, ()) { return __arm_has_sme (); }
+
+#pragma GCC target "+sme"
+
+/*
+** test_sme:
+** mov w0, #?1
+** ret
+*/
+PROTO (test_sme, int, ()) { return __arm_has_sme (); }
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define NON_STREAMING
+#include "test_sme_acle.h"
+
+/*
+** test_sme:
+** mov w0, #?0
+** ret
+*/
+PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); }
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** test_sme:
+** mov w0, #?1
+** ret
+*/
+PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); }
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+#pragma GCC target "+nosme"
+
+/*
+** test_nosme:
+** ...
+** bl __arm_sme_state
+** and w0, w0, #?1
+** ...
+*/
+PROTO (test_nosme, int, ()) { return __arm_in_streaming_mode (); }
+
+#pragma GCC target "+sme"
+
+/*
+** test_sme:
+** mrs x([0-9]+), svcr
+** and w0, w\1, #?1
+** ret
+*/
+PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); }
new file mode 100644
@@ -0,0 +1,310 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cntb_1:
+** cntb x0
+** ret
+*/
+PROTO (cntb_1, uint64_t, ()) { return svcntsb (); }
+
+/*
+** cntb_2:
+** cntb x0, all, mul #2
+** ret
+*/
+PROTO (cntb_2, uint64_t, ()) { return svcntsb () * 2; }
+
+/*
+** cntb_3:
+** cntb x0, all, mul #3
+** ret
+*/
+PROTO (cntb_3, uint64_t, ()) { return svcntsb () * 3; }
+
+/*
+** cntb_4:
+** cntb x0, all, mul #4
+** ret
+*/
+PROTO (cntb_4, uint64_t, ()) { return svcntsb () * 4; }
+
+/*
+** cntb_8:
+** cntb x0, all, mul #8
+** ret
+*/
+PROTO (cntb_8, uint64_t, ()) { return svcntsb () * 8; }
+
+/*
+** cntb_15:
+** cntb x0, all, mul #15
+** ret
+*/
+PROTO (cntb_15, uint64_t, ()) { return svcntsb () * 15; }
+
+/*
+** cntb_16:
+** cntb x0, all, mul #16
+** ret
+*/
+PROTO (cntb_16, uint64_t, ()) { return svcntsb () * 16; }
+
+/*
+** cntb_17:
+** rdvl x0, #17
+** ret
+*/
+PROTO (cntb_17, uint64_t, ()) { return svcntsb () * 17; }
+
+/*
+** cntb_31:
+** rdvl x0, #31
+** ret
+*/
+PROTO (cntb_31, uint64_t, ()) { return svcntsb () * 31; }
+
+/*
+** cntb_32:
+** cntb (x[0-9]+)
+** lsl x0, \1, 5
+** ret
+*/
+PROTO (cntb_32, uint64_t, ()) { return svcntsb () * 32; }
+
+/* Other sequences would be OK. */
+/*
+** cntb_33:
+** cntb (x[0-9]+)
+** lsl x0, \1, 5
+** incb x0
+** ret
+*/
+PROTO (cntb_33, uint64_t, ()) { return svcntsb () * 33; }
+
+/*
+** cntb_64:
+** cntb (x[0-9]+)
+** lsl x0, \1, 6
+** ret
+*/
+PROTO (cntb_64, uint64_t, ()) { return svcntsb () * 64; }
+
+/*
+** cntb_128:
+** cntb (x[0-9]+)
+** lsl x0, \1, 7
+** ret
+*/
+PROTO (cntb_128, uint64_t, ()) { return svcntsb () * 128; }
+
+/* Other sequences would be OK. */
+/*
+** cntb_129:
+** cntb (x[0-9]+)
+** lsl x0, \1, 7
+** incb x0
+** ret
+*/
+PROTO (cntb_129, uint64_t, ()) { return svcntsb () * 129; }
+
+/*
+** cntb_m1:
+** rdvl x0, #-1
+** ret
+*/
+PROTO (cntb_m1, uint64_t, ()) { return -svcntsb (); }
+
+/*
+** cntb_m13:
+** rdvl x0, #-13
+** ret
+*/
+PROTO (cntb_m13, uint64_t, ()) { return -svcntsb () * 13; }
+
+/*
+** cntb_m15:
+** rdvl x0, #-15
+** ret
+*/
+PROTO (cntb_m15, uint64_t, ()) { return -svcntsb () * 15; }
+
+/*
+** cntb_m16:
+** rdvl x0, #-16
+** ret
+*/
+PROTO (cntb_m16, uint64_t, ()) { return -svcntsb () * 16; }
+
+/*
+** cntb_m17:
+** rdvl x0, #-17
+** ret
+*/
+PROTO (cntb_m17, uint64_t, ()) { return -svcntsb () * 17; }
+
+/*
+** cntb_m32:
+** rdvl x0, #-32
+** ret
+*/
+PROTO (cntb_m32, uint64_t, ()) { return -svcntsb () * 32; }
+
+/*
+** cntb_m33:
+** rdvl x0, #-32
+** decb x0
+** ret
+*/
+PROTO (cntb_m33, uint64_t, ()) { return -svcntsb () * 33; }
+
+/*
+** cntb_m34:
+** rdvl (x[0-9]+), #-17
+** lsl x0, \1, #?1
+** ret
+*/
+PROTO (cntb_m34, uint64_t, ()) { return -svcntsb () * 34; }
+
+/*
+** cntb_m64:
+** rdvl (x[0-9]+), #-1
+** lsl x0, \1, #?6
+** ret
+*/
+PROTO (cntb_m64, uint64_t, ()) { return -svcntsb () * 64; }
+
+/*
+** incb_1:
+** incb x0
+** ret
+*/
+PROTO (incb_1, uint64_t, (uint64_t x0)) { return x0 + svcntsb (); }
+
+/*
+** incb_2:
+** incb x0, all, mul #2
+** ret
+*/
+PROTO (incb_2, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 2; }
+
+/*
+** incb_3:
+** incb x0, all, mul #3
+** ret
+*/
+PROTO (incb_3, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 3; }
+
+/*
+** incb_4:
+** incb x0, all, mul #4
+** ret
+*/
+PROTO (incb_4, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 4; }
+
+/*
+** incb_8:
+** incb x0, all, mul #8
+** ret
+*/
+PROTO (incb_8, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 8; }
+
+/*
+** incb_15:
+** incb x0, all, mul #15
+** ret
+*/
+PROTO (incb_15, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 15; }
+
+/*
+** incb_16:
+** incb x0, all, mul #16
+** ret
+*/
+PROTO (incb_16, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 16; }
+
+/*
+** incb_17:
+** addvl x0, x0, #17
+** ret
+*/
+PROTO (incb_17, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 17; }
+
+/*
+** incb_31:
+** addvl x0, x0, #31
+** ret
+*/
+PROTO (incb_31, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 31; }
+
+/*
+** decb_1:
+** decb x0
+** ret
+*/
+PROTO (decb_1, uint64_t, (uint64_t x0)) { return x0 - svcntsb (); }
+
+/*
+** decb_2:
+** decb x0, all, mul #2
+** ret
+*/
+PROTO (decb_2, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 2; }
+
+/*
+** decb_3:
+** decb x0, all, mul #3
+** ret
+*/
+PROTO (decb_3, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 3; }
+
+/*
+** decb_4:
+** decb x0, all, mul #4
+** ret
+*/
+PROTO (decb_4, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 4; }
+
+/*
+** decb_8:
+** decb x0, all, mul #8
+** ret
+*/
+PROTO (decb_8, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 8; }
+
+/*
+** decb_15:
+** decb x0, all, mul #15
+** ret
+*/
+PROTO (decb_15, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 15; }
+
+/*
+** decb_16:
+** decb x0, all, mul #16
+** ret
+*/
+PROTO (decb_16, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 16; }
+
+/*
+** decb_17:
+** addvl x0, x0, #-17
+** ret
+*/
+PROTO (decb_17, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 17; }
+
+/*
+** decb_31:
+** addvl x0, x0, #-31
+** ret
+*/
+PROTO (decb_31, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 31; }
+
+/*
+** decb_32:
+** addvl x0, x0, #-32
+** ret
+*/
+PROTO (decb_32, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 32; }
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cntsb:
+** rdsvl x0, #1
+** ret
+*/
+PROTO (cntsb, uint64_t, ()) { return svcntsb (); }
new file mode 100644
@@ -0,0 +1,277 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cntd_1:
+** cntd x0
+** ret
+*/
+PROTO (cntd_1, uint64_t, ()) { return svcntsd (); }
+
+/*
+** cntd_2:
+** cntw x0
+** ret
+*/
+PROTO (cntd_2, uint64_t, ()) { return svcntsd () * 2; }
+
+/*
+** cntd_3:
+** cntd x0, all, mul #3
+** ret
+*/
+PROTO (cntd_3, uint64_t, ()) { return svcntsd () * 3; }
+
+/*
+** cntd_4:
+** cnth x0
+** ret
+*/
+PROTO (cntd_4, uint64_t, ()) { return svcntsd () * 4; }
+
+/*
+** cntd_8:
+** cntb x0
+** ret
+*/
+PROTO (cntd_8, uint64_t, ()) { return svcntsd () * 8; }
+
+/*
+** cntd_15:
+** cntd x0, all, mul #15
+** ret
+*/
+PROTO (cntd_15, uint64_t, ()) { return svcntsd () * 15; }
+
+/*
+** cntd_16:
+** cntb x0, all, mul #2
+** ret
+*/
+PROTO (cntd_16, uint64_t, ()) { return svcntsd () * 16; }
+
+/* Other sequences would be OK. */
+/*
+** cntd_17:
+** rdvl (x[0-9]+), #17
+** asr x0, \1, 3
+** ret
+*/
+PROTO (cntd_17, uint64_t, ()) { return svcntsd () * 17; }
+
+/*
+** cntd_32:
+** cntb x0, all, mul #4
+** ret
+*/
+PROTO (cntd_32, uint64_t, ()) { return svcntsd () * 32; }
+
+/*
+** cntd_64:
+** cntb x0, all, mul #8
+** ret
+*/
+PROTO (cntd_64, uint64_t, ()) { return svcntsd () * 64; }
+
+/*
+** cntd_128:
+** cntb x0, all, mul #16
+** ret
+*/
+PROTO (cntd_128, uint64_t, ()) { return svcntsd () * 128; }
+
+/*
+** cntd_m1:
+** cntd (x[0-9]+)
+** neg x0, \1
+** ret
+*/
+PROTO (cntd_m1, uint64_t, ()) { return -svcntsd (); }
+
+/*
+** cntd_m13:
+** cntd (x[0-9]+), all, mul #13
+** neg x0, \1
+** ret
+*/
+PROTO (cntd_m13, uint64_t, ()) { return -svcntsd () * 13; }
+
+/*
+** cntd_m15:
+** cntd (x[0-9]+), all, mul #15
+** neg x0, \1
+** ret
+*/
+PROTO (cntd_m15, uint64_t, ()) { return -svcntsd () * 15; }
+
+/*
+** cntd_m16:
+** rdvl x0, #-2
+** ret
+*/
+PROTO (cntd_m16, uint64_t, ()) { return -svcntsd () * 16; }
+
+/* Other sequences would be OK. */
+/*
+** cntd_m17:
+** rdvl (x[0-9]+), #-17
+** asr x0, \1, 3
+** ret
+*/
+PROTO (cntd_m17, uint64_t, ()) { return -svcntsd () * 17; }
+
+/*
+** incd_1:
+** incd x0
+** ret
+*/
+PROTO (incd_1, uint64_t, (uint64_t x0)) { return x0 + svcntsd (); }
+
+/*
+** incd_2:
+** incw x0
+** ret
+*/
+PROTO (incd_2, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 2; }
+
+/*
+** incd_3:
+** incd x0, all, mul #3
+** ret
+*/
+PROTO (incd_3, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 3; }
+
+/*
+** incd_4:
+** inch x0
+** ret
+*/
+PROTO (incd_4, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 4; }
+
+/*
+** incd_7:
+** incd x0, all, mul #7
+** ret
+*/
+PROTO (incd_7, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 7; }
+
+/*
+** incd_8:
+** incb x0
+** ret
+*/
+PROTO (incd_8, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 8; }
+
+/*
+** incd_9:
+** incd x0, all, mul #9
+** ret
+*/
+PROTO (incd_9, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 9; }
+
+/*
+** incd_15:
+** incd x0, all, mul #15
+** ret
+*/
+PROTO (incd_15, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 15; }
+
+/*
+** incd_16:
+** incb x0, all, mul #2
+** ret
+*/
+PROTO (incd_16, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 16; }
+
+/*
+** incd_18:
+** incw x0, all, mul #9
+** ret
+*/
+PROTO (incd_18, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 18; }
+
+/*
+** incd_30:
+** incw x0, all, mul #15
+** ret
+*/
+PROTO (incd_30, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 30; }
+
+/*
+** decd_1:
+** decd x0
+** ret
+*/
+PROTO (decd_1, uint64_t, (uint64_t x0)) { return x0 - svcntsd (); }
+
+/*
+** decd_2:
+** decw x0
+** ret
+*/
+PROTO (decd_2, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 2; }
+
+/*
+** decd_3:
+** decd x0, all, mul #3
+** ret
+*/
+PROTO (decd_3, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 3; }
+
+/*
+** decd_4:
+** dech x0
+** ret
+*/
+PROTO (decd_4, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 4; }
+
+/*
+** decd_7:
+** decd x0, all, mul #7
+** ret
+*/
+PROTO (decd_7, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 7; }
+
+/*
+** decd_8:
+** decb x0
+** ret
+*/
+PROTO (decd_8, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 8; }
+
+/*
+** decd_9:
+** decd x0, all, mul #9
+** ret
+*/
+PROTO (decd_9, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 9; }
+
+/*
+** decd_15:
+** decd x0, all, mul #15
+** ret
+*/
+PROTO (decd_15, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 15; }
+
+/*
+** decd_16:
+** decb x0, all, mul #2
+** ret
+*/
+PROTO (decd_16, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 16; }
+
+/*
+** decd_18:
+** decw x0, all, mul #9
+** ret
+*/
+PROTO (decd_18, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 18; }
+
+/*
+** decd_30:
+** decw x0, all, mul #15
+** ret
+*/
+PROTO (decd_30, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 30; }
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cntsd:
+** rdsvl (x[0-9]+), #1
+** lsr x0, \1, #?3
+** ret
+*/
+PROTO (cntsd, uint64_t, ()) { return svcntsd (); }
new file mode 100644
@@ -0,0 +1,279 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cnth_1:
+** cnth x0
+** ret
+*/
+PROTO (cnth_1, uint64_t, ()) { return svcntsh (); }
+
+/*
+** cnth_2:
+** cntb x0
+** ret
+*/
+PROTO (cnth_2, uint64_t, ()) { return svcntsh () * 2; }
+
+/*
+** cnth_3:
+** cnth x0, all, mul #3
+** ret
+*/
+PROTO (cnth_3, uint64_t, ()) { return svcntsh () * 3; }
+
+/*
+** cnth_4:
+** cntb x0, all, mul #2
+** ret
+*/
+PROTO (cnth_4, uint64_t, ()) { return svcntsh () * 4; }
+
+/*
+** cnth_8:
+** cntb x0, all, mul #4
+** ret
+*/
+PROTO (cnth_8, uint64_t, ()) { return svcntsh () * 8; }
+
+/*
+** cnth_15:
+** cnth x0, all, mul #15
+** ret
+*/
+PROTO (cnth_15, uint64_t, ()) { return svcntsh () * 15; }
+
+/*
+** cnth_16:
+** cntb x0, all, mul #8
+** ret
+*/
+PROTO (cnth_16, uint64_t, ()) { return svcntsh () * 16; }
+
+/* Other sequences would be OK. */
+/*
+** cnth_17:
+** rdvl (x[0-9]+), #17
+** asr x0, \1, 1
+** ret
+*/
+PROTO (cnth_17, uint64_t, ()) { return svcntsh () * 17; }
+
+/*
+** cnth_32:
+** cntb x0, all, mul #16
+** ret
+*/
+PROTO (cnth_32, uint64_t, ()) { return svcntsh () * 32; }
+
+/*
+** cnth_64:
+** cntb (x[0-9]+)
+** lsl x0, \1, 5
+** ret
+*/
+PROTO (cnth_64, uint64_t, ()) { return svcntsh () * 64; }
+
+/*
+** cnth_128:
+** cntb (x[0-9]+)
+** lsl x0, \1, 6
+** ret
+*/
+PROTO (cnth_128, uint64_t, ()) { return svcntsh () * 128; }
+
+/*
+** cnth_m1:
+** cnth (x[0-9]+)
+** neg x0, \1
+** ret
+*/
+PROTO (cnth_m1, uint64_t, ()) { return -svcntsh (); }
+
+/*
+** cnth_m13:
+** cnth (x[0-9]+), all, mul #13
+** neg x0, \1
+** ret
+*/
+PROTO (cnth_m13, uint64_t, ()) { return -svcntsh () * 13; }
+
+/*
+** cnth_m15:
+** cnth (x[0-9]+), all, mul #15
+** neg x0, \1
+** ret
+*/
+PROTO (cnth_m15, uint64_t, ()) { return -svcntsh () * 15; }
+
+/*
+** cnth_m16:
+** rdvl x0, #-8
+** ret
+*/
+PROTO (cnth_m16, uint64_t, ()) { return -svcntsh () * 16; }
+
+/* Other sequences would be OK. */
+/*
+** cnth_m17:
+** rdvl (x[0-9]+), #-17
+** asr x0, \1, 1
+** ret
+*/
+PROTO (cnth_m17, uint64_t, ()) { return -svcntsh () * 17; }
+
+/*
+** inch_1:
+** inch x0
+** ret
+*/
+PROTO (inch_1, uint64_t, (uint64_t x0)) { return x0 + svcntsh (); }
+
+/*
+** inch_2:
+** incb x0
+** ret
+*/
+PROTO (inch_2, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 2; }
+
+/*
+** inch_3:
+** inch x0, all, mul #3
+** ret
+*/
+PROTO (inch_3, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 3; }
+
+/*
+** inch_4:
+** incb x0, all, mul #2
+** ret
+*/
+PROTO (inch_4, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 4; }
+
+/*
+** inch_7:
+** inch x0, all, mul #7
+** ret
+*/
+PROTO (inch_7, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 7; }
+
+/*
+** inch_8:
+** incb x0, all, mul #4
+** ret
+*/
+PROTO (inch_8, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 8; }
+
+/*
+** inch_9:
+** inch x0, all, mul #9
+** ret
+*/
+PROTO (inch_9, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 9; }
+
+/*
+** inch_15:
+** inch x0, all, mul #15
+** ret
+*/
+PROTO (inch_15, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 15; }
+
+/*
+** inch_16:
+** incb x0, all, mul #8
+** ret
+*/
+PROTO (inch_16, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 16; }
+
+/*
+** inch_18:
+** incb x0, all, mul #9
+** ret
+*/
+PROTO (inch_18, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 18; }
+
+/*
+** inch_30:
+** incb x0, all, mul #15
+** ret
+*/
+PROTO (inch_30, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 30; }
+
+/*
+** dech_1:
+** dech x0
+** ret
+*/
+PROTO (dech_1, uint64_t, (uint64_t x0)) { return x0 - svcntsh (); }
+
+/*
+** dech_2:
+** decb x0
+** ret
+*/
+PROTO (dech_2, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 2; }
+
+/*
+** dech_3:
+** dech x0, all, mul #3
+** ret
+*/
+PROTO (dech_3, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 3; }
+
+/*
+** dech_4:
+** decb x0, all, mul #2
+** ret
+*/
+PROTO (dech_4, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 4; }
+
+/*
+** dech_7:
+** dech x0, all, mul #7
+** ret
+*/
+PROTO (dech_7, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 7; }
+
+/*
+** dech_8:
+** decb x0, all, mul #4
+** ret
+*/
+PROTO (dech_8, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 8; }
+
+/*
+** dech_9:
+** dech x0, all, mul #9
+** ret
+*/
+PROTO (dech_9, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 9; }
+
+/*
+** dech_15:
+** dech x0, all, mul #15
+** ret
+*/
+PROTO (dech_15, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 15; }
+
+/*
+** dech_16:
+** decb x0, all, mul #8
+** ret
+*/
+PROTO (dech_16, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 16; }
+
+/*
+** dech_18:
+** decb x0, all, mul #9
+** ret
+*/
+PROTO (dech_18, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 18; }
+
+/*
+** dech_30:
+** decb x0, all, mul #15
+** ret
+*/
+PROTO (dech_30, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 30; }
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cntsh:
+** rdsvl (x[0-9]+), #1
+** lsr x0, \1, #?1
+** ret
+*/
+PROTO (cntsh, uint64_t, ()) { return svcntsh (); }
new file mode 100644
@@ -0,0 +1,278 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cntw_1:
+** cntw x0
+** ret
+*/
+PROTO (cntw_1, uint64_t, ()) { return svcntsw (); }
+
+/*
+** cntw_2:
+** cnth x0
+** ret
+*/
+PROTO (cntw_2, uint64_t, ()) { return svcntsw () * 2; }
+
+/*
+** cntw_3:
+** cntw x0, all, mul #3
+** ret
+*/
+PROTO (cntw_3, uint64_t, ()) { return svcntsw () * 3; }
+
+/*
+** cntw_4:
+** cntb x0
+** ret
+*/
+PROTO (cntw_4, uint64_t, ()) { return svcntsw () * 4; }
+
+/*
+** cntw_8:
+** cntb x0, all, mul #2
+** ret
+*/
+PROTO (cntw_8, uint64_t, ()) { return svcntsw () * 8; }
+
+/*
+** cntw_15:
+** cntw x0, all, mul #15
+** ret
+*/
+PROTO (cntw_15, uint64_t, ()) { return svcntsw () * 15; }
+
+/*
+** cntw_16:
+** cntb x0, all, mul #4
+** ret
+*/
+PROTO (cntw_16, uint64_t, ()) { return svcntsw () * 16; }
+
+/* Other sequences would be OK. */
+/*
+** cntw_17:
+** rdvl (x[0-9]+), #17
+** asr x0, \1, 2
+** ret
+*/
+PROTO (cntw_17, uint64_t, ()) { return svcntsw () * 17; }
+
+/*
+** cntw_32:
+** cntb x0, all, mul #8
+** ret
+*/
+PROTO (cntw_32, uint64_t, ()) { return svcntsw () * 32; }
+
+/*
+** cntw_64:
+** cntb x0, all, mul #16
+** ret
+*/
+PROTO (cntw_64, uint64_t, ()) { return svcntsw () * 64; }
+
+/*
+** cntw_128:
+** cntb (x[0-9]+)
+** lsl x0, \1, 5
+** ret
+*/
+PROTO (cntw_128, uint64_t, ()) { return svcntsw () * 128; }
+
+/*
+** cntw_m1:
+** cntw (x[0-9]+)
+** neg x0, \1
+** ret
+*/
+PROTO (cntw_m1, uint64_t, ()) { return -svcntsw (); }
+
+/*
+** cntw_m13:
+** cntw (x[0-9]+), all, mul #13
+** neg x0, \1
+** ret
+*/
+PROTO (cntw_m13, uint64_t, ()) { return -svcntsw () * 13; }
+
+/*
+** cntw_m15:
+** cntw (x[0-9]+), all, mul #15
+** neg x0, \1
+** ret
+*/
+PROTO (cntw_m15, uint64_t, ()) { return -svcntsw () * 15; }
+
+/*
+** cntw_m16:
+** rdvl x0, #-4
+** ret
+*/
+PROTO (cntw_m16, uint64_t, ()) { return -svcntsw () * 16; }
+
+/* Other sequences would be OK. */
+/*
+** cntw_m17:
+** rdvl (x[0-9]+), #-17
+** asr x0, \1, 2
+** ret
+*/
+PROTO (cntw_m17, uint64_t, ()) { return -svcntsw () * 17; }
+
+/*
+** incw_1:
+** incw x0
+** ret
+*/
+PROTO (incw_1, uint64_t, (uint64_t x0)) { return x0 + svcntsw (); }
+
+/*
+** incw_2:
+** inch x0
+** ret
+*/
+PROTO (incw_2, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 2; }
+
+/*
+** incw_3:
+** incw x0, all, mul #3
+** ret
+*/
+PROTO (incw_3, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 3; }
+
+/*
+** incw_4:
+** incb x0
+** ret
+*/
+PROTO (incw_4, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 4; }
+
+/*
+** incw_7:
+** incw x0, all, mul #7
+** ret
+*/
+PROTO (incw_7, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 7; }
+
+/*
+** incw_8:
+** incb x0, all, mul #2
+** ret
+*/
+PROTO (incw_8, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 8; }
+
+/*
+** incw_9:
+** incw x0, all, mul #9
+** ret
+*/
+PROTO (incw_9, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 9; }
+
+/*
+** incw_15:
+** incw x0, all, mul #15
+** ret
+*/
+PROTO (incw_15, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 15; }
+
+/*
+** incw_16:
+** incb x0, all, mul #4
+** ret
+*/
+PROTO (incw_16, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 16; }
+
+/*
+** incw_18:
+** inch x0, all, mul #9
+** ret
+*/
+PROTO (incw_18, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 18; }
+
+/*
+** incw_30:
+** inch x0, all, mul #15
+** ret
+*/
+PROTO (incw_30, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 30; }
+
+/*
+** decw_1:
+** decw x0
+** ret
+*/
+PROTO (decw_1, uint64_t, (uint64_t x0)) { return x0 - svcntsw (); }
+
+/*
+** decw_2:
+** dech x0
+** ret
+*/
+PROTO (decw_2, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 2; }
+
+/*
+** decw_3:
+** decw x0, all, mul #3
+** ret
+*/
+PROTO (decw_3, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 3; }
+
+/*
+** decw_4:
+** decb x0
+** ret
+*/
+PROTO (decw_4, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 4; }
+
+/*
+** decw_7:
+** decw x0, all, mul #7
+** ret
+*/
+PROTO (decw_7, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 7; }
+
+/*
+** decw_8:
+** decb x0, all, mul #2
+** ret
+*/
+PROTO (decw_8, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 8; }
+
+/*
+** decw_9:
+** decw x0, all, mul #9
+** ret
+*/
+PROTO (decw_9, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 9; }
+
+/*
+** decw_15:
+** decw x0, all, mul #15
+** ret
+*/
+PROTO (decw_15, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 15; }
+
+/*
+** decw_16:
+** decb x0, all, mul #4
+** ret
+*/
+PROTO (decw_16, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 16; }
+
+/*
+** decw_18:
+** dech x0, all, mul #9
+** ret
+*/
+PROTO (decw_18, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 18; }
+
+/*
+** decw_30:
+** dech x0, all, mul #15
+** ret
+*/
+PROTO (decw_30, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 30; }
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#define NO_SHARED_ZA
+#include "test_sme_acle.h"
+
+/*
+** cntsw:
+** rdsvl (x[0-9]+), #1
+** lsr x0, \1, #?2
+** ret
+*/
+PROTO (cntsw, uint64_t, ()) { return svcntsw (); }
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_vnum_za128_0_0:
+** mov (w1[2-5]), w0
+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za128_0_0,
+ svld1_hor_vnum_za128 (0, w0, p0, x1, 0),
+ svld1_hor_vnum_za128 (0, w0, p0, x1, 0))
+
+/*
+** ld1_vnum_za128_5_0:
+** incb x1, all, mul #13
+** mov (w1[2-5]), w0
+** ld1q { za5h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za128_5_0,
+ svld1_hor_vnum_za128 (5, w0, p0, x1, 13),
+ svld1_hor_vnum_za128 (5, w0, p0, x1, 13))
+
+/*
+** ld1_vnum_za128_11_0:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** ld1q { za11h\.q\[\3, 0\] }, p0/z, \[\2\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za128_11_0,
+ svld1_hor_vnum_za128 (11, w0, p0, x1, x2),
+ svld1_hor_vnum_za128 (11, w0, p0, x1, x2))
+
+/*
+** ld1_vnum_za128_0_1:
+** add (w1[2-5]), w0, #?1
+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za128_0_1,
+ svld1_hor_vnum_za128 (0, w0 + 1, p0, x1, 0),
+ svld1_hor_vnum_za128 (0, w0 + 1, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_vnum_za16_0_0:
+** mov (w1[2-5]), w0
+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za16_0_0,
+ svld1_hor_vnum_za16 (0, w0, p0, x1, 0),
+ svld1_hor_vnum_za16 (0, w0, p0, x1, 0))
+
+/*
+** ld1_vnum_za16_0_1:
+** incb x1, all, mul #9
+** mov (w1[2-5]), w0
+** ld1h { za0h\.h\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za16_0_1,
+ svld1_hor_vnum_za16 (0, w0 + 1, p0, x1, 9),
+ svld1_hor_vnum_za16 (0, w0 + 1, p0, x1, 9))
+
+/*
+** ld1_vnum_za16_1_7:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** ld1h { za1h\.h\[\3, 7\] }, p0/z, \[\2\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za16_1_7,
+ svld1_hor_vnum_za16 (1, w0 + 7, p0, x1, x2),
+ svld1_hor_vnum_za16 (1, w0 + 7, p0, x1, x2))
+
+/*
+** ld1_vnum_za16_0_8:
+** add (w1[2-5]), w0, #?8
+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za16_0_8,
+ svld1_hor_vnum_za16 (0, w0 + 8, p0, x1, 0),
+ svld1_hor_vnum_za16 (0, w0 + 8, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_vnum_za32_0_0:
+** mov (w1[2-5]), w0
+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za32_0_0,
+ svld1_hor_vnum_za32 (0, w0, p0, x1, 0),
+ svld1_hor_vnum_za32 (0, w0, p0, x1, 0))
+
+/*
+** ld1_vnum_za32_0_1:
+** incb x1, all, mul #5
+** mov (w1[2-5]), w0
+** ld1w { za0h\.s\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za32_0_1,
+ svld1_hor_vnum_za32 (0, w0 + 1, p0, x1, 5),
+ svld1_hor_vnum_za32 (0, w0 + 1, p0, x1, 5))
+
+/*
+** ld1_vnum_za32_2_3:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** ld1w { za2h\.s\[\3, 3\] }, p0/z, \[\2\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za32_2_3,
+ svld1_hor_vnum_za32 (2, w0 + 3, p0, x1, x2),
+ svld1_hor_vnum_za32 (2, w0 + 3, p0, x1, x2))
+
+/*
+** ld1_vnum_za32_0_4:
+** add (w1[2-5]), w0, #?4
+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za32_0_4,
+ svld1_hor_vnum_za32 (0, w0 + 4, p0, x1, 0),
+ svld1_hor_vnum_za32 (0, w0 + 4, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_vnum_za64_0_0:
+** mov (w1[2-5]), w0
+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za64_0_0,
+ svld1_hor_vnum_za64 (0, w0, p0, x1, 0),
+ svld1_hor_vnum_za64 (0, w0, p0, x1, 0))
+
+/*
+** ld1_vnum_za64_0_1:
+** incb x1, all, mul #13
+** mov (w1[2-5]), w0
+** ld1d { za0h\.d\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za64_0_1,
+ svld1_hor_vnum_za64 (0, w0 + 1, p0, x1, 13),
+ svld1_hor_vnum_za64 (0, w0 + 1, p0, x1, 13))
+
+/*
+** ld1_vnum_za64_5_1:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** ld1d { za5h\.d\[\3, 1\] }, p0/z, \[\2\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za64_5_1,
+ svld1_hor_vnum_za64 (5, w0 + 1, p0, x1, x2),
+ svld1_hor_vnum_za64 (5, w0 + 1, p0, x1, x2))
+
+/*
+** ld1_vnum_za64_0_2:
+** add (w1[2-5]), w0, #?2
+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za64_0_2,
+ svld1_hor_vnum_za64 (0, w0 + 2, p0, x1, 0),
+ svld1_hor_vnum_za64 (0, w0 + 2, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_vnum_za8_0_0:
+** mov (w1[2-5]), w0
+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za8_0_0,
+ svld1_hor_vnum_za8 (0, w0, p0, x1, 0),
+ svld1_hor_vnum_za8 (0, w0, p0, x1, 0))
+
+/*
+** ld1_vnum_za8_0_1:
+** incb x1, all, mul #11
+** mov (w1[2-5]), w0
+** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za8_0_1,
+ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 11),
+ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 11))
+
+/*
+** ld1_vnum_za8_0_15:
+** cntb (x[0-9]+)
+** mul (x[0-9]+), (?:\1, x2|x2, \1)
+** mov (w1[2-5]), w0
+** ld1b { za0h\.b\[\3, 15\] }, p0/z, \[x1, \2\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za8_0_15,
+ svld1_hor_vnum_za8 (0, w0 + 15, p0, x1, x2),
+ svld1_hor_vnum_za8 (0, w0 + 15, p0, x1, x2))
+
+/*
+** ld1_vnum_za8_0_16:
+** add (w1[2-5]), w0, #?16
+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_vnum_za8_0_16,
+ svld1_hor_vnum_za8 (0, w0 + 16, p0, x1, 0),
+ svld1_hor_vnum_za8 (0, w0 + 16, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,63 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_za128_0_0:
+** mov (w1[2-5]), w0
+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za128_0_0,
+ svld1_hor_za128 (0, w0, p0, x1),
+ svld1_hor_za128 (0, w0, p0, x1))
+
+/*
+** ld1_za128_0_1:
+** add (w1[2-5]), w0, #?1
+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za128_0_1,
+ svld1_hor_za128 (0, w0 + 1, p0, x1),
+ svld1_hor_za128 (0, w0 + 1, p0, x1))
+
+/*
+** ld1_za128_7_0:
+** mov (w1[2-5]), w0
+** ld1q { za7h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za128_7_0,
+ svld1_hor_za128 (7, w0, p0, x1),
+ svld1_hor_za128 (7, w0, p0, x1))
+
+/*
+** ld1_za128_13_0:
+** mov (w1[2-5]), w0
+** ld1q { za13h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za128_13_0,
+ svld1_hor_za128 (13, w0, p0, x1),
+ svld1_hor_za128 (13, w0, p0, x1))
+
+/*
+** ld1_za128_15_0:
+** mov (w1[2-5]), w0
+** ld1q { za15h\.q\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za128_15_0,
+ svld1_hor_za128 (15, w0, p0, x1),
+ svld1_hor_za128 (15, w0, p0, x1))
+
+/*
+** ld1_za128_9_0_index:
+** mov (w1[2-5]), w0
+** ld1q { za9h\.q\[\1, 0\] }, p0/z, \[x1, x2, lsl #?4\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za128_9_0_index,
+ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16),
+ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16))
new file mode 100644
@@ -0,0 +1,94 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_za16_0_0:
+** mov (w1[2-5]), w0
+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_0_0,
+ svld1_hor_za16 (0, w0, p0, x1),
+ svld1_hor_za16 (0, w0, p0, x1))
+
+/*
+** ld1_za16_0_1:
+** mov (w1[2-5]), w0
+** ld1h { za0h\.h\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_0_1,
+ svld1_hor_za16 (0, w0 + 1, p0, x1),
+ svld1_hor_za16 (0, w0 + 1, p0, x1))
+
+/*
+** ld1_za16_0_7:
+** mov (w1[2-5]), w0
+** ld1h { za0h\.h\[\1, 7\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_0_7,
+ svld1_hor_za16 (0, w0 + 7, p0, x1),
+ svld1_hor_za16 (0, w0 + 7, p0, x1))
+
+/*
+** ld1_za16_1_0:
+** mov (w1[2-5]), w0
+** ld1h { za1h\.h\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_1_0,
+ svld1_hor_za16 (1, w0, p0, x1),
+ svld1_hor_za16 (1, w0, p0, x1))
+
+
+/*
+** ld1_za16_1_1:
+** mov (w1[2-5]), w0
+** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_1_1,
+ svld1_hor_za16 (1, w0 + 1, p0, x1),
+ svld1_hor_za16 (1, w0 + 1, p0, x1))
+
+/*
+** ld1_za16_1_7:
+** mov (w1[2-5]), w0
+** ld1h { za1h\.h\[\1, 7\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_1_7,
+ svld1_hor_za16 (1, w0 + 7, p0, x1),
+ svld1_hor_za16 (1, w0 + 7, p0, x1))
+
+/*
+** ld1_za16_1_5_index:
+** mov (w1[2-5]), w0
+** ld1h { za1h\.h\[\1, 5\] }, p0/z, \[x1, x2, lsl #?1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_1_5_index,
+ svld1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2),
+ svld1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2))
+
+/*
+** ld1_za16_0_8:
+** add (w1[2-5]), w0, #?8
+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_0_8,
+ svld1_hor_za16 (0, w0 + 8, p0, x1),
+ svld1_hor_za16 (0, w0 + 8, p0, x1))
+
+/*
+** ld1_za16_0_m1:
+** sub (w1[2-5]), w0, #?1
+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za16_0_m1,
+ svld1_hor_za16 (0, w0 - 1, p0, x1),
+ svld1_hor_za16 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,93 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_za32_0_0:
+** mov (w1[2-5]), w0
+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_0_0,
+ svld1_hor_za32 (0, w0, p0, x1),
+ svld1_hor_za32 (0, w0, p0, x1))
+
+/*
+** ld1_za32_0_1:
+** mov (w1[2-5]), w0
+** ld1w { za0h\.s\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_0_1,
+ svld1_hor_za32 (0, w0 + 1, p0, x1),
+ svld1_hor_za32 (0, w0 + 1, p0, x1))
+
+/*
+** ld1_za32_0_3:
+** mov (w1[2-5]), w0
+** ld1w { za0h\.s\[\1, 3\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_0_3,
+ svld1_hor_za32 (0, w0 + 3, p0, x1),
+ svld1_hor_za32 (0, w0 + 3, p0, x1))
+
+/*
+** ld1_za32_3_0:
+** mov (w1[2-5]), w0
+** ld1w { za3h\.s\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_3_0,
+ svld1_hor_za32 (3, w0, p0, x1),
+ svld1_hor_za32 (3, w0, p0, x1))
+
+/*
+** ld1_za32_3_1:
+** mov (w1[2-5]), w0
+** ld1w { za3h\.s\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_3_1,
+ svld1_hor_za32 (3, w0 + 1, p0, x1),
+ svld1_hor_za32 (3, w0 + 1, p0, x1))
+
+/*
+** ld1_za32_3_3:
+** mov (w1[2-5]), w0
+** ld1w { za3h\.s\[\1, 3\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_3_3,
+ svld1_hor_za32 (3, w0 + 3, p0, x1),
+ svld1_hor_za32 (3, w0 + 3, p0, x1))
+
+/*
+** ld1_za32_1_2_index:
+** mov (w1[2-5]), w0
+** ld1w { za1h\.s\[\1, 2\] }, p0/z, \[x1, x2, lsl #?2\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_1_2_index,
+ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4),
+ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4))
+
+/*
+** ld1_za32_0_4:
+** add (w1[2-5]), w0, #?4
+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_0_4,
+ svld1_hor_za32 (0, w0 + 4, p0, x1),
+ svld1_hor_za32 (0, w0 + 4, p0, x1))
+
+/*
+** ld1_za32_0_m1:
+** sub (w1[2-5]), w0, #?1
+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za32_0_m1,
+ svld1_hor_za32 (0, w0 - 1, p0, x1),
+ svld1_hor_za32 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_za64_0_0:
+** mov (w1[2-5]), w0
+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za64_0_0,
+ svld1_hor_za64 (0, w0, p0, x1),
+ svld1_hor_za64 (0, w0, p0, x1))
+
+/*
+** ld1_za64_0_1:
+** mov (w1[2-5]), w0
+** ld1d { za0h\.d\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za64_0_1,
+ svld1_hor_za64 (0, w0 + 1, p0, x1),
+ svld1_hor_za64 (0, w0 + 1, p0, x1))
+
+/*
+** ld1_za64_7_0:
+** mov (w1[2-5]), w0
+** ld1d { za7h\.d\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za64_7_0,
+ svld1_hor_za64 (7, w0, p0, x1),
+ svld1_hor_za64 (7, w0, p0, x1))
+
+/*
+** ld1_za64_7_1:
+** mov (w1[2-5]), w0
+** ld1d { za7h\.d\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za64_7_1,
+ svld1_hor_za64 (7, w0 + 1, p0, x1),
+ svld1_hor_za64 (7, w0 + 1, p0, x1))
+
+/*
+** ld1_za64_5_1_index:
+** mov (w1[2-5]), w0
+** ld1d { za5h\.d\[\1, 1\] }, p0/z, \[x1, x2, lsl #?3\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za64_5_1_index,
+ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8),
+ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8))
+
+/*
+** ld1_za64_0_2:
+** add (w1[2-5]), w0, #?2
+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za64_0_2,
+ svld1_hor_za64 (0, w0 + 2, p0, x1),
+ svld1_hor_za64 (0, w0 + 2, p0, x1))
+
+/*
+** ld1_za64_0_m1:
+** sub (w1[2-5]), w0, #?1
+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za64_0_m1,
+ svld1_hor_za64 (0, w0 - 1, p0, x1),
+ svld1_hor_za64 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,63 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ld1_za8_0_0:
+** mov (w1[2-5]), w0
+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za8_0_0,
+ svld1_hor_za8 (0, w0, p0, x1),
+ svld1_hor_za8 (0, w0, p0, x1))
+
+/*
+** ld1_za8_0_1:
+** mov (w1[2-5]), w0
+** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za8_0_1,
+ svld1_hor_za8 (0, w0 + 1, p0, x1),
+ svld1_hor_za8 (0, w0 + 1, p0, x1))
+
+/*
+** ld1_za8_0_15:
+** mov (w1[2-5]), w0
+** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za8_0_15,
+ svld1_hor_za8 (0, w0 + 15, p0, x1),
+ svld1_hor_za8 (0, w0 + 15, p0, x1))
+
+/*
+** ld1_za8_0_13_index:
+** mov (w1[2-5]), w0
+** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1, x2\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za8_0_15_index,
+ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2),
+ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2))
+
+/*
+** ld1_za8_0_16:
+** add (w1[2-5]), w0, #?16
+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za8_0_16,
+ svld1_hor_za8 (0, w0 + 16, p0, x1),
+ svld1_hor_za8 (0, w0 + 16, p0, x1))
+
+/*
+** ld1_za8_0_m1:
+** sub (w1[2-5]), w0, #?1
+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\]
+** ret
+*/
+TEST_LOAD_ZA (ld1_za8_0_m1,
+ svld1_hor_za8 (0, w0 - 1, p0, x1),
+ svld1_hor_za8 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,121 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ldr_vnum_za_0:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_0,
+ svldr_vnum_za (w0, x1, 0),
+ svldr_vnum_za (w0, x1, 0))
+
+/*
+** ldr_vnum_za_1:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_1,
+ svldr_vnum_za (w0 + 1, x1, 1),
+ svldr_vnum_za (w0 + 1, x1, 1))
+
+/*
+** ldr_vnum_za_13:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 13\], \[x1, #13, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_13,
+ svldr_vnum_za (w0 + 13, x1, 13),
+ svldr_vnum_za (w0 + 13, x1, 13))
+
+/*
+** ldr_vnum_za_15:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 15\], \[x1, #15, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_15,
+ svldr_vnum_za (w0 + 15, x1, 15),
+ svldr_vnum_za (w0 + 15, x1, 15))
+
+/*
+** ldr_vnum_za_16:
+** (
+** add (w1[2-5]), w0, #?16
+** incb x1, all, mul #16
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1, all, mul #16
+** add (w1[2-5]), w0, #?16
+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_16,
+ svldr_vnum_za (w0 + 16, x1, 16),
+ svldr_vnum_za (w0 + 16, x1, 16))
+
+/*
+** ldr_vnum_za_m1:
+** (
+** sub (w1[2-5]), w0, #?1
+** decb x1
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** decb x1
+** sub (w1[2-5]), w0, #?1
+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_m1,
+ svldr_vnum_za (w0 - 1, x1, -1),
+ svldr_vnum_za (w0 - 1, x1, -1))
+
+/*
+** ldr_vnum_za_mixed_1:
+** add (w1[2-5]), w0, #?1
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_1,
+ svldr_vnum_za (w0 + 1, x1, 0),
+ svldr_vnum_za (w0 + 1, x1, 0))
+
+/*
+** ldr_vnum_za_mixed_2:
+** (
+** mov (w1[2-5]), w0
+** incb x1
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1
+** mov (w1[2-5]), w0
+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_2,
+ svldr_vnum_za (w0, x1, 1),
+ svldr_vnum_za (w0, x1, 1))
+
+/*
+** ldr_vnum_za_mixed_3:
+** (
+** add (w1[2-5]), w0, #?2
+** incb x1
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1
+** add (w1[2-5]), w0, #?2
+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_3,
+ svldr_vnum_za (w0 + 2, x1, 1),
+ svldr_vnum_za (w0 + 2, x1, 1))
new file mode 100644
@@ -0,0 +1,166 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#include "test_sme_acle.h"
+
+/*
+** ldr_vnum_za_0:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_0,
+ svldr_vnum_za (w0, x1, 0),
+ svldr_vnum_za (w0, x1, 0))
+
+/*
+** ldr_vnum_za_1:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_1,
+ svldr_vnum_za (w0 + 1, x1, 1),
+ svldr_vnum_za (w0 + 1, x1, 1))
+
+/*
+** ldr_vnum_za_13:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 13\], \[x1, #13, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_13,
+ svldr_vnum_za (w0 + 13, x1, 13),
+ svldr_vnum_za (w0 + 13, x1, 13))
+
+/*
+** ldr_vnum_za_15:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 15\], \[x1, #15, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_15,
+ svldr_vnum_za (w0 + 15, x1, 15),
+ svldr_vnum_za (w0 + 15, x1, 15))
+
+/*
+** ldr_vnum_za_16:
+** (
+** add (w1[2-5]), w0, #?16
+** addsvl (x[0-9]+), x1, #16
+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #16
+** add (w1[2-5]), w0, #?16
+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_16,
+ svldr_vnum_za (w0 + 16, x1, 16),
+ svldr_vnum_za (w0 + 16, x1, 16))
+
+/*
+** ldr_vnum_za_m1:
+** (
+** sub (w1[2-5]), w0, #?1
+** addsvl (x[0-9]+), x1, #-1
+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #-1
+** sub (w1[2-5]), w0, #?1
+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_m1,
+ svldr_vnum_za (w0 - 1, x1, -1),
+ svldr_vnum_za (w0 - 1, x1, -1))
+
+/*
+** ldr_vnum_za_mixed_1:
+** add (w1[2-5]), w0, #?1
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_1,
+ svldr_vnum_za (w0 + 1, x1, 0),
+ svldr_vnum_za (w0 + 1, x1, 0))
+
+/*
+** ldr_vnum_za_mixed_2:
+** (
+** mov (w1[2-5]), w0
+** addsvl (x[0-9]+), x1, #1
+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #1
+** mov (w1[2-5]), w0
+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_2,
+ svldr_vnum_za (w0, x1, 1),
+ svldr_vnum_za (w0, x1, 1))
+
+/*
+** ldr_vnum_za_mixed_3:
+** (
+** add (w1[2-5]), w0, #?2
+** addsvl (x[0-9]+), x1, #1
+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #1
+** add (w1[2-5]), w0, #?2
+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_3,
+ svldr_vnum_za (w0 + 2, x1, 1),
+ svldr_vnum_za (w0 + 2, x1, 1))
+
+/*
+** ldr_vnum_za_mixed_4:
+** ...
+** addsvl x[0-9]+, x1, #-32
+** ...
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_4,
+ svldr_vnum_za (w0 + 3, x1, -32),
+ svldr_vnum_za (w0 + 3, x1, -32))
+
+/*
+** ldr_vnum_za_mixed_5:
+** ...
+** rdsvl x[0-9]+, #1
+** ...
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_5,
+ svldr_vnum_za (w0 + 3, x1, -33),
+ svldr_vnum_za (w0 + 3, x1, -33))
+
+/*
+** ldr_vnum_za_mixed_6:
+** ...
+** addsvl x[0-9]+, x1, #31
+** ...
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_6,
+ svldr_vnum_za (w0 + 4, x1, 31),
+ svldr_vnum_za (w0 + 4, x1, 31))
+
+/*
+** ldr_vnum_za_mixed_7:
+** ...
+** rdsvl x[0-9]+, #1
+** ...
+** ret
+*/
+TEST_LOAD_ZA (ldr_vnum_za_mixed_7,
+ svldr_vnum_za (w0 + 3, x1, 32),
+ svldr_vnum_za (w0 + 3, x1, 32))
new file mode 100644
@@ -0,0 +1,104 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** ldr_za_0:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_0,
+ svldr_za (w0, x1),
+ svldr_za (w0, x1))
+
+/*
+** ldr_za_1_vnum:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_1_vnum,
+ svldr_za (w0 + 1, x1 + svcntsb ()),
+ svldr_za (w0 + 1, x1 + svcntsb ()))
+
+/*
+** ldr_za_13_vnum:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 13\], \[x1, #13, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_13_vnum,
+ svldr_za (w0 + 13, x1 + svcntsb () * 13),
+ svldr_za (w0 + 13, x1 + svcntsb () * 13))
+
+/*
+** ldr_za_15_vnum:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 15\], \[x1, #15, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_15_vnum,
+ svldr_za (w0 + 15, x1 + svcntsb () * 15),
+ svldr_za (w0 + 15, x1 + svcntsb () * 15))
+
+/*
+** ldr_za_16_vnum:
+** (
+** add (w1[2-5]), w0, #?16
+** incb x1, all, mul #16
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1, all, mul #16
+** add (w1[2-5]), w0, #?16
+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_16_vnum,
+ svldr_za (w0 + 16, x1 + svcntsb () * 16),
+ svldr_za (w0 + 16, x1 + svcntsb () * 16))
+
+/*
+** ldr_za_m1_vnum:
+** (
+** sub (w1[2-5]), w0, #?1
+** decb x1
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** decb x1
+** sub (w1[2-5]), w0, #?1
+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_m1_vnum,
+ svldr_za (w0 - 1, x1 - svcntsb ()),
+ svldr_za (w0 - 1, x1 - svcntsb ()))
+
+/*
+** ldr_za_2:
+** add (w1[2-5]), w0, #?2
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_2,
+ svldr_za (w0 + 2, x1),
+ svldr_za (w0 + 2, x1))
+
+/*
+** ldr_za_offset:
+** (
+** mov (w1[2-5]), w0
+** add (x[0-9]+), x1, #?1
+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** add (x[0-9]+), x1, #?1
+** mov (w1[2-5]), w0
+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_offset,
+ svldr_za (w0, x1 + 1),
+ svldr_za (w0, x1 + 1))
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#include "test_sme_acle.h"
+
+/*
+** ldr_za_0:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_0,
+ svldr_za (w0, x1),
+ svldr_za (w0, x1))
+
+/*
+** ldr_za_1_vnum:
+** mov (w1[2-5]), w0
+** ldr za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_1_vnum,
+ svldr_za (w0 + 1, x1 + svcntsb ()),
+ svldr_za (w0 + 1, x1 + svcntsb ()))
+
+/*
+** ldr_za_2:
+** add (w1[2-5]), w0, #?2
+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_2,
+ svldr_za (w0 + 2, x1),
+ svldr_za (w0 + 2, x1))
+
+/*
+** ldr_za_offset:
+** (
+** mov (w1[2-5]), w0
+** add (x[0-9]+), x1, #?1
+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** add (x[0-9]+), x1, #?1
+** mov (w1[2-5]), w0
+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_LOAD_ZA (ldr_za_offset,
+ svldr_za (w0, x1 + 1),
+ svldr_za (w0, x1 + 1))
new file mode 100644
@@ -0,0 +1,102 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** mopa_za32_s8_0_p0_p1_z0_z1:
+** smopa za0\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_s8_0_p0_p1_z0_z1, svint8_t,
+ svmopa_za32_s8_m (0, p0, p1, z0, z1),
+ svmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za32_s8_0_p1_p0_z1_z0:
+** smopa za0\.s, p1/m, p0/m, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_s8_0_p1_p0_z1_z0, svint8_t,
+ svmopa_za32_s8_m (0, p1, p0, z1, z0),
+ svmopa_za32_m (0, p1, p0, z1, z0))
+
+/*
+** mopa_za32_s8_3_p0_p1_z0_z1:
+** smopa za3\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_s8_3_p0_p1_z0_z1, svint8_t,
+ svmopa_za32_s8_m (3, p0, p1, z0, z1),
+ svmopa_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mopa_za32_u8_0_p0_p1_z0_z1:
+** umopa za0\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_u8_0_p0_p1_z0_z1, svuint8_t,
+ svmopa_za32_u8_m (0, p0, p1, z0, z1),
+ svmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za32_u8_3_p0_p1_z0_z1:
+** umopa za3\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_u8_3_p0_p1_z0_z1, svuint8_t,
+ svmopa_za32_u8_m (3, p0, p1, z0, z1),
+ svmopa_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mopa_za32_bf16_0_p0_p1_z0_z1:
+** bfmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t,
+ svmopa_za32_bf16_m (0, p0, p1, z0, z1),
+ svmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za32_bf16_3_p0_p1_z0_z1:
+** bfmopa za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t,
+ svmopa_za32_bf16_m (3, p0, p1, z0, z1),
+ svmopa_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mopa_za32_f16_0_p0_p1_z0_z1:
+** fmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_f16_0_p0_p1_z0_z1, svfloat16_t,
+ svmopa_za32_f16_m (0, p0, p1, z0, z1),
+ svmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za32_f16_3_p0_p1_z0_z1:
+** fmopa za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_f16_3_p0_p1_z0_z1, svfloat16_t,
+ svmopa_za32_f16_m (3, p0, p1, z0, z1),
+ svmopa_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mopa_za32_f32_0_p0_p1_z0_z1:
+** fmopa za0\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_f32_0_p0_p1_z0_z1, svfloat32_t,
+ svmopa_za32_f32_m (0, p0, p1, z0, z1),
+ svmopa_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za32_f32_3_p0_p1_z0_z1:
+** fmopa za3\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za32_f32_3_p0_p1_z0_z1, svfloat32_t,
+ svmopa_za32_f32_m (3, p0, p1, z0, z1),
+ svmopa_za32_m (3, p0, p1, z0, z1))
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** mopa_za64_s16_0_p0_p1_z0_z1:
+** smopa za0\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za64_s16_0_p0_p1_z0_z1, svint16_t,
+ svmopa_za64_s16_m (0, p0, p1, z0, z1),
+ svmopa_za64_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za64_s16_0_p1_p0_z1_z0:
+** smopa za0\.d, p1/m, p0/m, z1\.h, z0\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za64_s16_0_p1_p0_z1_z0, svint16_t,
+ svmopa_za64_s16_m (0, p1, p0, z1, z0),
+ svmopa_za64_m (0, p1, p0, z1, z0))
+
+/*
+** mopa_za64_s16_7_p0_p1_z0_z1:
+** smopa za7\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za64_s16_7_p0_p1_z0_z1, svint16_t,
+ svmopa_za64_s16_m (7, p0, p1, z0, z1),
+ svmopa_za64_m (7, p0, p1, z0, z1))
+
+/*
+** mopa_za64_u16_0_p0_p1_z0_z1:
+** umopa za0\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za64_u16_0_p0_p1_z0_z1, svuint16_t,
+ svmopa_za64_u16_m (0, p0, p1, z0, z1),
+ svmopa_za64_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za64_u16_7_p0_p1_z0_z1:
+** umopa za7\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za64_u16_7_p0_p1_z0_z1, svuint16_t,
+ svmopa_za64_u16_m (7, p0, p1, z0, z1),
+ svmopa_za64_m (7, p0, p1, z0, z1))
+
+#pragma GCC target "+nosme-i16i64+sme-f64f64"
+
+/*
+** mopa_za64_f64_0_p0_p1_z0_z1:
+** fmopa za0\.d, p0/m, p1/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za64_f64_0_p0_p1_z0_z1, svfloat64_t,
+ svmopa_za64_f64_m (0, p0, p1, z0, z1),
+ svmopa_za64_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za64_f64_7_p0_p1_z0_z1:
+** fmopa za7\.d, p0/m, p1/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_ZA (mopa_za64_f64_7_p0_p1_z0_z1, svfloat64_t,
+ svmopa_za64_f64_m (7, p0, p1, z0, z1),
+ svmopa_za64_m (7, p0, p1, z0, z1))
new file mode 100644
@@ -0,0 +1,102 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** mops_za32_s8_0_p0_p1_z0_z1:
+** smops za0\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_s8_0_p0_p1_z0_z1, svint8_t,
+ svmops_za32_s8_m (0, p0, p1, z0, z1),
+ svmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za32_s8_0_p1_p0_z1_z0:
+** smops za0\.s, p1/m, p0/m, z1\.b, z0\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_s8_0_p1_p0_z1_z0, svint8_t,
+ svmops_za32_s8_m (0, p1, p0, z1, z0),
+ svmops_za32_m (0, p1, p0, z1, z0))
+
+/*
+** mops_za32_s8_3_p0_p1_z0_z1:
+** smops za3\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_s8_3_p0_p1_z0_z1, svint8_t,
+ svmops_za32_s8_m (3, p0, p1, z0, z1),
+ svmops_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mops_za32_u8_0_p0_p1_z0_z1:
+** umops za0\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_u8_0_p0_p1_z0_z1, svuint8_t,
+ svmops_za32_u8_m (0, p0, p1, z0, z1),
+ svmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za32_u8_3_p0_p1_z0_z1:
+** umops za3\.s, p0/m, p1/m, z0\.b, z1\.b
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_u8_3_p0_p1_z0_z1, svuint8_t,
+ svmops_za32_u8_m (3, p0, p1, z0, z1),
+ svmops_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mops_za32_bf16_0_p0_p1_z0_z1:
+** bfmops za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t,
+ svmops_za32_bf16_m (0, p0, p1, z0, z1),
+ svmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za32_bf16_3_p0_p1_z0_z1:
+** bfmops za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t,
+ svmops_za32_bf16_m (3, p0, p1, z0, z1),
+ svmops_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mops_za32_f16_0_p0_p1_z0_z1:
+** fmops za0\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_f16_0_p0_p1_z0_z1, svfloat16_t,
+ svmops_za32_f16_m (0, p0, p1, z0, z1),
+ svmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za32_f16_3_p0_p1_z0_z1:
+** fmops za3\.s, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_f16_3_p0_p1_z0_z1, svfloat16_t,
+ svmops_za32_f16_m (3, p0, p1, z0, z1),
+ svmops_za32_m (3, p0, p1, z0, z1))
+
+/*
+** mops_za32_f32_0_p0_p1_z0_z1:
+** fmops za0\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_f32_0_p0_p1_z0_z1, svfloat32_t,
+ svmops_za32_f32_m (0, p0, p1, z0, z1),
+ svmops_za32_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za32_f32_3_p0_p1_z0_z1:
+** fmops za3\.s, p0/m, p1/m, z0\.s, z1\.s
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za32_f32_3_p0_p1_z0_z1, svfloat32_t,
+ svmops_za32_f32_m (3, p0, p1, z0, z1),
+ svmops_za32_m (3, p0, p1, z0, z1))
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** mops_za64_s16_0_p0_p1_z0_z1:
+** smops za0\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za64_s16_0_p0_p1_z0_z1, svint16_t,
+ svmops_za64_s16_m (0, p0, p1, z0, z1),
+ svmops_za64_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za64_s16_0_p1_p0_z1_z0:
+** smops za0\.d, p1/m, p0/m, z1\.h, z0\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za64_s16_0_p1_p0_z1_z0, svint16_t,
+ svmops_za64_s16_m (0, p1, p0, z1, z0),
+ svmops_za64_m (0, p1, p0, z1, z0))
+
+/*
+** mops_za64_s16_7_p0_p1_z0_z1:
+** smops za7\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za64_s16_7_p0_p1_z0_z1, svint16_t,
+ svmops_za64_s16_m (7, p0, p1, z0, z1),
+ svmops_za64_m (7, p0, p1, z0, z1))
+
+/*
+** mops_za64_u16_0_p0_p1_z0_z1:
+** umops za0\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za64_u16_0_p0_p1_z0_z1, svuint16_t,
+ svmops_za64_u16_m (0, p0, p1, z0, z1),
+ svmops_za64_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za64_u16_7_p0_p1_z0_z1:
+** umops za7\.d, p0/m, p1/m, z0\.h, z1\.h
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za64_u16_7_p0_p1_z0_z1, svuint16_t,
+ svmops_za64_u16_m (7, p0, p1, z0, z1),
+ svmops_za64_m (7, p0, p1, z0, z1))
+
+#pragma GCC target "+nosme-i16i64+sme-f64f64"
+
+/*
+** mops_za64_f64_0_p0_p1_z0_z1:
+** fmops za0\.d, p0/m, p1/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za64_f64_0_p0_p1_z0_z1, svfloat64_t,
+ svmops_za64_f64_m (0, p0, p1, z0, z1),
+ svmops_za64_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za64_f64_7_p0_p1_z0_z1:
+** fmops za7\.d, p0/m, p1/m, z0\.d, z1\.d
+** ret
+*/
+TEST_UNIFORM_ZA (mops_za64_f64_7_p0_p1_z0_z1, svfloat64_t,
+ svmops_za64_f64_m (7, p0, p1, z0, z1),
+ svmops_za64_m (7, p0, p1, z0, z1))
new file mode 100644
@@ -0,0 +1,367 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za128_s8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t,
+ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s8_0_1_tied:
+** add (w1[2-5]), w0, #?1
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t,
+ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 + 1),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za128_s8_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_m1_tied, svint8_t,
+ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 - 1),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za128_s8_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za1h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_1_0_tied, svint8_t,
+ z0 = svread_hor_za128_s8_m (z0, p0, 1, w0),
+ z0 = svread_hor_za128_m (z0, p0, 1, w0))
+
+/*
+** read_za128_s8_15_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za15h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_15_0_tied, svint8_t,
+ z0 = svread_hor_za128_s8_m (z0, p0, 15, w0),
+ z0 = svread_hor_za128_m (z0, p0, 15, w0))
+
+/*
+** read_za128_s8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_0_untied, svint8_t,
+ z0 = svread_hor_za128_s8_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u8_0_0_tied, svuint8_t,
+ z0 = svread_hor_za128_u8_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u8_0_0_untied, svuint8_t,
+ z0 = svread_hor_za128_u8_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_s16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s16_0_0_tied, svint16_t,
+ z0 = svread_hor_za128_s16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s16_0_0_untied, svint16_t,
+ z0 = svread_hor_za128_s16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u16_0_0_tied, svuint16_t,
+ z0 = svread_hor_za128_u16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u16_0_0_untied, svuint16_t,
+ z0 = svread_hor_za128_u16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_f16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_f16_0_0_tied, svfloat16_t,
+ z0 = svread_hor_za128_f16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_f16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_f16_0_0_untied, svfloat16_t,
+ z0 = svread_hor_za128_f16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_bf16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_bf16_0_0_tied, svbfloat16_t,
+ z0 = svread_hor_za128_bf16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_bf16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_bf16_0_0_untied, svbfloat16_t,
+ z0 = svread_hor_za128_bf16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_s32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s32_0_0_tied, svint32_t,
+ z0 = svread_hor_za128_s32_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s32_0_0_untied, svint32_t,
+ z0 = svread_hor_za128_s32_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u32_0_0_tied, svuint32_t,
+ z0 = svread_hor_za128_u32_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u32_0_0_untied, svuint32_t,
+ z0 = svread_hor_za128_u32_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_f32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_f32_0_0_tied, svfloat32_t,
+ z0 = svread_hor_za128_f32_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_f32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_f32_0_0_untied, svfloat32_t,
+ z0 = svread_hor_za128_f32_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_s64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s64_0_0_tied, svint64_t,
+ z0 = svread_hor_za128_s64_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s64_0_0_untied, svint64_t,
+ z0 = svread_hor_za128_s64_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u64_0_0_tied, svuint64_t,
+ z0 = svread_hor_za128_u64_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u64_0_0_untied, svuint64_t,
+ z0 = svread_hor_za128_u64_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_f64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_f64_0_0_tied, svfloat64_t,
+ z0 = svread_hor_za128_f64_m (z0, p0, 0, w0),
+ z0 = svread_hor_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_f64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0h\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0h\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_f64_0_0_untied, svfloat64_t,
+ z0 = svread_hor_za128_f64_m (z1, p0, 0, w0),
+ z0 = svread_hor_za128_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,171 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za16_s16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_0_tied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_s16_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 1),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za16_s16_0_7_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\1, 7\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_7_tied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 7),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 7))
+
+/*
+** read_za16_s16_0_8_tied:
+** add (w1[2-5]), w0, #?8
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_8_tied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 8),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 8))
+
+/*
+** read_za16_s16_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_m1_tied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 - 1),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za16_s16_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za1h\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_1_0_tied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0),
+ z0 = svread_hor_za16_m (z0, p0, 1, w0))
+
+/*
+** read_za16_s16_1_7_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za1h\.h\[\1, 7\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_1_7_tied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0 + 7),
+ z0 = svread_hor_za16_m (z0, p0, 1, w0 + 7))
+
+/*
+** read_za16_s16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_0_untied, svint16_t,
+ z0 = svread_hor_za16_s16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za16_m (z1, p0, 0, w0))
+
+/*
+** read_za16_u16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_u16_0_0_tied, svuint16_t,
+ z0 = svread_hor_za16_u16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_u16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_u16_0_0_untied, svuint16_t,
+ z0 = svread_hor_za16_u16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za16_m (z1, p0, 0, w0))
+
+/*
+** read_za16_f16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_f16_0_0_tied, svfloat16_t,
+ z0 = svread_hor_za16_f16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_f16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_f16_0_0_untied, svfloat16_t,
+ z0 = svread_hor_za16_f16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za16_m (z1, p0, 0, w0))
+
+/*
+** read_za16_bf16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_bf16_0_0_tied, svbfloat16_t,
+ z0 = svread_hor_za16_bf16_m (z0, p0, 0, w0),
+ z0 = svread_hor_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_bf16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0h\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0h\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_bf16_0_0_untied, svbfloat16_t,
+ z0 = svread_hor_za16_bf16_m (z1, p0, 0, w0),
+ z0 = svread_hor_za16_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,164 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za32_s32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0),
+ z0 = svread_hor_za32_m (z0, p0, 0, w0))
+
+/*
+** read_za32_s32_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 1),
+ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za32_s32_0_3_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\1, 3\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_3_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 3),
+ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 3))
+
+/*
+** read_za32_s32_0_4_tied:
+** add (w1[2-5]), w0, #?4
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_4_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 4),
+ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 4))
+
+/*
+** read_za32_s32_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_m1_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 - 1),
+ z0 = svread_hor_za32_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za32_s32_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za1h\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_1_0_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0),
+ z0 = svread_hor_za32_m (z0, p0, 1, w0))
+
+/*
+** read_za32_s32_1_3_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za1h\.s\[\1, 3\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_1_3_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0 + 3),
+ z0 = svread_hor_za32_m (z0, p0, 1, w0 + 3))
+
+/*
+** read_za32_s32_3_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za3h\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_3_0_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 3, w0),
+ z0 = svread_hor_za32_m (z0, p0, 3, w0))
+
+/*
+** read_za32_s32_3_3_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za3h\.s\[\1, 3\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_3_3_tied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z0, p0, 3, w0 + 3),
+ z0 = svread_hor_za32_m (z0, p0, 3, w0 + 3))
+
+/*
+** read_za32_s32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_0_untied, svint32_t,
+ z0 = svread_hor_za32_s32_m (z1, p0, 0, w0),
+ z0 = svread_hor_za32_m (z1, p0, 0, w0))
+
+/*
+** read_za32_u32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_u32_0_0_tied, svuint32_t,
+ z0 = svread_hor_za32_u32_m (z0, p0, 0, w0),
+ z0 = svread_hor_za32_m (z0, p0, 0, w0))
+
+/*
+** read_za32_u32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za32_u32_0_0_untied, svuint32_t,
+ z0 = svread_hor_za32_u32_m (z1, p0, 0, w0),
+ z0 = svread_hor_za32_m (z1, p0, 0, w0))
+
+/*
+** read_za32_f32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_f32_0_0_tied, svfloat32_t,
+ z0 = svread_hor_za32_f32_m (z0, p0, 0, w0),
+ z0 = svread_hor_za32_m (z0, p0, 0, w0))
+
+/*
+** read_za32_f32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.s, p0/m, za0h\.s\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0h\.s\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za32_f32_0_0_untied, svfloat32_t,
+ z0 = svread_hor_za32_f32_m (z1, p0, 0, w0),
+ z0 = svread_hor_za32_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,154 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za64_s64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0),
+ z0 = svread_hor_za64_m (z0, p0, 0, w0))
+
+/*
+** read_za64_s64_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0h\.d\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 1),
+ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za64_s64_0_2_tied:
+** add (w1[2-5]), w0, #?2
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_2_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 2),
+ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 2))
+
+/*
+** read_za64_s64_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_m1_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 - 1),
+ z0 = svread_hor_za64_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za64_s64_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za1h\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_1_0_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0),
+ z0 = svread_hor_za64_m (z0, p0, 1, w0))
+
+/*
+** read_za64_s64_1_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za1h\.d\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_1_1_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0 + 1),
+ z0 = svread_hor_za64_m (z0, p0, 1, w0 + 1))
+
+/*
+** read_za64_s64_7_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za7h\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_7_0_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0),
+ z0 = svread_hor_za64_m (z0, p0, 7, w0))
+
+/*
+** read_za64_s64_7_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za7h\.d\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_7_1_tied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0 + 1),
+ z0 = svread_hor_za64_m (z0, p0, 7, w0 + 1))
+
+/*
+** read_za64_s64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0h\.d\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_0_untied, svint64_t,
+ z0 = svread_hor_za64_s64_m (z1, p0, 0, w0),
+ z0 = svread_hor_za64_m (z1, p0, 0, w0))
+
+/*
+** read_za64_u64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_u64_0_0_tied, svuint64_t,
+ z0 = svread_hor_za64_u64_m (z0, p0, 0, w0),
+ z0 = svread_hor_za64_m (z0, p0, 0, w0))
+
+/*
+** read_za64_u64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0h\.d\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za64_u64_0_0_untied, svuint64_t,
+ z0 = svread_hor_za64_u64_m (z1, p0, 0, w0),
+ z0 = svread_hor_za64_m (z1, p0, 0, w0))
+
+/*
+** read_za64_f64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_f64_0_0_tied, svfloat64_t,
+ z0 = svread_hor_za64_f64_m (z0, p0, 0, w0),
+ z0 = svread_hor_za64_m (z0, p0, 0, w0))
+
+/*
+** read_za64_f64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.d, p0/m, za0h\.d\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0h\.d\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za64_f64_0_0_untied, svfloat64_t,
+ z0 = svread_hor_za64_f64_m (z1, p0, 0, w0),
+ z0 = svread_hor_za64_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,97 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za8_s8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t,
+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0),
+ z0 = svread_hor_za8_m (z0, p0, 0, w0))
+
+/*
+** read_za8_s8_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t,
+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 1),
+ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za8_s8_0_15_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\1, 15\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_15_tied, svint8_t,
+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 15),
+ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 15))
+
+/*
+** read_za8_s8_0_16_tied:
+** add (w1[2-5]), w0, #?16
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_16_tied, svint8_t,
+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 16),
+ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 16))
+
+/*
+** read_za8_s8_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_m1_tied, svint8_t,
+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 - 1),
+ z0 = svread_hor_za8_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za8_s8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_0_untied, svint8_t,
+ z0 = svread_hor_za8_s8_m (z1, p0, 0, w0),
+ z0 = svread_hor_za8_m (z1, p0, 0, w0))
+
+/*
+** read_za8_u8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_u8_0_0_tied, svuint8_t,
+ z0 = svread_hor_za8_u8_m (z0, p0, 0, w0),
+ z0 = svread_hor_za8_m (z0, p0, 0, w0))
+
+/*
+** read_za8_u8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.b, p0/m, za0h\.b\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0h\.b\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za8_u8_0_0_untied, svuint8_t,
+ z0 = svread_hor_za8_u8_m (z1, p0, 0, w0),
+ z0 = svread_hor_za8_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,367 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za128_s8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t,
+ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s8_0_1_tied:
+** add (w1[2-5]), w0, #?1
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t,
+ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0 + 1),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za128_s8_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_m1_tied, svint8_t,
+ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0 - 1),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za128_s8_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za1v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_1_0_tied, svint8_t,
+ z0 = svread_ver_za128_s8_m (z0, p0, 1, w0),
+ z0 = svread_ver_za128_m (z0, p0, 1, w0))
+
+/*
+** read_za128_s8_15_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za15v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_15_0_tied, svint8_t,
+ z0 = svread_ver_za128_s8_m (z0, p0, 15, w0),
+ z0 = svread_ver_za128_m (z0, p0, 15, w0))
+
+/*
+** read_za128_s8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s8_0_0_untied, svint8_t,
+ z0 = svread_ver_za128_s8_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u8_0_0_tied, svuint8_t,
+ z0 = svread_ver_za128_u8_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u8_0_0_untied, svuint8_t,
+ z0 = svread_ver_za128_u8_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_s16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s16_0_0_tied, svint16_t,
+ z0 = svread_ver_za128_s16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s16_0_0_untied, svint16_t,
+ z0 = svread_ver_za128_s16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u16_0_0_tied, svuint16_t,
+ z0 = svread_ver_za128_u16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u16_0_0_untied, svuint16_t,
+ z0 = svread_ver_za128_u16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_f16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_f16_0_0_tied, svfloat16_t,
+ z0 = svread_ver_za128_f16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_f16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_f16_0_0_untied, svfloat16_t,
+ z0 = svread_ver_za128_f16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_bf16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_bf16_0_0_tied, svbfloat16_t,
+ z0 = svread_ver_za128_bf16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_bf16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_bf16_0_0_untied, svbfloat16_t,
+ z0 = svread_ver_za128_bf16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_s32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s32_0_0_tied, svint32_t,
+ z0 = svread_ver_za128_s32_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s32_0_0_untied, svint32_t,
+ z0 = svread_ver_za128_s32_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u32_0_0_tied, svuint32_t,
+ z0 = svread_ver_za128_u32_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u32_0_0_untied, svuint32_t,
+ z0 = svread_ver_za128_u32_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_f32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_f32_0_0_tied, svfloat32_t,
+ z0 = svread_ver_za128_f32_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_f32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_f32_0_0_untied, svfloat32_t,
+ z0 = svread_ver_za128_f32_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_s64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_s64_0_0_tied, svint64_t,
+ z0 = svread_ver_za128_s64_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_s64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_s64_0_0_untied, svint64_t,
+ z0 = svread_ver_za128_s64_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_u64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_u64_0_0_tied, svuint64_t,
+ z0 = svread_ver_za128_u64_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_u64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_u64_0_0_untied, svuint64_t,
+ z0 = svread_ver_za128_u64_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
+
+/*
+** read_za128_f64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za128_f64_0_0_tied, svfloat64_t,
+ z0 = svread_ver_za128_f64_m (z0, p0, 0, w0),
+ z0 = svread_ver_za128_m (z0, p0, 0, w0))
+
+/*
+** read_za128_f64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.q, p0/m, za0v\.q\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.q, p0/m, za0v\.q\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za128_f64_0_0_untied, svfloat64_t,
+ z0 = svread_ver_za128_f64_m (z1, p0, 0, w0),
+ z0 = svread_ver_za128_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,171 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za16_s16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_0_tied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_s16_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 1),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za16_s16_0_7_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\1, 7\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_7_tied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 7),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 7))
+
+/*
+** read_za16_s16_0_8_tied:
+** add (w1[2-5]), w0, #?8
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_8_tied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 8),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 8))
+
+/*
+** read_za16_s16_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_m1_tied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 - 1),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za16_s16_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za1v\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_1_0_tied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z0, p0, 1, w0),
+ z0 = svread_ver_za16_m (z0, p0, 1, w0))
+
+/*
+** read_za16_s16_1_7_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za1v\.h\[\1, 7\]
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_1_7_tied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z0, p0, 1, w0 + 7),
+ z0 = svread_ver_za16_m (z0, p0, 1, w0 + 7))
+
+/*
+** read_za16_s16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_s16_0_0_untied, svint16_t,
+ z0 = svread_ver_za16_s16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za16_m (z1, p0, 0, w0))
+
+/*
+** read_za16_u16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_u16_0_0_tied, svuint16_t,
+ z0 = svread_ver_za16_u16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_u16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_u16_0_0_untied, svuint16_t,
+ z0 = svread_ver_za16_u16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za16_m (z1, p0, 0, w0))
+
+/*
+** read_za16_f16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_f16_0_0_tied, svfloat16_t,
+ z0 = svread_ver_za16_f16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_f16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_f16_0_0_untied, svfloat16_t,
+ z0 = svread_ver_za16_f16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za16_m (z1, p0, 0, w0))
+
+/*
+** read_za16_bf16_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za16_bf16_0_0_tied, svbfloat16_t,
+ z0 = svread_ver_za16_bf16_m (z0, p0, 0, w0),
+ z0 = svread_ver_za16_m (z0, p0, 0, w0))
+
+/*
+** read_za16_bf16_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.h, p0/m, za0v\.h\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.h, p0/m, za0v\.h\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za16_bf16_0_0_untied, svbfloat16_t,
+ z0 = svread_ver_za16_bf16_m (z1, p0, 0, w0),
+ z0 = svread_ver_za16_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,164 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za32_s32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0),
+ z0 = svread_ver_za32_m (z0, p0, 0, w0))
+
+/*
+** read_za32_s32_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 1),
+ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za32_s32_0_3_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\1, 3\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_3_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 3),
+ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 3))
+
+/*
+** read_za32_s32_0_4_tied:
+** add (w1[2-5]), w0, #?4
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_4_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 4),
+ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 4))
+
+/*
+** read_za32_s32_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_m1_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 - 1),
+ z0 = svread_ver_za32_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za32_s32_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za1v\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_1_0_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 1, w0),
+ z0 = svread_ver_za32_m (z0, p0, 1, w0))
+
+/*
+** read_za32_s32_1_3_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za1v\.s\[\1, 3\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_1_3_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 1, w0 + 3),
+ z0 = svread_ver_za32_m (z0, p0, 1, w0 + 3))
+
+/*
+** read_za32_s32_3_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za3v\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_3_0_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 3, w0),
+ z0 = svread_ver_za32_m (z0, p0, 3, w0))
+
+/*
+** read_za32_s32_3_3_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za3v\.s\[\1, 3\]
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_3_3_tied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z0, p0, 3, w0 + 3),
+ z0 = svread_ver_za32_m (z0, p0, 3, w0 + 3))
+
+/*
+** read_za32_s32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za32_s32_0_0_untied, svint32_t,
+ z0 = svread_ver_za32_s32_m (z1, p0, 0, w0),
+ z0 = svread_ver_za32_m (z1, p0, 0, w0))
+
+/*
+** read_za32_u32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_u32_0_0_tied, svuint32_t,
+ z0 = svread_ver_za32_u32_m (z0, p0, 0, w0),
+ z0 = svread_ver_za32_m (z0, p0, 0, w0))
+
+/*
+** read_za32_u32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za32_u32_0_0_untied, svuint32_t,
+ z0 = svread_ver_za32_u32_m (z1, p0, 0, w0),
+ z0 = svread_ver_za32_m (z1, p0, 0, w0))
+
+/*
+** read_za32_f32_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za32_f32_0_0_tied, svfloat32_t,
+ z0 = svread_ver_za32_f32_m (z0, p0, 0, w0),
+ z0 = svread_ver_za32_m (z0, p0, 0, w0))
+
+/*
+** read_za32_f32_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.s, p0/m, za0v\.s\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.s, p0/m, za0v\.s\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za32_f32_0_0_untied, svfloat32_t,
+ z0 = svread_ver_za32_f32_m (z1, p0, 0, w0),
+ z0 = svread_ver_za32_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,154 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za64_s64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0),
+ z0 = svread_ver_za64_m (z0, p0, 0, w0))
+
+/*
+** read_za64_s64_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0v\.d\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 1),
+ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za64_s64_0_2_tied:
+** add (w1[2-5]), w0, #?2
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_2_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 2),
+ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 2))
+
+/*
+** read_za64_s64_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_m1_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 - 1),
+ z0 = svread_ver_za64_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za64_s64_1_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za1v\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_1_0_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0),
+ z0 = svread_ver_za64_m (z0, p0, 1, w0))
+
+/*
+** read_za64_s64_1_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za1v\.d\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_1_1_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0 + 1),
+ z0 = svread_ver_za64_m (z0, p0, 1, w0 + 1))
+
+/*
+** read_za64_s64_7_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za7v\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_7_0_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0),
+ z0 = svread_ver_za64_m (z0, p0, 7, w0))
+
+/*
+** read_za64_s64_7_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za7v\.d\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_7_1_tied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0 + 1),
+ z0 = svread_ver_za64_m (z0, p0, 7, w0 + 1))
+
+/*
+** read_za64_s64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0v\.d\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za64_s64_0_0_untied, svint64_t,
+ z0 = svread_ver_za64_s64_m (z1, p0, 0, w0),
+ z0 = svread_ver_za64_m (z1, p0, 0, w0))
+
+/*
+** read_za64_u64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_u64_0_0_tied, svuint64_t,
+ z0 = svread_ver_za64_u64_m (z0, p0, 0, w0),
+ z0 = svread_ver_za64_m (z0, p0, 0, w0))
+
+/*
+** read_za64_u64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0v\.d\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za64_u64_0_0_untied, svuint64_t,
+ z0 = svread_ver_za64_u64_m (z1, p0, 0, w0),
+ z0 = svread_ver_za64_m (z1, p0, 0, w0))
+
+/*
+** read_za64_f64_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za64_f64_0_0_tied, svfloat64_t,
+ z0 = svread_ver_za64_f64_m (z0, p0, 0, w0),
+ z0 = svread_ver_za64_m (z0, p0, 0, w0))
+
+/*
+** read_za64_f64_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.d, p0/m, za0v\.d\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.d, p0/m, za0v\.d\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za64_f64_0_0_untied, svfloat64_t,
+ z0 = svread_ver_za64_f64_m (z1, p0, 0, w0),
+ z0 = svread_ver_za64_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,97 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** read_za8_s8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t,
+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0),
+ z0 = svread_ver_za8_m (z0, p0, 0, w0))
+
+/*
+** read_za8_s8_0_1_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\1, 1\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t,
+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 1),
+ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 1))
+
+/*
+** read_za8_s8_0_15_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\1, 15\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_15_tied, svint8_t,
+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 15),
+ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 15))
+
+/*
+** read_za8_s8_0_16_tied:
+** add (w1[2-5]), w0, #?16
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_16_tied, svint8_t,
+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 16),
+ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 16))
+
+/*
+** read_za8_s8_0_m1_tied:
+** sub (w1[2-5]), w0, #?1
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_m1_tied, svint8_t,
+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 - 1),
+ z0 = svread_ver_za8_m (z0, p0, 0, w0 - 1))
+
+/*
+** read_za8_s8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za8_s8_0_0_untied, svint8_t,
+ z0 = svread_ver_za8_s8_m (z1, p0, 0, w0),
+ z0 = svread_ver_za8_m (z1, p0, 0, w0))
+
+/*
+** read_za8_u8_0_0_tied:
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** ret
+*/
+TEST_READ_ZA (read_za8_u8_0_0_tied, svuint8_t,
+ z0 = svread_ver_za8_u8_m (z0, p0, 0, w0),
+ z0 = svread_ver_za8_m (z0, p0, 0, w0))
+
+/*
+** read_za8_u8_0_0_untied:
+** (
+** mov (w1[2-5]), w0
+** mov z0\.d, z1\.d
+** mova z0\.b, p0/m, za0v\.b\[\1, 0\]
+** |
+** mov z0\.d, z1\.d
+** mov (w1[2-5]), w0
+** mova z0\.b, p0/m, za0v\.b\[\2, 0\]
+** )
+** ret
+*/
+TEST_READ_ZA (read_za8_u8_0_0_untied, svuint8_t,
+ z0 = svread_ver_za8_u8_m (z1, p0, 0, w0),
+ z0 = svread_ver_za8_m (z1, p0, 0, w0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_vnum_za128_0_0:
+** mov (w1[2-5]), w0
+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za128_0_0,
+ svst1_hor_vnum_za128 (0, w0, p0, x1, 0),
+ svst1_hor_vnum_za128 (0, w0, p0, x1, 0))
+
+/*
+** st1_vnum_za128_5_0:
+** incb x1, all, mul #13
+** mov (w1[2-5]), w0
+** st1q { za5h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za128_5_0,
+ svst1_hor_vnum_za128 (5, w0, p0, x1, 13),
+ svst1_hor_vnum_za128 (5, w0, p0, x1, 13))
+
+/*
+** st1_vnum_za128_11_0:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** st1q { za11h\.q\[\3, 0\] }, p0, \[\2\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za128_11_0,
+ svst1_hor_vnum_za128 (11, w0, p0, x1, x2),
+ svst1_hor_vnum_za128 (11, w0, p0, x1, x2))
+
+/*
+** st1_vnum_za128_0_1:
+** add (w1[2-5]), w0, #?1
+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za128_0_1,
+ svst1_hor_vnum_za128 (0, w0 + 1, p0, x1, 0),
+ svst1_hor_vnum_za128 (0, w0 + 1, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_vnum_za16_0_0:
+** mov (w1[2-5]), w0
+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za16_0_0,
+ svst1_hor_vnum_za16 (0, w0, p0, x1, 0),
+ svst1_hor_vnum_za16 (0, w0, p0, x1, 0))
+
+/*
+** st1_vnum_za16_0_1:
+** incb x1, all, mul #9
+** mov (w1[2-5]), w0
+** st1h { za0h\.h\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za16_0_1,
+ svst1_hor_vnum_za16 (0, w0 + 1, p0, x1, 9),
+ svst1_hor_vnum_za16 (0, w0 + 1, p0, x1, 9))
+
+/*
+** st1_vnum_za16_1_7:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** st1h { za1h\.h\[\3, 7\] }, p0, \[\2\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za16_1_7,
+ svst1_hor_vnum_za16 (1, w0 + 7, p0, x1, x2),
+ svst1_hor_vnum_za16 (1, w0 + 7, p0, x1, x2))
+
+/*
+** st1_vnum_za16_0_8:
+** add (w1[2-5]), w0, #?8
+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za16_0_8,
+ svst1_hor_vnum_za16 (0, w0 + 8, p0, x1, 0),
+ svst1_hor_vnum_za16 (0, w0 + 8, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_vnum_za32_0_0:
+** mov (w1[2-5]), w0
+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za32_0_0,
+ svst1_hor_vnum_za32 (0, w0, p0, x1, 0),
+ svst1_hor_vnum_za32 (0, w0, p0, x1, 0))
+
+/*
+** st1_vnum_za32_0_1:
+** incb x1, all, mul #5
+** mov (w1[2-5]), w0
+** st1w { za0h\.s\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za32_0_1,
+ svst1_hor_vnum_za32 (0, w0 + 1, p0, x1, 5),
+ svst1_hor_vnum_za32 (0, w0 + 1, p0, x1, 5))
+
+/*
+** st1_vnum_za32_2_3:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** st1w { za2h\.s\[\3, 3\] }, p0, \[\2\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za32_2_3,
+ svst1_hor_vnum_za32 (2, w0 + 3, p0, x1, x2),
+ svst1_hor_vnum_za32 (2, w0 + 3, p0, x1, x2))
+
+/*
+** st1_vnum_za32_0_4:
+** add (w1[2-5]), w0, #?4
+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za32_0_4,
+ svst1_hor_vnum_za32 (0, w0 + 4, p0, x1, 0),
+ svst1_hor_vnum_za32 (0, w0 + 4, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_vnum_za64_0_0:
+** mov (w1[2-5]), w0
+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za64_0_0,
+ svst1_hor_vnum_za64 (0, w0, p0, x1, 0),
+ svst1_hor_vnum_za64 (0, w0, p0, x1, 0))
+
+/*
+** st1_vnum_za64_0_1:
+** incb x1, all, mul #13
+** mov (w1[2-5]), w0
+** st1d { za0h\.d\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za64_0_1,
+ svst1_hor_vnum_za64 (0, w0 + 1, p0, x1, 13),
+ svst1_hor_vnum_za64 (0, w0 + 1, p0, x1, 13))
+
+/*
+** st1_vnum_za64_5_1:
+** cntb (x[0-9]+)
+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1
+** mov (w1[2-5]), w0
+** st1d { za5h\.d\[\3, 1\] }, p0, \[\2\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za64_5_1,
+ svst1_hor_vnum_za64 (5, w0 + 1, p0, x1, x2),
+ svst1_hor_vnum_za64 (5, w0 + 1, p0, x1, x2))
+
+/*
+** st1_vnum_za64_0_2:
+** add (w1[2-5]), w0, #?2
+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za64_0_2,
+ svst1_hor_vnum_za64 (0, w0 + 2, p0, x1, 0),
+ svst1_hor_vnum_za64 (0, w0 + 2, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_vnum_za8_0_0:
+** mov (w1[2-5]), w0
+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za8_0_0,
+ svst1_hor_vnum_za8 (0, w0, p0, x1, 0),
+ svst1_hor_vnum_za8 (0, w0, p0, x1, 0))
+
+/*
+** st1_vnum_za8_0_1:
+** incb x1, all, mul #11
+** mov (w1[2-5]), w0
+** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za8_0_1,
+ svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 11),
+ svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 11))
+
+/*
+** st1_vnum_za8_0_15:
+** cntb (x[0-9]+)
+** mul (x[0-9]+), (?:\1, x2|x2, \1)
+** mov (w1[2-5]), w0
+** st1b { za0h\.b\[\3, 15\] }, p0, \[x1, \2\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za8_0_15,
+ svst1_hor_vnum_za8 (0, w0 + 15, p0, x1, x2),
+ svst1_hor_vnum_za8 (0, w0 + 15, p0, x1, x2))
+
+/*
+** st1_vnum_za8_0_16:
+** add (w1[2-5]), w0, #?16
+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_vnum_za8_0_16,
+ svst1_hor_vnum_za8 (0, w0 + 16, p0, x1, 0),
+ svst1_hor_vnum_za8 (0, w0 + 16, p0, x1, 0))
new file mode 100644
@@ -0,0 +1,63 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_za128_0_0:
+** mov (w1[2-5]), w0
+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za128_0_0,
+ svst1_hor_za128 (0, w0, p0, x1),
+ svst1_hor_za128 (0, w0, p0, x1))
+
+/*
+** st1_za128_0_1:
+** add (w1[2-5]), w0, #?1
+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za128_0_1,
+ svst1_hor_za128 (0, w0 + 1, p0, x1),
+ svst1_hor_za128 (0, w0 + 1, p0, x1))
+
+/*
+** st1_za128_7_0:
+** mov (w1[2-5]), w0
+** st1q { za7h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za128_7_0,
+ svst1_hor_za128 (7, w0, p0, x1),
+ svst1_hor_za128 (7, w0, p0, x1))
+
+/*
+** st1_za128_13_0:
+** mov (w1[2-5]), w0
+** st1q { za13h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za128_13_0,
+ svst1_hor_za128 (13, w0, p0, x1),
+ svst1_hor_za128 (13, w0, p0, x1))
+
+/*
+** st1_za128_15_0:
+** mov (w1[2-5]), w0
+** st1q { za15h\.q\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za128_15_0,
+ svst1_hor_za128 (15, w0, p0, x1),
+ svst1_hor_za128 (15, w0, p0, x1))
+
+/*
+** st1_za128_9_0_index:
+** mov (w1[2-5]), w0
+** st1q { za9h\.q\[\1, 0\] }, p0, \[x1, x2, lsl #?4\]
+** ret
+*/
+TEST_STORE_ZA (st1_za128_9_0_index,
+ svst1_hor_za128 (9, w0, p0, x1 + x2 * 16),
+ svst1_hor_za128 (9, w0, p0, x1 + x2 * 16))
new file mode 100644
@@ -0,0 +1,94 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_za16_0_0:
+** mov (w1[2-5]), w0
+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_0_0,
+ svst1_hor_za16 (0, w0, p0, x1),
+ svst1_hor_za16 (0, w0, p0, x1))
+
+/*
+** st1_za16_0_1:
+** mov (w1[2-5]), w0
+** st1h { za0h\.h\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_0_1,
+ svst1_hor_za16 (0, w0 + 1, p0, x1),
+ svst1_hor_za16 (0, w0 + 1, p0, x1))
+
+/*
+** st1_za16_0_7:
+** mov (w1[2-5]), w0
+** st1h { za0h\.h\[\1, 7\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_0_7,
+ svst1_hor_za16 (0, w0 + 7, p0, x1),
+ svst1_hor_za16 (0, w0 + 7, p0, x1))
+
+/*
+** st1_za16_1_0:
+** mov (w1[2-5]), w0
+** st1h { za1h\.h\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_1_0,
+ svst1_hor_za16 (1, w0, p0, x1),
+ svst1_hor_za16 (1, w0, p0, x1))
+
+
+/*
+** st1_za16_1_1:
+** mov (w1[2-5]), w0
+** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_1_1,
+ svst1_hor_za16 (1, w0 + 1, p0, x1),
+ svst1_hor_za16 (1, w0 + 1, p0, x1))
+
+/*
+** st1_za16_1_7:
+** mov (w1[2-5]), w0
+** st1h { za1h\.h\[\1, 7\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_1_7,
+ svst1_hor_za16 (1, w0 + 7, p0, x1),
+ svst1_hor_za16 (1, w0 + 7, p0, x1))
+
+/*
+** st1_za16_1_5_index:
+** mov (w1[2-5]), w0
+** st1h { za1h\.h\[\1, 5\] }, p0, \[x1, x2, lsl #?1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_1_5_index,
+ svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2),
+ svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2))
+
+/*
+** st1_za16_0_8:
+** add (w1[2-5]), w0, #?8
+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_0_8,
+ svst1_hor_za16 (0, w0 + 8, p0, x1),
+ svst1_hor_za16 (0, w0 + 8, p0, x1))
+
+/*
+** st1_za16_0_m1:
+** sub (w1[2-5]), w0, #?1
+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za16_0_m1,
+ svst1_hor_za16 (0, w0 - 1, p0, x1),
+ svst1_hor_za16 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,93 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_za32_0_0:
+** mov (w1[2-5]), w0
+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_0_0,
+ svst1_hor_za32 (0, w0, p0, x1),
+ svst1_hor_za32 (0, w0, p0, x1))
+
+/*
+** st1_za32_0_1:
+** mov (w1[2-5]), w0
+** st1w { za0h\.s\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_0_1,
+ svst1_hor_za32 (0, w0 + 1, p0, x1),
+ svst1_hor_za32 (0, w0 + 1, p0, x1))
+
+/*
+** st1_za32_0_3:
+** mov (w1[2-5]), w0
+** st1w { za0h\.s\[\1, 3\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_0_3,
+ svst1_hor_za32 (0, w0 + 3, p0, x1),
+ svst1_hor_za32 (0, w0 + 3, p0, x1))
+
+/*
+** st1_za32_3_0:
+** mov (w1[2-5]), w0
+** st1w { za3h\.s\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_3_0,
+ svst1_hor_za32 (3, w0, p0, x1),
+ svst1_hor_za32 (3, w0, p0, x1))
+
+/*
+** st1_za32_3_1:
+** mov (w1[2-5]), w0
+** st1w { za3h\.s\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_3_1,
+ svst1_hor_za32 (3, w0 + 1, p0, x1),
+ svst1_hor_za32 (3, w0 + 1, p0, x1))
+
+/*
+** st1_za32_3_3:
+** mov (w1[2-5]), w0
+** st1w { za3h\.s\[\1, 3\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_3_3,
+ svst1_hor_za32 (3, w0 + 3, p0, x1),
+ svst1_hor_za32 (3, w0 + 3, p0, x1))
+
+/*
+** st1_za32_1_2_index:
+** mov (w1[2-5]), w0
+** st1w { za1h\.s\[\1, 2\] }, p0, \[x1, x2, lsl #?2\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_1_2_index,
+ svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4),
+ svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4))
+
+/*
+** st1_za32_0_4:
+** add (w1[2-5]), w0, #?4
+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_0_4,
+ svst1_hor_za32 (0, w0 + 4, p0, x1),
+ svst1_hor_za32 (0, w0 + 4, p0, x1))
+
+/*
+** st1_za32_0_m1:
+** sub (w1[2-5]), w0, #?1
+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za32_0_m1,
+ svst1_hor_za32 (0, w0 - 1, p0, x1),
+ svst1_hor_za32 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_za64_0_0:
+** mov (w1[2-5]), w0
+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za64_0_0,
+ svst1_hor_za64 (0, w0, p0, x1),
+ svst1_hor_za64 (0, w0, p0, x1))
+
+/*
+** st1_za64_0_1:
+** mov (w1[2-5]), w0
+** st1d { za0h\.d\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za64_0_1,
+ svst1_hor_za64 (0, w0 + 1, p0, x1),
+ svst1_hor_za64 (0, w0 + 1, p0, x1))
+
+/*
+** st1_za64_7_0:
+** mov (w1[2-5]), w0
+** st1d { za7h\.d\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za64_7_0,
+ svst1_hor_za64 (7, w0, p0, x1),
+ svst1_hor_za64 (7, w0, p0, x1))
+
+/*
+** st1_za64_7_1:
+** mov (w1[2-5]), w0
+** st1d { za7h\.d\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za64_7_1,
+ svst1_hor_za64 (7, w0 + 1, p0, x1),
+ svst1_hor_za64 (7, w0 + 1, p0, x1))
+
+/*
+** st1_za64_5_1_index:
+** mov (w1[2-5]), w0
+** st1d { za5h\.d\[\1, 1\] }, p0, \[x1, x2, lsl #?3\]
+** ret
+*/
+TEST_STORE_ZA (st1_za64_5_1_index,
+ svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8),
+ svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8))
+
+/*
+** st1_za64_0_2:
+** add (w1[2-5]), w0, #?2
+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za64_0_2,
+ svst1_hor_za64 (0, w0 + 2, p0, x1),
+ svst1_hor_za64 (0, w0 + 2, p0, x1))
+
+/*
+** st1_za64_0_m1:
+** sub (w1[2-5]), w0, #?1
+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za64_0_m1,
+ svst1_hor_za64 (0, w0 - 1, p0, x1),
+ svst1_hor_za64 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,63 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** st1_za8_0_0:
+** mov (w1[2-5]), w0
+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za8_0_0,
+ svst1_hor_za8 (0, w0, p0, x1),
+ svst1_hor_za8 (0, w0, p0, x1))
+
+/*
+** st1_za8_0_1:
+** mov (w1[2-5]), w0
+** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za8_0_1,
+ svst1_hor_za8 (0, w0 + 1, p0, x1),
+ svst1_hor_za8 (0, w0 + 1, p0, x1))
+
+/*
+** st1_za8_0_15:
+** mov (w1[2-5]), w0
+** st1b { za0h\.b\[\1, 15\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za8_0_15,
+ svst1_hor_za8 (0, w0 + 15, p0, x1),
+ svst1_hor_za8 (0, w0 + 15, p0, x1))
+
+/*
+** st1_za8_0_15_index:
+** mov (w1[2-5]), w0
+** st1b { za0h\.b\[\1, 15\] }, p0, \[x1, x2\]
+** ret
+*/
+TEST_STORE_ZA (st1_za8_0_15_index,
+ svst1_hor_za8 (0, w0 + 15, p0, x1 + x2),
+ svst1_hor_za8 (0, w0 + 15, p0, x1 + x2))
+
+/*
+** st1_za8_0_16:
+** add (w1[2-5]), w0, #?16
+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za8_0_16,
+ svst1_hor_za8 (0, w0 + 16, p0, x1),
+ svst1_hor_za8 (0, w0 + 16, p0, x1))
+
+/*
+** st1_za8_0_m1:
+** sub (w1[2-5]), w0, #?1
+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\]
+** ret
+*/
+TEST_STORE_ZA (st1_za8_0_m1,
+ svst1_hor_za8 (0, w0 - 1, p0, x1),
+ svst1_hor_za8 (0, w0 - 1, p0, x1))
new file mode 100644
@@ -0,0 +1,121 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** str_vnum_za_0:
+** mov (w1[2-5]), w0
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_0,
+ svstr_vnum_za (w0, x1, 0),
+ svstr_vnum_za (w0, x1, 0))
+
+/*
+** str_vnum_za_1:
+** mov (w1[2-5]), w0
+** str za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_1,
+ svstr_vnum_za (w0 + 1, x1, 1),
+ svstr_vnum_za (w0 + 1, x1, 1))
+
+/*
+** str_vnum_za_13:
+** mov (w1[2-5]), w0
+** str za\[\1, 13\], \[x1, #13, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_13,
+ svstr_vnum_za (w0 + 13, x1, 13),
+ svstr_vnum_za (w0 + 13, x1, 13))
+
+/*
+** str_vnum_za_15:
+** mov (w1[2-5]), w0
+** str za\[\1, 15\], \[x1, #15, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_15,
+ svstr_vnum_za (w0 + 15, x1, 15),
+ svstr_vnum_za (w0 + 15, x1, 15))
+
+/*
+** str_vnum_za_16:
+** (
+** add (w1[2-5]), w0, #?16
+** incb x1, all, mul #16
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1, all, mul #16
+** add (w1[2-5]), w0, #?16
+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_16,
+ svstr_vnum_za (w0 + 16, x1, 16),
+ svstr_vnum_za (w0 + 16, x1, 16))
+
+/*
+** str_vnum_za_m1:
+** (
+** sub (w1[2-5]), w0, #?1
+** decb x1
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** decb x1
+** sub (w1[2-5]), w0, #?1
+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_m1,
+ svstr_vnum_za (w0 - 1, x1, -1),
+ svstr_vnum_za (w0 - 1, x1, -1))
+
+/*
+** str_vnum_za_mixed_1:
+** add (w1[2-5]), w0, #?1
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_1,
+ svstr_vnum_za (w0 + 1, x1, 0),
+ svstr_vnum_za (w0 + 1, x1, 0))
+
+/*
+** str_vnum_za_mixed_2:
+** (
+** mov (w1[2-5]), w0
+** incb x1
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1
+** mov (w1[2-5]), w0
+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_2,
+ svstr_vnum_za (w0, x1, 1),
+ svstr_vnum_za (w0, x1, 1))
+
+/*
+** str_vnum_za_mixed_3:
+** (
+** add (w1[2-5]), w0, #?2
+** incb x1
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1
+** add (w1[2-5]), w0, #?2
+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_3,
+ svstr_vnum_za (w0 + 2, x1, 1),
+ svstr_vnum_za (w0 + 2, x1, 1))
new file mode 100644
@@ -0,0 +1,166 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#include "test_sme_acle.h"
+
+/*
+** str_vnum_za_0:
+** mov (w1[2-5]), w0
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_0,
+ svstr_vnum_za (w0, x1, 0),
+ svstr_vnum_za (w0, x1, 0))
+
+/*
+** str_vnum_za_1:
+** mov (w1[2-5]), w0
+** str za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_1,
+ svstr_vnum_za (w0 + 1, x1, 1),
+ svstr_vnum_za (w0 + 1, x1, 1))
+
+/*
+** str_vnum_za_13:
+** mov (w1[2-5]), w0
+** str za\[\1, 13\], \[x1, #13, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_13,
+ svstr_vnum_za (w0 + 13, x1, 13),
+ svstr_vnum_za (w0 + 13, x1, 13))
+
+/*
+** str_vnum_za_15:
+** mov (w1[2-5]), w0
+** str za\[\1, 15\], \[x1, #15, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_15,
+ svstr_vnum_za (w0 + 15, x1, 15),
+ svstr_vnum_za (w0 + 15, x1, 15))
+
+/*
+** str_vnum_za_16:
+** (
+** add (w1[2-5]), w0, #?16
+** addsvl (x[0-9]+), x1, #16
+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #16
+** add (w1[2-5]), w0, #?16
+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_16,
+ svstr_vnum_za (w0 + 16, x1, 16),
+ svstr_vnum_za (w0 + 16, x1, 16))
+
+/*
+** str_vnum_za_m1:
+** (
+** sub (w1[2-5]), w0, #?1
+** addsvl (x[0-9]+), x1, #-1
+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #-1
+** sub (w1[2-5]), w0, #?1
+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_m1,
+ svstr_vnum_za (w0 - 1, x1, -1),
+ svstr_vnum_za (w0 - 1, x1, -1))
+
+/*
+** str_vnum_za_mixed_1:
+** add (w1[2-5]), w0, #?1
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_1,
+ svstr_vnum_za (w0 + 1, x1, 0),
+ svstr_vnum_za (w0 + 1, x1, 0))
+
+/*
+** str_vnum_za_mixed_2:
+** (
+** mov (w1[2-5]), w0
+** addsvl (x[0-9]+), x1, #1
+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #1
+** mov (w1[2-5]), w0
+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_2,
+ svstr_vnum_za (w0, x1, 1),
+ svstr_vnum_za (w0, x1, 1))
+
+/*
+** str_vnum_za_mixed_3:
+** (
+** add (w1[2-5]), w0, #?2
+** addsvl (x[0-9]+), x1, #1
+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** addsvl (x[0-9]+), x1, #1
+** add (w1[2-5]), w0, #?2
+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_3,
+ svstr_vnum_za (w0 + 2, x1, 1),
+ svstr_vnum_za (w0 + 2, x1, 1))
+
+/*
+** str_vnum_za_mixed_4:
+** ...
+** addsvl x[0-9]+, x1, #-32
+** ...
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_4,
+ svstr_vnum_za (w0 + 3, x1, -32),
+ svstr_vnum_za (w0 + 3, x1, -32))
+
+/*
+** str_vnum_za_mixed_5:
+** ...
+** rdsvl x[0-9]+, #1
+** ...
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_5,
+ svstr_vnum_za (w0 + 3, x1, -33),
+ svstr_vnum_za (w0 + 3, x1, -33))
+
+/*
+** str_vnum_za_mixed_6:
+** ...
+** addsvl x[0-9]+, x1, #31
+** ...
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_6,
+ svstr_vnum_za (w0 + 4, x1, 31),
+ svstr_vnum_za (w0 + 4, x1, 31))
+
+/*
+** str_vnum_za_mixed_7:
+** ...
+** rdsvl x[0-9]+, #1
+** ...
+** ret
+*/
+TEST_STORE_ZA (str_vnum_za_mixed_7,
+ svstr_vnum_za (w0 + 3, x1, 32),
+ svstr_vnum_za (w0 + 3, x1, 32))
new file mode 100644
@@ -0,0 +1,104 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** str_za_0:
+** mov (w1[2-5]), w0
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_za_0,
+ svstr_za (w0, x1),
+ svstr_za (w0, x1))
+
+/*
+** str_za_1_vnum:
+** mov (w1[2-5]), w0
+** str za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_za_1_vnum,
+ svstr_za (w0 + 1, x1 + svcntsb ()),
+ svstr_za (w0 + 1, x1 + svcntsb ()))
+
+/*
+** str_za_13_vnum:
+** mov (w1[2-5]), w0
+** str za\[\1, 13\], \[x1, #13, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_za_13_vnum,
+ svstr_za (w0 + 13, x1 + svcntsb () * 13),
+ svstr_za (w0 + 13, x1 + svcntsb () * 13))
+
+/*
+** str_za_15_vnum:
+** mov (w1[2-5]), w0
+** str za\[\1, 15\], \[x1, #15, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_za_15_vnum,
+ svstr_za (w0 + 15, x1 + svcntsb () * 15),
+ svstr_za (w0 + 15, x1 + svcntsb () * 15))
+
+/*
+** str_za_16_vnum:
+** (
+** add (w1[2-5]), w0, #?16
+** incb x1, all, mul #16
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** incb x1, all, mul #16
+** add (w1[2-5]), w0, #?16
+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_za_16_vnum,
+ svstr_za (w0 + 16, x1 + svcntsb () * 16),
+ svstr_za (w0 + 16, x1 + svcntsb () * 16))
+
+/*
+** str_za_m1_vnum:
+** (
+** sub (w1[2-5]), w0, #?1
+** decb x1
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** |
+** decb x1
+** sub (w1[2-5]), w0, #?1
+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_za_m1_vnum,
+ svstr_za (w0 - 1, x1 - svcntsb ()),
+ svstr_za (w0 - 1, x1 - svcntsb ()))
+
+/*
+** str_za_2:
+** add (w1[2-5]), w0, #?2
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_za_2,
+ svstr_za (w0 + 2, x1),
+ svstr_za (w0 + 2, x1))
+
+/*
+** str_za_offset:
+** (
+** mov (w1[2-5]), w0
+** add (x[0-9]+), x1, #?1
+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** add (x[0-9]+), x1, #?1
+** mov (w1[2-5]), w0
+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_za_offset,
+ svstr_za (w0, x1 + 1),
+ svstr_za (w0, x1 + 1))
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#include "test_sme_acle.h"
+
+/*
+** str_za_0:
+** mov (w1[2-5]), w0
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_za_0,
+ svstr_za (w0, x1),
+ svstr_za (w0, x1))
+
+/*
+** str_za_1_vnum:
+** mov (w1[2-5]), w0
+** str za\[\1, 1\], \[x1, #1, mul vl\]
+** ret
+*/
+TEST_STORE_ZA (str_za_1_vnum,
+ svstr_za (w0 + 1, x1 + svcntsb ()),
+ svstr_za (w0 + 1, x1 + svcntsb ()))
+
+/*
+** str_za_2:
+** add (w1[2-5]), w0, #?2
+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\]
+** ret
+*/
+TEST_STORE_ZA (str_za_2,
+ svstr_za (w0 + 2, x1),
+ svstr_za (w0 + 2, x1))
+
+/*
+** str_za_offset:
+** (
+** mov (w1[2-5]), w0
+** add (x[0-9]+), x1, #?1
+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\]
+** |
+** add (x[0-9]+), x1, #?1
+** mov (w1[2-5]), w0
+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\]
+** )
+** ret
+*/
+TEST_STORE_ZA (str_za_offset,
+ svstr_za (w0, x1 + 1),
+ svstr_za (w0, x1 + 1))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** sumopa_za32_s8_0_p0_p1_z0_z4:
+** sumopa za0\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (sumopa_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t,
+ svsumopa_za32_s8_m (0, p0, p1, z0, z4),
+ svsumopa_za32_m (0, p0, p1, z0, z4))
+
+/*
+** sumopa_za32_s8_0_p1_p0_z4_z0:
+** sumopa za0\.s, p1/m, p0/m, z4\.b, z0\.b
+** ret
+*/
+TEST_DUAL_ZA (sumopa_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t,
+ svsumopa_za32_s8_m (0, p1, p0, z4, z0),
+ svsumopa_za32_m (0, p1, p0, z4, z0))
+
+/*
+** sumopa_za32_s8_3_p0_p1_z0_z4:
+** sumopa za3\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (sumopa_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t,
+ svsumopa_za32_s8_m (3, p0, p1, z0, z4),
+ svsumopa_za32_m (3, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** sumopa_za64_s16_0_p0_p1_z0_z4:
+** sumopa za0\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (sumopa_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t,
+ svsumopa_za64_s16_m (0, p0, p1, z0, z4),
+ svsumopa_za64_m (0, p0, p1, z0, z4))
+
+/*
+** sumopa_za64_s16_0_p1_p0_z4_z0:
+** sumopa za0\.d, p1/m, p0/m, z4\.h, z0\.h
+** ret
+*/
+TEST_DUAL_ZA (sumopa_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t,
+ svsumopa_za64_s16_m (0, p1, p0, z4, z0),
+ svsumopa_za64_m (0, p1, p0, z4, z0))
+
+/*
+** sumopa_za64_s16_7_p0_p1_z0_z4:
+** sumopa za7\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (sumopa_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t,
+ svsumopa_za64_s16_m (7, p0, p1, z0, z4),
+ svsumopa_za64_m (7, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** sumops_za32_s8_0_p0_p1_z0_z4:
+** sumops za0\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (sumops_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t,
+ svsumops_za32_s8_m (0, p0, p1, z0, z4),
+ svsumops_za32_m (0, p0, p1, z0, z4))
+
+/*
+** sumops_za32_s8_0_p1_p0_z4_z0:
+** sumops za0\.s, p1/m, p0/m, z4\.b, z0\.b
+** ret
+*/
+TEST_DUAL_ZA (sumops_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t,
+ svsumops_za32_s8_m (0, p1, p0, z4, z0),
+ svsumops_za32_m (0, p1, p0, z4, z0))
+
+/*
+** sumops_za32_s8_3_p0_p1_z0_z4:
+** sumops za3\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (sumops_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t,
+ svsumops_za32_s8_m (3, p0, p1, z0, z4),
+ svsumops_za32_m (3, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** sumops_za64_s16_0_p0_p1_z0_z4:
+** sumops za0\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (sumops_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t,
+ svsumops_za64_s16_m (0, p0, p1, z0, z4),
+ svsumops_za64_m (0, p0, p1, z0, z4))
+
+/*
+** sumops_za64_s16_0_p1_p0_z4_z0:
+** sumops za0\.d, p1/m, p0/m, z4\.h, z0\.h
+** ret
+*/
+TEST_DUAL_ZA (sumops_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t,
+ svsumops_za64_s16_m (0, p1, p0, z4, z0),
+ svsumops_za64_m (0, p1, p0, z4, z0))
+
+/*
+** sumops_za64_s16_7_p0_p1_z0_z4:
+** sumops za7\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (sumops_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t,
+ svsumops_za64_s16_m (7, p0, p1, z0, z4),
+ svsumops_za64_m (7, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,62 @@
+#ifndef TEST_SME_ACLE_H
+#define TEST_SME_ACLE_H 1
+
+#if (!defined(STREAMING_COMPATIBLE) \
+ && !defined(NON_STREAMING) \
+ && !defined(STREAMING))
+#define STREAMING
+#endif
+
+#if !defined(NO_SHARED_ZA)
+#define SHARED_ZA
+#endif
+
+#include "../../sve/acle/asm/test_sve_acle.h"
+
+#include <arm_sme.h>
+
+#define TEST_LOAD_ZA(NAME, CODE1, CODE2) \
+ PROTO (NAME, void, (svbool_t p0, int32_t w0, const char *x1, \
+ uint64_t x2)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_STORE_ZA(NAME, CODE1, CODE2) \
+ PROTO (NAME, void, (svbool_t p0, int32_t w0, char *x1, \
+ uint64_t x2)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_READ_ZA(NAME, TYPE, CODE1, CODE2) \
+ PROTO (NAME, TYPE, (TYPE z0, TYPE z1, svbool_t p0, \
+ int32_t w0)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ return z0; \
+ }
+
+#define TEST_WRITE_ZA(NAME, TYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \
+ int32_t w0)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_UNIFORM_ZA(NAME, TYPE, CODE1, CODE2) \
+ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \
+ svbool_t p1)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#define TEST_DUAL_ZA(NAME, TYPE1, TYPE2, CODE1, CODE2) \
+ PROTO (NAME, void, (TYPE1 z0, TYPE1 z1, TYPE1 z2, TYPE1 z3, \
+ TYPE2 z4, TYPE2 z5, TYPE2 z6, TYPE2 z7, \
+ svbool_t p0, svbool_t p1)) \
+ { \
+ INVOKE (CODE1, CODE2); \
+ }
+
+#endif
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#include "test_sme_acle.h"
+
+/*
+** undef_za_1:
+** ret
+*/
+PROTO (undef_za_1, void, ()) { svundef_za (); }
+
+/*
+** undef_za_2:
+** ret
+*/
+PROTO (undef_za_2, void, ())
+{
+ svzero_za ();
+ svundef_za ();
+}
+
+/*
+** undef_za_3:
+** mov (w1[2-5]), #?0
+** str za\[\1, 0\], \[x0\]
+** ret
+*/
+PROTO (undef_za_3, void, (void *ptr))
+{
+ svzero_za ();
+ svundef_za ();
+ svstr_za (0, ptr);
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** usmopa_za32_u8_0_p0_p1_z0_z4:
+** usmopa za0\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (usmopa_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t,
+ svusmopa_za32_u8_m (0, p0, p1, z0, z4),
+ svusmopa_za32_m (0, p0, p1, z0, z4))
+
+/*
+** usmopa_za32_u8_0_p1_p0_z4_z0:
+** usmopa za0\.s, p1/m, p0/m, z4\.b, z0\.b
+** ret
+*/
+TEST_DUAL_ZA (usmopa_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t,
+ svusmopa_za32_u8_m (0, p1, p0, z4, z0),
+ svusmopa_za32_m (0, p1, p0, z4, z0))
+
+/*
+** usmopa_za32_u8_3_p0_p1_z0_z4:
+** usmopa za3\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (usmopa_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t,
+ svusmopa_za32_u8_m (3, p0, p1, z0, z4),
+ svusmopa_za32_m (3, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** usmopa_za64_u16_0_p0_p1_z0_z4:
+** usmopa za0\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (usmopa_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t,
+ svusmopa_za64_u16_m (0, p0, p1, z0, z4),
+ svusmopa_za64_m (0, p0, p1, z0, z4))
+
+/*
+** usmopa_za64_u16_0_p1_p0_z4_z0:
+** usmopa za0\.d, p1/m, p0/m, z4\.h, z0\.h
+** ret
+*/
+TEST_DUAL_ZA (usmopa_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t,
+ svusmopa_za64_u16_m (0, p1, p0, z4, z0),
+ svusmopa_za64_m (0, p1, p0, z4, z0))
+
+/*
+** usmopa_za64_u16_7_p0_p1_z0_z4:
+** usmopa za7\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (usmopa_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t,
+ svusmopa_za64_u16_m (7, p0, p1, z0, z4),
+ svusmopa_za64_m (7, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** usmops_za32_u8_0_p0_p1_z0_z4:
+** usmops za0\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (usmops_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t,
+ svusmops_za32_u8_m (0, p0, p1, z0, z4),
+ svusmops_za32_m (0, p0, p1, z0, z4))
+
+/*
+** usmops_za32_u8_0_p1_p0_z4_z0:
+** usmops za0\.s, p1/m, p0/m, z4\.b, z0\.b
+** ret
+*/
+TEST_DUAL_ZA (usmops_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t,
+ svusmops_za32_u8_m (0, p1, p0, z4, z0),
+ svusmops_za32_m (0, p1, p0, z4, z0))
+
+/*
+** usmops_za32_u8_3_p0_p1_z0_z4:
+** usmops za3\.s, p0/m, p1/m, z0\.b, z4\.b
+** ret
+*/
+TEST_DUAL_ZA (usmops_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t,
+ svusmops_za32_u8_m (3, p0, p1, z0, z4),
+ svusmops_za32_m (3, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,32 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+#pragma GCC target "+sme-i16i64"
+
+/*
+** usmops_za64_u16_0_p0_p1_z0_z4:
+** usmops za0\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (usmops_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t,
+ svusmops_za64_u16_m (0, p0, p1, z0, z4),
+ svusmops_za64_m (0, p0, p1, z0, z4))
+
+/*
+** usmops_za64_u16_0_p1_p0_z4_z0:
+** usmops za0\.d, p1/m, p0/m, z4\.h, z0\.h
+** ret
+*/
+TEST_DUAL_ZA (usmops_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t,
+ svusmops_za64_u16_m (0, p1, p0, z4, z0),
+ svusmops_za64_m (0, p1, p0, z4, z0))
+
+/*
+** usmops_za64_u16_7_p0_p1_z0_z4:
+** usmops za7\.d, p0/m, p1/m, z0\.h, z4\.h
+** ret
+*/
+TEST_DUAL_ZA (usmops_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t,
+ svusmops_za64_u16_m (7, p0, p1, z0, z4),
+ svusmops_za64_m (7, p0, p1, z0, z4))
new file mode 100644
@@ -0,0 +1,173 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za128_s8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t,
+ svwrite_hor_za128_s8_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s8_0_1_z0:
+** add (w1[2-5]), w0, #?1
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t,
+ svwrite_hor_za128_s8_m (0, w0 + 1, p0, z0),
+ svwrite_hor_za128_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za128_s8_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_m1_z0, svint8_t,
+ svwrite_hor_za128_s8_m (0, w0 - 1, p0, z0),
+ svwrite_hor_za128_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za128_s8_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_1_0_z0, svint8_t,
+ svwrite_hor_za128_s8_m (1, w0, p0, z0),
+ svwrite_hor_za128_m (1, w0, p0, z0))
+
+/*
+** write_za128_s8_15_0_z0:
+** mov (w1[2-5]), w0
+** mova za15h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_15_0_z0, svint8_t,
+ svwrite_hor_za128_s8_m (15, w0, p0, z0),
+ svwrite_hor_za128_m (15, w0, p0, z0))
+
+/*
+** write_za128_s8_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z1\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_0_z1, svint8_t,
+ svwrite_hor_za128_s8_m (0, w0, p0, z1),
+ svwrite_hor_za128_m (0, w0, p0, z1))
+
+/*
+** write_za128_u8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u8_0_0_z0, svuint8_t,
+ svwrite_hor_za128_u8_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s16_0_0_z0, svint16_t,
+ svwrite_hor_za128_s16_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_u16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u16_0_0_z0, svuint16_t,
+ svwrite_hor_za128_u16_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_f16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_f16_0_0_z0, svfloat16_t,
+ svwrite_hor_za128_f16_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_bf16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_bf16_0_0_z0, svbfloat16_t,
+ svwrite_hor_za128_bf16_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s32_0_0_z0, svint32_t,
+ svwrite_hor_za128_s32_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_u32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u32_0_0_z0, svuint32_t,
+ svwrite_hor_za128_u32_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_f32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_f32_0_0_z0, svfloat32_t,
+ svwrite_hor_za128_f32_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s64_0_0_z0, svint64_t,
+ svwrite_hor_za128_s64_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_u64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u64_0_0_z0, svuint64_t,
+ svwrite_hor_za128_u64_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_f64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_f64_0_0_z0, svfloat64_t,
+ svwrite_hor_za128_f64_m (0, w0, p0, z0),
+ svwrite_hor_za128_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za16_s16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t,
+ svwrite_hor_za16_s16_m (0, w0, p0, z0),
+ svwrite_hor_za16_m (0, w0, p0, z0))
+
+/*
+** write_za16_s16_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.h\[\1, 1\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t,
+ svwrite_hor_za16_s16_m (0, w0 + 1, p0, z0),
+ svwrite_hor_za16_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za16_s16_0_7_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.h\[\1, 7\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_7_z0, svint16_t,
+ svwrite_hor_za16_s16_m (0, w0 + 7, p0, z0),
+ svwrite_hor_za16_m (0, w0 + 7, p0, z0))
+
+/*
+** write_za16_s16_0_8_z0:
+** add (w1[2-5]), w0, #?8
+** mova za0h\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_8_z0, svint16_t,
+ svwrite_hor_za16_s16_m (0, w0 + 8, p0, z0),
+ svwrite_hor_za16_m (0, w0 + 8, p0, z0))
+
+/*
+** write_za16_s16_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0h\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_m1_z0, svint16_t,
+ svwrite_hor_za16_s16_m (0, w0 - 1, p0, z0),
+ svwrite_hor_za16_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za16_s16_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1h\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_1_0_z0, svint16_t,
+ svwrite_hor_za16_s16_m (1, w0, p0, z0),
+ svwrite_hor_za16_m (1, w0, p0, z0))
+
+/*
+** write_za16_s16_1_7_z0:
+** mov (w1[2-5]), w0
+** mova za1h\.h\[\1, 7\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_1_7_z0, svint16_t,
+ svwrite_hor_za16_s16_m (1, w0 + 7, p0, z0),
+ svwrite_hor_za16_m (1, w0 + 7, p0, z0))
+
+/*
+** write_za16_s16_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0h\.h\[\1, 0\], p0/m, z1\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_0_z1, svint16_t,
+ svwrite_hor_za16_s16_m (0, w0, p0, z1),
+ svwrite_hor_za16_m (0, w0, p0, z1))
+
+/*
+** write_za16_u16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_u16_0_0_z0, svuint16_t,
+ svwrite_hor_za16_u16_m (0, w0, p0, z0),
+ svwrite_hor_za16_m (0, w0, p0, z0))
+
+/*
+** write_za16_f16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_f16_0_0_z0, svfloat16_t,
+ svwrite_hor_za16_f16_m (0, w0, p0, z0),
+ svwrite_hor_za16_m (0, w0, p0, z0))
+
+/*
+** write_za16_bf16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_bf16_0_0_z0, svbfloat16_t,
+ svwrite_hor_za16_bf16_m (0, w0, p0, z0),
+ svwrite_hor_za16_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za32_s32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_0_z0, svint32_t,
+ svwrite_hor_za32_s32_m (0, w0, p0, z0),
+ svwrite_hor_za32_m (0, w0, p0, z0))
+
+/*
+** write_za32_s32_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.s\[\1, 1\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t,
+ svwrite_hor_za32_s32_m (0, w0 + 1, p0, z0),
+ svwrite_hor_za32_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za32_s32_0_3_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.s\[\1, 3\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_3_z0, svint32_t,
+ svwrite_hor_za32_s32_m (0, w0 + 3, p0, z0),
+ svwrite_hor_za32_m (0, w0 + 3, p0, z0))
+
+/*
+** write_za32_s32_0_4_z0:
+** add (w1[2-5]), w0, #?4
+** mova za0h\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_4_z0, svint32_t,
+ svwrite_hor_za32_s32_m (0, w0 + 4, p0, z0),
+ svwrite_hor_za32_m (0, w0 + 4, p0, z0))
+
+/*
+** write_za32_s32_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0h\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_m1_z0, svint32_t,
+ svwrite_hor_za32_s32_m (0, w0 - 1, p0, z0),
+ svwrite_hor_za32_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za32_s32_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1h\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_1_0_z0, svint32_t,
+ svwrite_hor_za32_s32_m (1, w0, p0, z0),
+ svwrite_hor_za32_m (1, w0, p0, z0))
+
+/*
+** write_za32_s32_1_3_z0:
+** mov (w1[2-5]), w0
+** mova za1h\.s\[\1, 3\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_1_3_z0, svint32_t,
+ svwrite_hor_za32_s32_m (1, w0 + 3, p0, z0),
+ svwrite_hor_za32_m (1, w0 + 3, p0, z0))
+
+/*
+** write_za32_s32_3_0_z0:
+** mov (w1[2-5]), w0
+** mova za3h\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_3_0_z0, svint32_t,
+ svwrite_hor_za32_s32_m (3, w0, p0, z0),
+ svwrite_hor_za32_m (3, w0, p0, z0))
+
+/*
+** write_za32_s32_3_3_z0:
+** mov (w1[2-5]), w0
+** mova za3h\.s\[\1, 3\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_3_3_z0, svint32_t,
+ svwrite_hor_za32_s32_m (3, w0 + 3, p0, z0),
+ svwrite_hor_za32_m (3, w0 + 3, p0, z0))
+
+/*
+** write_za32_s32_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0h\.s\[\1, 0\], p0/m, z1\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_0_z1, svint32_t,
+ svwrite_hor_za32_s32_m (0, w0, p0, z1),
+ svwrite_hor_za32_m (0, w0, p0, z1))
+
+/*
+** write_za32_u32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_u32_0_0_z0, svuint32_t,
+ svwrite_hor_za32_u32_m (0, w0, p0, z0),
+ svwrite_hor_za32_m (0, w0, p0, z0))
+
+/*
+** write_za32_f32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_f32_0_0_z0, svfloat32_t,
+ svwrite_hor_za32_f32_m (0, w0, p0, z0),
+ svwrite_hor_za32_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za64_s64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t,
+ svwrite_hor_za64_s64_m (0, w0, p0, z0),
+ svwrite_hor_za64_m (0, w0, p0, z0))
+
+/*
+** write_za64_s64_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.d\[\1, 1\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t,
+ svwrite_hor_za64_s64_m (0, w0 + 1, p0, z0),
+ svwrite_hor_za64_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za64_s64_0_2_z0:
+** add (w1[2-5]), w0, #?2
+** mova za0h\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_2_z0, svint64_t,
+ svwrite_hor_za64_s64_m (0, w0 + 2, p0, z0),
+ svwrite_hor_za64_m (0, w0 + 2, p0, z0))
+
+/*
+** write_za64_s64_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0h\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_m1_z0, svint64_t,
+ svwrite_hor_za64_s64_m (0, w0 - 1, p0, z0),
+ svwrite_hor_za64_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za64_s64_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1h\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_1_0_z0, svint64_t,
+ svwrite_hor_za64_s64_m (1, w0, p0, z0),
+ svwrite_hor_za64_m (1, w0, p0, z0))
+
+/*
+** write_za64_s64_1_1_z0:
+** mov (w1[2-5]), w0
+** mova za1h\.d\[\1, 1\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_1_1_z0, svint64_t,
+ svwrite_hor_za64_s64_m (1, w0 + 1, p0, z0),
+ svwrite_hor_za64_m (1, w0 + 1, p0, z0))
+
+/*
+** write_za64_s64_7_0_z0:
+** mov (w1[2-5]), w0
+** mova za7h\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_7_0_z0, svint64_t,
+ svwrite_hor_za64_s64_m (7, w0, p0, z0),
+ svwrite_hor_za64_m (7, w0, p0, z0))
+
+/*
+** write_za64_s64_7_1_z0:
+** mov (w1[2-5]), w0
+** mova za7h\.d\[\1, 1\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_7_1_z0, svint64_t,
+ svwrite_hor_za64_s64_m (7, w0 + 1, p0, z0),
+ svwrite_hor_za64_m (7, w0 + 1, p0, z0))
+
+/*
+** write_za64_s64_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0h\.d\[\1, 0\], p0/m, z1\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_0_z1, svint64_t,
+ svwrite_hor_za64_s64_m (0, w0, p0, z1),
+ svwrite_hor_za64_m (0, w0, p0, z1))
+
+/*
+** write_za64_u64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_u64_0_0_z0, svuint64_t,
+ svwrite_hor_za64_u64_m (0, w0, p0, z0),
+ svwrite_hor_za64_m (0, w0, p0, z0))
+
+/*
+** write_za64_f64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_f64_0_0_z0, svfloat64_t,
+ svwrite_hor_za64_f64_m (0, w0, p0, z0),
+ svwrite_hor_za64_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za8_s8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t,
+ svwrite_hor_za8_s8_m (0, w0, p0, z0),
+ svwrite_hor_za8_m (0, w0, p0, z0))
+
+/*
+** write_za8_s8_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.b\[\1, 1\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t,
+ svwrite_hor_za8_s8_m (0, w0 + 1, p0, z0),
+ svwrite_hor_za8_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za8_s8_0_15_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.b\[\1, 15\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_15_z0, svint8_t,
+ svwrite_hor_za8_s8_m (0, w0 + 15, p0, z0),
+ svwrite_hor_za8_m (0, w0 + 15, p0, z0))
+
+/*
+** write_za8_s8_0_16_z0:
+** add (w1[2-5]), w0, #?16
+** mova za0h\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_16_z0, svint8_t,
+ svwrite_hor_za8_s8_m (0, w0 + 16, p0, z0),
+ svwrite_hor_za8_m (0, w0 + 16, p0, z0))
+
+/*
+** write_za8_s8_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0h\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_m1_z0, svint8_t,
+ svwrite_hor_za8_s8_m (0, w0 - 1, p0, z0),
+ svwrite_hor_za8_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za8_s8_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0h\.b\[\1, 0\], p0/m, z1\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_0_z1, svint8_t,
+ svwrite_hor_za8_s8_m (0, w0, p0, z1),
+ svwrite_hor_za8_m (0, w0, p0, z1))
+
+/*
+** write_za8_u8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0h\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_u8_0_0_z0, svuint8_t,
+ svwrite_hor_za8_u8_m (0, w0, p0, z0),
+ svwrite_hor_za8_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,173 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za128_s8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t,
+ svwrite_ver_za128_s8_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s8_0_1_z0:
+** add (w1[2-5]), w0, #?1
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t,
+ svwrite_ver_za128_s8_m (0, w0 + 1, p0, z0),
+ svwrite_ver_za128_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za128_s8_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_m1_z0, svint8_t,
+ svwrite_ver_za128_s8_m (0, w0 - 1, p0, z0),
+ svwrite_ver_za128_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za128_s8_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_1_0_z0, svint8_t,
+ svwrite_ver_za128_s8_m (1, w0, p0, z0),
+ svwrite_ver_za128_m (1, w0, p0, z0))
+
+/*
+** write_za128_s8_15_0_z0:
+** mov (w1[2-5]), w0
+** mova za15v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_15_0_z0, svint8_t,
+ svwrite_ver_za128_s8_m (15, w0, p0, z0),
+ svwrite_ver_za128_m (15, w0, p0, z0))
+
+/*
+** write_za128_s8_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z1\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s8_0_0_z1, svint8_t,
+ svwrite_ver_za128_s8_m (0, w0, p0, z1),
+ svwrite_ver_za128_m (0, w0, p0, z1))
+
+/*
+** write_za128_u8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u8_0_0_z0, svuint8_t,
+ svwrite_ver_za128_u8_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s16_0_0_z0, svint16_t,
+ svwrite_ver_za128_s16_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_u16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u16_0_0_z0, svuint16_t,
+ svwrite_ver_za128_u16_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_f16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_f16_0_0_z0, svfloat16_t,
+ svwrite_ver_za128_f16_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_bf16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_bf16_0_0_z0, svbfloat16_t,
+ svwrite_ver_za128_bf16_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s32_0_0_z0, svint32_t,
+ svwrite_ver_za128_s32_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_u32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u32_0_0_z0, svuint32_t,
+ svwrite_ver_za128_u32_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_f32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_f32_0_0_z0, svfloat32_t,
+ svwrite_ver_za128_f32_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_s64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_s64_0_0_z0, svint64_t,
+ svwrite_ver_za128_s64_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_u64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_u64_0_0_z0, svuint64_t,
+ svwrite_ver_za128_u64_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
+
+/*
+** write_za128_f64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.q\[\1, 0\], p0/m, z0\.q
+** ret
+*/
+TEST_WRITE_ZA (write_za128_f64_0_0_z0, svfloat64_t,
+ svwrite_ver_za128_f64_m (0, w0, p0, z0),
+ svwrite_ver_za128_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za16_s16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t,
+ svwrite_ver_za16_s16_m (0, w0, p0, z0),
+ svwrite_ver_za16_m (0, w0, p0, z0))
+
+/*
+** write_za16_s16_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.h\[\1, 1\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t,
+ svwrite_ver_za16_s16_m (0, w0 + 1, p0, z0),
+ svwrite_ver_za16_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za16_s16_0_7_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.h\[\1, 7\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_7_z0, svint16_t,
+ svwrite_ver_za16_s16_m (0, w0 + 7, p0, z0),
+ svwrite_ver_za16_m (0, w0 + 7, p0, z0))
+
+/*
+** write_za16_s16_0_8_z0:
+** add (w1[2-5]), w0, #?8
+** mova za0v\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_8_z0, svint16_t,
+ svwrite_ver_za16_s16_m (0, w0 + 8, p0, z0),
+ svwrite_ver_za16_m (0, w0 + 8, p0, z0))
+
+/*
+** write_za16_s16_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0v\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_m1_z0, svint16_t,
+ svwrite_ver_za16_s16_m (0, w0 - 1, p0, z0),
+ svwrite_ver_za16_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za16_s16_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1v\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_1_0_z0, svint16_t,
+ svwrite_ver_za16_s16_m (1, w0, p0, z0),
+ svwrite_ver_za16_m (1, w0, p0, z0))
+
+/*
+** write_za16_s16_1_7_z0:
+** mov (w1[2-5]), w0
+** mova za1v\.h\[\1, 7\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_1_7_z0, svint16_t,
+ svwrite_ver_za16_s16_m (1, w0 + 7, p0, z0),
+ svwrite_ver_za16_m (1, w0 + 7, p0, z0))
+
+/*
+** write_za16_s16_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0v\.h\[\1, 0\], p0/m, z1\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_s16_0_0_z1, svint16_t,
+ svwrite_ver_za16_s16_m (0, w0, p0, z1),
+ svwrite_ver_za16_m (0, w0, p0, z1))
+
+/*
+** write_za16_u16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_u16_0_0_z0, svuint16_t,
+ svwrite_ver_za16_u16_m (0, w0, p0, z0),
+ svwrite_ver_za16_m (0, w0, p0, z0))
+
+/*
+** write_za16_f16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_f16_0_0_z0, svfloat16_t,
+ svwrite_ver_za16_f16_m (0, w0, p0, z0),
+ svwrite_ver_za16_m (0, w0, p0, z0))
+
+/*
+** write_za16_bf16_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.h\[\1, 0\], p0/m, z0\.h
+** ret
+*/
+TEST_WRITE_ZA (write_za16_bf16_0_0_z0, svbfloat16_t,
+ svwrite_ver_za16_bf16_m (0, w0, p0, z0),
+ svwrite_ver_za16_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,123 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za32_s32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_0_z0, svint32_t,
+ svwrite_ver_za32_s32_m (0, w0, p0, z0),
+ svwrite_ver_za32_m (0, w0, p0, z0))
+
+/*
+** write_za32_s32_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.s\[\1, 1\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t,
+ svwrite_ver_za32_s32_m (0, w0 + 1, p0, z0),
+ svwrite_ver_za32_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za32_s32_0_3_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.s\[\1, 3\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_3_z0, svint32_t,
+ svwrite_ver_za32_s32_m (0, w0 + 3, p0, z0),
+ svwrite_ver_za32_m (0, w0 + 3, p0, z0))
+
+/*
+** write_za32_s32_0_4_z0:
+** add (w1[2-5]), w0, #?4
+** mova za0v\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_4_z0, svint32_t,
+ svwrite_ver_za32_s32_m (0, w0 + 4, p0, z0),
+ svwrite_ver_za32_m (0, w0 + 4, p0, z0))
+
+/*
+** write_za32_s32_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0v\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_m1_z0, svint32_t,
+ svwrite_ver_za32_s32_m (0, w0 - 1, p0, z0),
+ svwrite_ver_za32_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za32_s32_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1v\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_1_0_z0, svint32_t,
+ svwrite_ver_za32_s32_m (1, w0, p0, z0),
+ svwrite_ver_za32_m (1, w0, p0, z0))
+
+/*
+** write_za32_s32_1_3_z0:
+** mov (w1[2-5]), w0
+** mova za1v\.s\[\1, 3\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_1_3_z0, svint32_t,
+ svwrite_ver_za32_s32_m (1, w0 + 3, p0, z0),
+ svwrite_ver_za32_m (1, w0 + 3, p0, z0))
+
+/*
+** write_za32_s32_3_0_z0:
+** mov (w1[2-5]), w0
+** mova za3v\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_3_0_z0, svint32_t,
+ svwrite_ver_za32_s32_m (3, w0, p0, z0),
+ svwrite_ver_za32_m (3, w0, p0, z0))
+
+/*
+** write_za32_s32_3_3_z0:
+** mov (w1[2-5]), w0
+** mova za3v\.s\[\1, 3\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_3_3_z0, svint32_t,
+ svwrite_ver_za32_s32_m (3, w0 + 3, p0, z0),
+ svwrite_ver_za32_m (3, w0 + 3, p0, z0))
+
+/*
+** write_za32_s32_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0v\.s\[\1, 0\], p0/m, z1\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_s32_0_0_z1, svint32_t,
+ svwrite_ver_za32_s32_m (0, w0, p0, z1),
+ svwrite_ver_za32_m (0, w0, p0, z1))
+
+/*
+** write_za32_u32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_u32_0_0_z0, svuint32_t,
+ svwrite_ver_za32_u32_m (0, w0, p0, z0),
+ svwrite_ver_za32_m (0, w0, p0, z0))
+
+/*
+** write_za32_f32_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.s\[\1, 0\], p0/m, z0\.s
+** ret
+*/
+TEST_WRITE_ZA (write_za32_f32_0_0_z0, svfloat32_t,
+ svwrite_ver_za32_f32_m (0, w0, p0, z0),
+ svwrite_ver_za32_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,113 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za64_s64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t,
+ svwrite_ver_za64_s64_m (0, w0, p0, z0),
+ svwrite_ver_za64_m (0, w0, p0, z0))
+
+/*
+** write_za64_s64_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.d\[\1, 1\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t,
+ svwrite_ver_za64_s64_m (0, w0 + 1, p0, z0),
+ svwrite_ver_za64_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za64_s64_0_2_z0:
+** add (w1[2-5]), w0, #?2
+** mova za0v\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_2_z0, svint64_t,
+ svwrite_ver_za64_s64_m (0, w0 + 2, p0, z0),
+ svwrite_ver_za64_m (0, w0 + 2, p0, z0))
+
+/*
+** write_za64_s64_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0v\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_m1_z0, svint64_t,
+ svwrite_ver_za64_s64_m (0, w0 - 1, p0, z0),
+ svwrite_ver_za64_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za64_s64_1_0_z0:
+** mov (w1[2-5]), w0
+** mova za1v\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_1_0_z0, svint64_t,
+ svwrite_ver_za64_s64_m (1, w0, p0, z0),
+ svwrite_ver_za64_m (1, w0, p0, z0))
+
+/*
+** write_za64_s64_1_1_z0:
+** mov (w1[2-5]), w0
+** mova za1v\.d\[\1, 1\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_1_1_z0, svint64_t,
+ svwrite_ver_za64_s64_m (1, w0 + 1, p0, z0),
+ svwrite_ver_za64_m (1, w0 + 1, p0, z0))
+
+/*
+** write_za64_s64_7_0_z0:
+** mov (w1[2-5]), w0
+** mova za7v\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_7_0_z0, svint64_t,
+ svwrite_ver_za64_s64_m (7, w0, p0, z0),
+ svwrite_ver_za64_m (7, w0, p0, z0))
+
+/*
+** write_za64_s64_7_1_z0:
+** mov (w1[2-5]), w0
+** mova za7v\.d\[\1, 1\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_7_1_z0, svint64_t,
+ svwrite_ver_za64_s64_m (7, w0 + 1, p0, z0),
+ svwrite_ver_za64_m (7, w0 + 1, p0, z0))
+
+/*
+** write_za64_s64_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0v\.d\[\1, 0\], p0/m, z1\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_s64_0_0_z1, svint64_t,
+ svwrite_ver_za64_s64_m (0, w0, p0, z1),
+ svwrite_ver_za64_m (0, w0, p0, z1))
+
+/*
+** write_za64_u64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_u64_0_0_z0, svuint64_t,
+ svwrite_ver_za64_u64_m (0, w0, p0, z0),
+ svwrite_ver_za64_m (0, w0, p0, z0))
+
+/*
+** write_za64_f64_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.d\[\1, 0\], p0/m, z0\.d
+** ret
+*/
+TEST_WRITE_ZA (write_za64_f64_0_0_z0, svfloat64_t,
+ svwrite_ver_za64_f64_m (0, w0, p0, z0),
+ svwrite_ver_za64_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme_acle.h"
+
+/*
+** write_za8_s8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t,
+ svwrite_ver_za8_s8_m (0, w0, p0, z0),
+ svwrite_ver_za8_m (0, w0, p0, z0))
+
+/*
+** write_za8_s8_0_1_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.b\[\1, 1\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t,
+ svwrite_ver_za8_s8_m (0, w0 + 1, p0, z0),
+ svwrite_ver_za8_m (0, w0 + 1, p0, z0))
+
+/*
+** write_za8_s8_0_15_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.b\[\1, 15\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_15_z0, svint8_t,
+ svwrite_ver_za8_s8_m (0, w0 + 15, p0, z0),
+ svwrite_ver_za8_m (0, w0 + 15, p0, z0))
+
+/*
+** write_za8_s8_0_16_z0:
+** add (w1[2-5]), w0, #?16
+** mova za0v\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_16_z0, svint8_t,
+ svwrite_ver_za8_s8_m (0, w0 + 16, p0, z0),
+ svwrite_ver_za8_m (0, w0 + 16, p0, z0))
+
+/*
+** write_za8_s8_0_m1_z0:
+** sub (w1[2-5]), w0, #?1
+** mova za0v\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_m1_z0, svint8_t,
+ svwrite_ver_za8_s8_m (0, w0 - 1, p0, z0),
+ svwrite_ver_za8_m (0, w0 - 1, p0, z0))
+
+/*
+** write_za8_s8_0_0_z1:
+** mov (w1[2-5]), w0
+** mova za0v\.b\[\1, 0\], p0/m, z1\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_s8_0_0_z1, svint8_t,
+ svwrite_ver_za8_s8_m (0, w0, p0, z1),
+ svwrite_ver_za8_m (0, w0, p0, z1))
+
+/*
+** write_za8_u8_0_0_z0:
+** mov (w1[2-5]), w0
+** mova za0v\.b\[\1, 0\], p0/m, z0\.b
+** ret
+*/
+TEST_WRITE_ZA (write_za8_u8_0_0_z0, svuint8_t,
+ svwrite_ver_za8_u8_m (0, w0, p0, z0),
+ svwrite_ver_za8_m (0, w0, p0, z0))
new file mode 100644
@@ -0,0 +1,130 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#include "test_sme_acle.h"
+
+/*
+** zero_mask_za_0:
+** zero { *}
+** ret
+*/
+PROTO (zero_mask_za_0, void, ()) { svzero_mask_za (0); }
+
+/*
+** zero_mask_za_01:
+** zero { za0\.d }
+** ret
+*/
+PROTO (zero_mask_za_01, void, ()) { svzero_mask_za (0x01); }
+
+/*
+** zero_mask_za_80:
+** zero { za7\.d }
+** ret
+*/
+PROTO (zero_mask_za_80, void, ()) { svzero_mask_za (0x80); }
+
+/*
+** zero_mask_za_03:
+** zero { za0\.d, za1\.d }
+** ret
+*/
+PROTO (zero_mask_za_03, void, ()) { svzero_mask_za (0x03); }
+
+/*
+** zero_mask_za_09:
+** zero { za0\.d, za3\.d }
+** ret
+*/
+PROTO (zero_mask_za_09, void, ()) { svzero_mask_za (0x09); }
+
+/*
+** zero_mask_za_0d:
+** zero { za0\.d, za2\.d, za3\.d }
+** ret
+*/
+PROTO (zero_mask_za_0d, void, ()) { svzero_mask_za (0x0d); }
+
+/*
+** zero_mask_za_3c:
+** zero { za2\.d, za3\.d, za4\.d, za5\.d }
+** ret
+*/
+PROTO (zero_mask_za_3c, void, ()) { svzero_mask_za (0x3c); }
+
+/*
+** zero_mask_za_5a:
+** zero { za1\.d, za3\.d, za4\.d, za6\.d }
+** ret
+*/
+PROTO (zero_mask_za_5a, void, ()) { svzero_mask_za (0x5a); }
+
+/*
+** zero_mask_za_11:
+** zero { za0\.s }
+** ret
+*/
+PROTO (zero_mask_za_11, void, ()) { svzero_mask_za (0x11); }
+
+/*
+** zero_mask_za_88:
+** zero { za3\.s }
+** ret
+*/
+PROTO (zero_mask_za_88, void, ()) { svzero_mask_za (0x88); }
+
+/*
+** zero_mask_za_33:
+** zero { za0\.s, za1\.s }
+** ret
+*/
+PROTO (zero_mask_za_33, void, ()) { svzero_mask_za (0x33); }
+
+/*
+** zero_mask_za_cc:
+** zero { za2\.s, za3\.s }
+** ret
+*/
+PROTO (zero_mask_za_cc, void, ()) { svzero_mask_za (0xcc); }
+
+/*
+** zero_mask_za_55:
+** zero { za0\.h }
+** ret
+*/
+PROTO (zero_mask_za_55, void, ()) { svzero_mask_za (0x55); }
+
+/*
+** zero_mask_za_aa:
+** zero { za1\.h }
+** ret
+*/
+PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); }
+
+/*
+** zero_mask_za_ab:
+** zero { za1\.h, za0\.d }
+** ret
+*/
+PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); }
+
+/*
+** zero_mask_za_d7:
+** zero { za0\.h, za1\.d, za7\.d }
+** ret
+*/
+PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); }
+
+/*
+** zero_mask_za_bf:
+** zero { za1\.h, za0\.s, za2\.d }
+** ret
+*/
+PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); }
+
+/*
+** zero_mask_za_ff:
+** zero { za }
+** ret
+*/
+PROTO (zero_mask_za_ff, void, ()) { svzero_mask_za (0xff); }
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#define STREAMING_COMPATIBLE
+#include "test_sme_acle.h"
+
+/*
+** zero_za:
+** zero { za }
+** ret
+*/
+PROTO (zero_za, void, ()) { svzero_za (); }
@@ -11,12 +11,22 @@
#error "Please define -DTEST_OVERLOADS or -DTEST_FULL"
#endif
-#ifdef STREAMING_COMPATIBLE
-#define ATTR __attribute__ ((arm_streaming_compatible))
+#if defined(STREAMING_COMPATIBLE)
+#define SM_ATTR __attribute__ ((arm_streaming_compatible))
+#elif defined(STREAMING)
+#define SM_ATTR __attribute__ ((arm_streaming))
#else
-#define ATTR
+#define SM_ATTR
#endif
+#ifdef SHARED_ZA
+#define ZA_ATTR __attribute__ ((arm_shared_za))
+#else
+#define ZA_ATTR
+#endif
+
+#define ATTR SM_ATTR ZA_ATTR
+
#ifdef __cplusplus
#define PROTO(NAME, RET, ARGS) \
extern "C" RET ATTR NAME ARGS; RET ATTR NAME ARGS
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv8.2-a+sme")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile)
+{
+ svusmopa_za32_m (0, pg, pg, u8); /* { dg-error {too few arguments to function 'svusmopa_za32_m'} } */
+ svusmopa_za32_m (0, pg, pg, u8, s8, 0); /* { dg-error {too many arguments to function 'svusmopa_za32_m'} } */
+ svusmopa_za32_m (tile, pg, pg, u8, s8); /* { dg-error {argument 1 of 'svusmopa_za32_m' must be an integer constant expression} } */
+ svusmopa_za32_m (-1, pg, pg, u8, s8); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za32_m', which expects a value in the range \[0, 3\]} } */
+ svusmopa_za32_m (4, pg, pg, u8, s8); /* { dg-error {passing 4 to argument 1 of 'svusmopa_za32_m', which expects a value in the range \[0, 3\]} } */
+ svusmopa_za32_m (0, u8, pg, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svusmopa_za32_m', which expects 'svbool_t'} } */
+ svusmopa_za32_m (0, pg, u8, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmopa_za32_m', which expects 'svbool_t'} } */
+ svusmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svusmopa_za32_m', which expects an SVE vector type} } */
+ svusmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svint8_t' arguments} } */
+ svusmopa_za32_m (0, pg, pg, pg, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svbool_t' arguments} } */
+ svusmopa_za32_m (0, pg, pg, f16, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */
+ svusmopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 5 of 'svusmopa_za32_m', which expects a vector of signed integers} } */
+ svusmopa_za32_m (0, pg, pg, u8, s16); /* { dg-error {arguments 4 and 5 of 'svusmopa_za32_m' must have the same element size, but the values passed here have type 'svuint8_t' and 'svint16_t' respectively} } */
+ svusmopa_za32_m (0, pg, pg, u16, s16); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svuint16_t' arguments} } */
+
+ svusmopa_za64_m (0, pg, pg, u16, s16); /* { dg-error {ACLE function 'svusmopa_za64_u16_m' requires ISA extension 'sme-i16i64'} } */
+}
+
+void __attribute__((arm_streaming))
+f2 (svbool_t pg, svint8_t s8, svuint8_t u8)
+{
+ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only be called from a function that has ZA state} } */
+}
+
+void __attribute__((arm_shared_za))
+f3 (svbool_t pg, svint8_t s8, svuint8_t u8)
+{
+ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("arch=armv8.2-a+sme-i16i64")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f4 (svbool_t pg, svint16_t s16, svuint16_t u16)
+{
+ svusmopa_za64_m (-1, pg, pg, u16, s16); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \[0, 7\]} } */
+ svusmopa_za64_m (8, pg, pg, u16, s16); /* { dg-error {passing 8 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \[0, 7\]} } */
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv8.2-a+sme")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svint32_t s32,
+ svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, uint32_t tile)
+{
+ svmopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svmopa_za32_m'} } */
+ svmopa_za32_m (0, pg, pg, s8, s8, 0); /* { dg-error {too many arguments to function 'svmopa_za32_m'} } */
+ svmopa_za32_m (tile, pg, pg, s8, s8); /* { dg-error {argument 1 of 'svmopa_za32_m' must be an integer constant expression} } */
+ svmopa_za32_m (-1, pg, pg, s8, s8); /* { dg-error {passing -1 to argument 1 of 'svmopa_za32_m', which expects a value in the range \[0, 3\]} } */
+ svmopa_za32_m (4, pg, pg, s8, s8); /* { dg-error {passing 4 to argument 1 of 'svmopa_za32_m', which expects a value in the range \[0, 3\]} } */
+ svmopa_za32_m (0, u8, pg, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svmopa_za32_m', which expects 'svbool_t'} } */
+ svmopa_za32_m (0, pg, u8, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svmopa_za32_m', which expects 'svbool_t'} } */
+ svmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svmopa_za32_m', which expects an SVE vector type} } */
+ svmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {passing 'svint8_t'.* to argument 5 of 'svmopa_za32_m', but previous arguments had type 'svuint8_t'} } */
+ svmopa_za32_m (0, pg, pg, s8, f16); /* { dg-error {passing 'svfloat16_t'.* to argument 5 of 'svmopa_za32_m', but previous arguments had type 'svint8_t'} } */
+ svmopa_za32_m (0, pg, pg, pg, pg); /* { dg-error {'svmopa_za32_m' has no form that takes 'svbool_t' arguments} } */
+ svmopa_za32_m (0, pg, pg, s16, s16); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint16_t' arguments} } */
+ svmopa_za32_m (0, pg, pg, s32, s32); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint32_t' arguments} } */
+ svmopa_za32_m (0, pg, pg, f64, f64); /* { dg-error {'svmopa_za32_m' has no form that takes 'svfloat64_t' arguments} } */
+
+ svmopa_za64_m (0, pg, pg, s16, s16); /* { dg-error {ACLE function 'svmopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */
+}
+
+void __attribute__((arm_streaming))
+f2 (svbool_t pg, svint8_t s8)
+{
+ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called from a function that has ZA state} } */
+}
+
+void __attribute__((arm_shared_za))
+f3 (svbool_t pg, svint8_t s8)
+{
+ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("arch=armv8.2-a+sme-i16i64")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f4 (svbool_t pg, svint16_t s16)
+{
+ svmopa_za64_m (-1, pg, pg, s16, s16); /* { dg-error {passing -1 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */
+ svmopa_za64_m (8, pg, pg, s16, s16); /* { dg-error {passing 8 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv8.2-a+sme")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f1 (svbool_t pg, svfloat64_t f64)
+{
+ svmopa_za64_m (0, pg, pg, f64, f64); /* { dg-error {ACLE function 'svmopa_za64_f64_m' requires ISA extension 'sme-f64f64'} } */
+}
new file mode 100644
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv8.2-a+sme")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
+ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile)
+{
+ svsumopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svsumopa_za32_m'} } */
+ svsumopa_za32_m (0, pg, pg, s8, u8, 0); /* { dg-error {too many arguments to function 'svsumopa_za32_m'} } */
+ svsumopa_za32_m (tile, pg, pg, s8, u8); /* { dg-error {argument 1 of 'svsumopa_za32_m' must be an integer constant expression} } */
+ svsumopa_za32_m (-1, pg, pg, s8, u8); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \[0, 3\]} } */
+ svsumopa_za32_m (4, pg, pg, s8, u8); /* { dg-error {passing 4 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \[0, 3\]} } */
+ svsumopa_za32_m (0, u8, pg, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsumopa_za32_m', which expects 'svbool_t'} } */
+ svsumopa_za32_m (0, pg, u8, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svsumopa_za32_m', which expects 'svbool_t'} } */
+ svsumopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svsumopa_za32_m', which expects an SVE vector type} } */
+ svsumopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svuint8_t' arguments} } */
+ svsumopa_za32_m (0, pg, pg, pg, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svbool_t' arguments} } */
+ svsumopa_za32_m (0, pg, pg, f16, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */
+ svsumopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 5 of 'svsumopa_za32_m', which expects a vector of unsigned integers} } */
+ svsumopa_za32_m (0, pg, pg, s8, u16); /* { dg-error {arguments 4 and 5 of 'svsumopa_za32_m' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */
+ svsumopa_za32_m (0, pg, pg, s16, u16); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svint16_t' arguments} } */
+
+ svsumopa_za64_m (0, pg, pg, s16, u16); /* { dg-error {ACLE function 'svsumopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */
+}
+
+void __attribute__((arm_streaming))
+f2 (svbool_t pg, svint8_t s8, svuint8_t u8)
+{
+ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called from a function that has ZA state} } */
+}
+
+void __attribute__((arm_shared_za))
+f3 (svbool_t pg, svint8_t s8, svuint8_t u8)
+{
+ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("arch=armv8.2-a+sme-i16i64")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f4 (svbool_t pg, svint16_t s16, svuint16_t u16)
+{
+ svsumopa_za64_m (-1, pg, pg, s16, u16); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za64_m', which expects a value in the range \[0, 7\]} } */
+ svsumopa_za64_m (8, pg, pg, s16, u16); /* { dg-error {passing 8 to argument 1 of 'svsumopa_za64_m', which expects a value in the range \[0, 7\]} } */
+}
@@ -4,6 +4,6 @@
to be diagnosed. Any attempt to call the function before including
arm_sve.h will lead to a link failure. (Same for taking its address,
etc.) */
-extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t);
+extern __attribute__((arm_preserves_za)) __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t);
#pragma GCC aarch64 "arm_sve.h"
@@ -6,7 +6,7 @@
At the moment this works like other built-ins in the sense that the
explicit definition "wins". This isn't supported behavior though. */
-__SVUint8_t
+__SVUint8_t __attribute__((arm_preserves_za))
svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y)
{
return x;
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv8.2-a+sme")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16,
+ svfloat32_t f32, uint32_t tile)
+{
+ svread_hor_za8_m (s8, pg, 0); /* { dg-error {too few arguments to function 'svread_hor_za8_m'} } */
+ svread_hor_za8_m (s8, pg, 0, 0, 0); /* { dg-error {too many arguments to function 'svread_hor_za8_m'} } */
+ svread_hor_za8_m (tile, pg, 0, 0); /* { dg-error {passing 'uint32_t'.* to argument 1 of 'svread_hor_za8_m', which expects an SVE vector type} } */
+ svread_hor_za8_m (pg, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svbool_t' arguments} } */
+ svread_hor_za8_m (u16, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svuint16_t' arguments} } */
+ svread_hor_za8_m (s8, s8, 0, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svread_hor_za8_m', which expects 'svbool_t'} } */
+ svread_hor_za8_m (s8, pg, tile, 0); /* { dg-error {argument 3 of 'svread_hor_za8_m' must be an integer constant expression} } */
+ svread_hor_za8_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */
+ svread_hor_za8_m (s8, pg, 1, 0); /* { dg-error {passing 1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */
+ svread_hor_za8_m (s8, pg, 0, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svread_hor_za8_m', which expects 'uint32_t'} } */
+
+ svread_hor_za16_m (u16, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za16_m', which expects a value in the range \[0, 1\]} } */
+ svread_hor_za16_m (u16, pg, 2, 0); /* { dg-error {passing 2 to argument 3 of 'svread_hor_za16_m', which expects a value in the range \[0, 1\]} } */
+
+ svread_hor_za32_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \[0, 3\]} } */
+ svread_hor_za32_m (f32, pg, 4, 0); /* { dg-error {passing 4 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \[0, 3\]} } */
+
+ svread_hor_za64_m (s64, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \[0, 7\]} } */
+ svread_hor_za64_m (s64, pg, 8, 0); /* { dg-error {passing 8 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \[0, 7\]} } */
+
+ svread_hor_za128_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */
+ svread_hor_za128_m (s8, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */
+ svread_hor_za128_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */
+ svread_hor_za128_m (f32, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */
+}
+
+void __attribute__((arm_streaming))
+f2 (svbool_t pg, svint8_t s8)
+{
+ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called from a function that has ZA state} } */
+}
+
+void __attribute__((arm_shared_za))
+f3 (svbool_t pg, svint8_t s8)
+{
+ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called when SME streaming mode is enabled} } */
+}
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv8.2-a+sme")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f1 (svbool_t pg, svuint8_t u8, svint16_t s16, svint32_t s32, svint64_t s64,
+ svfloat32_t f32, uint32_t tile)
+{
+ svaddha_za32_m (0, pg, pg); /* { dg-error {too few arguments to function 'svaddha_za32_m'} } */
+ svaddha_za32_m (0, pg, pg, s32, s32); /* { dg-error {too many arguments to function 'svaddha_za32_m'} } */
+ svaddha_za32_m (tile, pg, pg, s32); /* { dg-error {argument 1 of 'svaddha_za32_m' must be an integer constant expression} } */
+ svaddha_za32_m (-1, pg, pg, s32); /* { dg-error {passing -1 to argument 1 of 'svaddha_za32_m', which expects a value in the range \[0, 3\]} } */
+ svaddha_za32_m (4, pg, pg, s32); /* { dg-error {passing 4 to argument 1 of 'svaddha_za32_m', which expects a value in the range \[0, 3\]} } */
+ svaddha_za32_m (0, u8, pg, s32); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svaddha_za32_m', which expects 'svbool_t'} } */
+ svaddha_za32_m (0, pg, u8, s32); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svaddha_za32_m', which expects 'svbool_t'} } */
+ svaddha_za32_m (0, pg, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svaddha_za32_m', which expects an SVE vector type} } */
+ svaddha_za32_m (0, pg, pg, pg); /* { dg-error {'svaddha_za32_m' has no form that takes 'svbool_t' arguments} } */
+ svaddha_za32_m (0, pg, pg, u8); /* { dg-error {'svaddha_za32_m' has no form that takes 'svuint8_t' arguments} } */
+ svaddha_za32_m (0, pg, pg, s16); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint16_t' arguments} } */
+ svaddha_za32_m (0, pg, pg, f32); /* { dg-error {'svaddha_za32_m' has no form that takes 'svfloat32_t' arguments} } */
+ svaddha_za32_m (0, pg, pg, s64); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint64_t' arguments} } */
+
+ svaddha_za64_m (0, pg, pg, s64); /* { dg-error {ACLE function 'svaddha_za64_s64_m' requires ISA extension 'sme-i16i64'} } */
+}
+
+void __attribute__((arm_streaming))
+f2 (svbool_t pg, svint32_t s32)
+{
+ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called from a function that has ZA state} } */
+}
+
+void __attribute__((arm_shared_za))
+f3 (svbool_t pg, svint32_t s32)
+{
+ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called when SME streaming mode is enabled} } */
+}
+
+#pragma GCC target ("arch=armv8.2-a+sme-i16i64")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f4 (svbool_t pg, svint64_t s64)
+{
+ svaddha_za64_m (-1, pg, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svaddha_za64_m', which expects a value in the range \[0, 7\]} } */
+ svaddha_za64_m (8, pg, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svaddha_za64_m', which expects a value in the range \[0, 7\]} } */
+}
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv8.2-a+sme")
+
+void __attribute__((arm_streaming, arm_shared_za))
+f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16,
+ svfloat32_t f32, uint32_t tile)
+{
+ svwrite_ver_za8_m (0, 0, pg); /* { dg-error {too few arguments to function 'svwrite_ver_za8_m'} } */
+ svwrite_ver_za8_m (0, 0, pg, s8, 0); /* { dg-error {too many arguments to function 'svwrite_ver_za8_m'} } */
+ svwrite_ver_za8_m (tile, 0, pg, s8); /* { dg-error {argument 1 of 'svwrite_ver_za8_m' must be an integer constant expression} } */
+ svwrite_ver_za8_m (-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */
+ svwrite_ver_za8_m (1, 0, pg, s8); /* { dg-error {passing 1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */
+ svwrite_ver_za8_m (0, u8, pg, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svwrite_ver_za8_m', which expects 'uint32_t'} } */
+ svwrite_ver_za8_m (0, 0, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svwrite_ver_za8_m', which expects 'svbool_t'} } */
+ svwrite_ver_za8_m (0, 0, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svwrite_ver_za8_m', which expects an SVE vector type} } */
+ svwrite_ver_za8_m (0, 0, pg, pg); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svbool_t' arguments} } */
+ svwrite_ver_za8_m (0, 0, pg, u16); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svuint16_t' arguments} } */
+
+ svwrite_ver_za16_m (-1, 0, pg, u16); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \[0, 1\]} } */
+ svwrite_ver_za16_m (2, 0, pg, u16); /* { dg-error {passing 2 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \[0, 1\]} } */
+
+ svwrite_ver_za32_m (-1, 0, pg, f32); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \[0, 3\]} } */
+ svwrite_ver_za32_m (4, 0, pg, f32); /* { dg-error {passing 4 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \[0, 3\]} } */
+
+ svwrite_ver_za64_m (-1, 0, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \[0, 7\]} } */
+ svwrite_ver_za64_m (8, 0, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \[0, 7\]} } */
+
+ svwrite_ver_za128_m (-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */
+ svwrite_ver_za128_m (16, 0, pg, s8); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */
+ svwrite_ver_za128_m (-1, 0, pg, f32); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */
+ svwrite_ver_za128_m (16, 0, pg, f32); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */
+}
+
+void __attribute__((arm_streaming))
+f2 (svbool_t pg, svint8_t s8)
+{
+ svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called from a function that has ZA state} } */
+}
+
+void __attribute__((arm_shared_za))
+f3 (svbool_t pg, svint8_t s8)
+{
+ svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called when SME streaming mode is enabled} } */
+}
@@ -10781,7 +10781,8 @@ proc check_effective_target_aarch64_tiny { } {
# various architecture extensions via the .arch_extension pseudo-op.

 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
- "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" } {
+ "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "sme"
+ "sme-i16i64" } {
eval [string map [list FUNC $aarch64_ext] {
proc check_effective_target_aarch64_asm_FUNC_ok { } {
if { [istarget aarch64*-*-*] } {