@@ -32,4 +32,9 @@
DEF_AARCH64_ISA_MODE(SM_ON)
DEF_AARCH64_ISA_MODE(SM_OFF)
+/* Indicates that PSTATE.ZA is known to be 1. The converse is that
+ PSTATE.ZA might be 0 or 1, depending on whether there is an uncommitted
+ lazy save. */
+DEF_AARCH64_ISA_MODE(ZA_ON)
+
#undef DEF_AARCH64_ISA_MODE
@@ -807,6 +807,8 @@ bool aarch64_sve_addvl_addpl_immediate_p (rtx);
bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
int aarch64_add_offset_temporaries (rtx);
void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
+bool aarch64_rdsvl_immediate_p (const_rtx);
+char *aarch64_output_rdsvl (const_rtx);
bool aarch64_mov_operand_p (rtx, machine_mode);
rtx aarch64_reverse_mask (machine_mode, unsigned int);
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
@@ -1077,4 +1079,6 @@ const char *aarch64_indirect_call_asm (rtx);
extern bool aarch64_harden_sls_retbr_p (void);
extern bool aarch64_harden_sls_blr_p (void);
+void aarch64_restore_za ();
+
#endif /* GCC_AARCH64_PROTOS_H */
@@ -23,6 +23,7 @@
;; == State management
;; ---- Test current state
;; ---- PSTATE.SM management
+;; ---- PSTATE.ZA management
;; =========================================================================
;; == State management
@@ -131,3 +132,140 @@ (define_insn "aarch64_smstop_sm"
""
"smstop\tsm"
)
+
+;; -------------------------------------------------------------------------
+;; ---- PSTATE.ZA management
+;; -------------------------------------------------------------------------
+;; Includes
+;; - SMSTART ZA
+;; - SMSTOP ZA
+;; plus calls to support routines.
+;; -------------------------------------------------------------------------
+
+(define_c_enum "unspec" [
+ UNSPEC_SMSTART_ZA
+ UNSPEC_SMSTOP_ZA
+ UNSPEC_TPIDR2_SAVE
+ UNSPEC_TPIDR2_RESTORE
+ UNSPEC_READ_TPIDR2
+ UNSPEC_CLEAR_TPIDR2
+])
+
+;; Enable ZA, starting with fresh ZA contents. This is only valid when
+;; SME is present, but the pattern does not depend on TARGET_SME since
+;; it can be used conditionally.
+(define_insn "aarch64_smstart_za"
+ [(unspec_volatile [(const_int 0)] UNSPEC_SMSTART_ZA)
+ (clobber (reg:VNx16QI ZA_REGNUM))]
+ ""
+ "smstart\tza"
+)
+
+;; Disable ZA and discard its current contents. This is only valid when
+;; SME is present, but the pattern does not depend on TARGET_SME since
+;; it can be used conditionally.
+;;
+;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA
+;; is zero. This instruction is therefore sequenced wrt writes to
+;; OLD_ZA_REGNUM.
+(define_insn "aarch64_smstop_za"
+ [(unspec_volatile [(reg:VNx16QI OLD_ZA_REGNUM)] UNSPEC_SMSTOP_ZA)
+ (clobber (reg:VNx16QI ZA_REGNUM))]
+ ""
+ "smstop\tza"
+)
+
+;; Use the ABI-defined routine to commit any uncommitted lazy save.
+(define_insn "aarch64_tpidr2_save"
+ [(unspec_volatile:DI [(reg:VNx16QI OLD_ZA_REGNUM)
+ (reg:VNx16QI ZA_REGNUM)] UNSPEC_TPIDR2_SAVE)
+ (clobber (reg:DI R14_REGNUM))
+ (clobber (reg:DI R15_REGNUM))
+ (clobber (reg:DI R16_REGNUM))
+ (clobber (reg:DI R17_REGNUM))
+ (clobber (reg:DI R18_REGNUM))
+ (clobber (reg:DI R30_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "bl\t__arm_tpidr2_save"
+)
+
+;; Use the ABI-defined routine to restore lazy-saved ZA contents
+;; from the TPIDR2 block pointed to by X0.
+(define_insn "aarch64_tpidr2_restore"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (unspec:VNx16QI [(reg:VNx16QI OLD_ZA_REGNUM)
+ (reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE))
+ (clobber (reg:DI R14_REGNUM))
+ (clobber (reg:DI R15_REGNUM))
+ (clobber (reg:DI R16_REGNUM))
+ (clobber (reg:DI R17_REGNUM))
+ (clobber (reg:DI R18_REGNUM))
+ (clobber (reg:DI R30_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ ""
+ "bl\t__arm_tpidr2_restore"
+)
+
+;; Check whether a lazy save of ZA is active. This is only valid when
+;; SME is present, but the pattern does not depend on TARGET_SME since
+;; it can be used conditionally.
+(define_insn "aarch64_read_tpidr2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(reg:VNx16QI OLD_ZA_REGNUM)] UNSPEC_READ_TPIDR2))]
+ ""
+ "mrs\t%0, tpidr2_el0"
+)
+
+;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save. This is only
+;; valid when SME is present, but the pattern does not depend on TARGET_SME
+;; since it can be used conditionally.
+(define_insn "aarch64_clear_tpidr2"
+ [(set (reg:VNx16QI OLD_ZA_REGNUM)
+ (unspec:VNx16QI [(const_int 0)] UNSPEC_CLEAR_TPIDR2))]
+ ""
+ "msr\ttpidr2_el0, xzr"
+)
+
+;; Set up a lazy save of ZA. Operand 0 points to the TPIDR2 block and
+;; operand 1 is the contents of that block. Operand 1 exists only to
+;; provide dependency information: the TPIDR2 block must be valid
+;; before TPIDR2_EL0 is updated.
+(define_insn "aarch64_save_za"
+ [(set (reg:VNx16QI OLD_ZA_REGNUM)
+ (reg:VNx16QI ZA_REGNUM))
+ (use (match_operand 0 "pmode_register_operand" "r"))
+ (use (match_operand:V16QI 1 "memory_operand" "m"))]
+ ""
+ "msr\ttpidr2_el0, %0"
+)
+
+;; Check whether a lazy save set up by aarch64_save_za was committed
+;; and restore the saved contents if so.
+(define_insn_and_split "aarch64_restore_za"
+ [(set (reg:VNx16QI ZA_REGNUM)
+ (reg:VNx16QI OLD_ZA_REGNUM))
+ (clobber (reg:DI R14_REGNUM))
+ (clobber (reg:DI R15_REGNUM))
+ (clobber (reg:DI R16_REGNUM))
+ (clobber (reg:DI R17_REGNUM))
+ (clobber (reg:DI R18_REGNUM))
+ (clobber (reg:DI R30_REGNUM))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (reg:VNx16QI OLD_ZA_REGNUM))]
+ ""
+ "#"
+ "&& epilogue_completed"
+ [(const_int 0)]
+ {
+ auto label = gen_label_rtx ();
+ auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
+ emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
+ auto jump = emit_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label));
+ JUMP_LABEL (jump) = label;
+ aarch64_restore_za ();
+ emit_label (label);
+ emit_insn (gen_aarch64_clear_tpidr2 ());
+ DONE;
+ }
+)
@@ -2733,6 +2733,22 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree,
gcc_unreachable ();
}
+/* Check whether an 'arm_new_za' attribute is valid. */
+
+static tree
+handle_arm_new_za_attribute (tree *node, tree name, tree,
+ int, bool *no_add_attrs)
+{
+ tree decl = *node;
+ if (TREE_CODE (decl) != FUNCTION_DECL)
+ {
+ error_at (DECL_SOURCE_LOCATION (decl),
+ "%qE attribute applies only to functions", name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
+
/* Mutually-exclusive function type attributes for controlling PSTATE.SM. */
static const struct attribute_spec::exclusions attr_streaming_exclusions[] =
{
@@ -2743,6 +2759,26 @@ static const struct attribute_spec::exclusions attr_streaming_exclusions[] =
{ NULL, false, false, false }
};
+/* Function type attributes that are mutually-exclusive with arm_new_za. */
+static const struct attribute_spec::exclusions attr_arm_new_za_exclusions[] =
+{
+ /* Attribute name exclusion applies to:
+ function, type, variable */
+ { "arm_preserves_za", true, false, false },
+ { "arm_shared_za", true, false, false },
+ { NULL, false, false, false }
+};
+
+/* Used by function type attributes that are mutually-exclusive with
+ arm_new_za. */
+static const struct attribute_spec::exclusions attr_no_arm_new_za[] =
+{
+ /* Attribute name exclusion applies to:
+ function, type, variable */
+ { "arm_new_za", true, false, false },
+ { NULL, false, false, false }
+};
+
/* Table of machine attributes. */
static const struct attribute_spec aarch64_attribute_table[] =
{
@@ -2754,6 +2790,13 @@ static const struct attribute_spec aarch64_attribute_table[] =
NULL, attr_streaming_exclusions },
{ "arm_streaming_compatible", 0, 0, false, true, true, true,
NULL, attr_streaming_exclusions },
+ { "arm_new_za", 0, 0, true, false, false, false,
+ handle_arm_new_za_attribute,
+ attr_arm_new_za_exclusions },
+ { "arm_shared_za", 0, 0, false, true, true, true,
+ NULL, attr_no_arm_new_za },
+ { "arm_preserves_za", 0, 0, false, true, true, true,
+ NULL, attr_no_arm_new_za },
{ "arm_sve_vector_bits", 1, 1, false, true, false, true,
aarch64_sve::handle_arm_sve_vector_bits_attribute,
NULL },
@@ -3929,6 +3972,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
case PR_HI_REGS:
case FFR_REGS:
case PR_AND_FFR_REGS:
+ case ZA_REGS:
return 1;
default:
return CEIL (lowest_size, UNITS_PER_WORD);
@@ -3959,6 +4003,9 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
if (pr_or_ffr_regnum_p (regno))
return false;
+ if (regno == ZA_REGNUM || regno == OLD_ZA_REGNUM)
+ return true;
+
if (regno == SP_REGNUM)
/* The purpose of comparing with ptr_mode is to support the
global register variable associated with the stack pointer
@@ -4078,12 +4125,41 @@ aarch64_fntype_sm_state (const_tree fntype)
return AARCH64_FL_SM_OFF;
}
+/* Return the state of PSTATE.ZA on entry to functions of type FNTYPE. */
+
+static aarch64_feature_flags
+aarch64_fntype_za_state (const_tree fntype)
+{
+ if (lookup_attribute ("arm_shared_za", TYPE_ATTRIBUTES (fntype)))
+ return AARCH64_FL_ZA_ON;
+
+ return 0;
+}
+
/* Return the ISA mode on entry to functions of type FNTYPE. */
static aarch64_feature_flags
aarch64_fntype_isa_mode (const_tree fntype)
{
- return aarch64_fntype_sm_state (fntype);
+ return (aarch64_fntype_sm_state (fntype)
+ | aarch64_fntype_za_state (fntype));
+}
+
+/* Return true if functions of type FNTYPE preserve the contents of ZA. */
+
+static bool
+aarch64_fntype_preserves_za (const_tree fntype)
+{
+ return lookup_attribute ("arm_preserves_za", TYPE_ATTRIBUTES (fntype));
+}
+
+/* Return true if FNDECL creates new ZA state (as opposed to sharing
+ ZA with its callers or ignoring ZA altogether). */
+
+static bool
+aarch64_fndecl_has_new_za_state (const_tree fndecl)
+{
+ return lookup_attribute ("arm_new_za", DECL_ATTRIBUTES (fndecl));
}
/* Return the state of PSTATE.SM when compiling the body of
@@ -4096,13 +4172,34 @@ aarch64_fndecl_sm_state (const_tree fndecl)
return aarch64_fntype_sm_state (TREE_TYPE (fndecl));
}
+/* Return the state of PSTATE.ZA when compiling the body of function FNDECL.
+ This might be different from the state of PSTATE.ZA on entry. */
+
+static aarch64_feature_flags
+aarch64_fndecl_za_state (const_tree fndecl)
+{
+ if (aarch64_fndecl_has_new_za_state (fndecl))
+ return AARCH64_FL_ZA_ON;
+
+ return aarch64_fntype_za_state (TREE_TYPE (fndecl));
+}
+
/* Return the ISA mode that should be used to compile the body of
function FNDECL. */
static aarch64_feature_flags
aarch64_fndecl_isa_mode (const_tree fndecl)
{
- return aarch64_fndecl_sm_state (fndecl);
+ return (aarch64_fndecl_sm_state (fndecl)
+ | aarch64_fndecl_za_state (fndecl));
+}
+
+/* Return true if function FNDECL preserves the contents of ZA. */
+
+static bool
+aarch64_fndecl_preserves_za (const_tree fndecl)
+{
+ return aarch64_fntype_preserves_za (TREE_TYPE (fndecl));
}
/* Return the state of PSTATE.SM on entry to the current function.
@@ -4115,6 +4212,25 @@ aarch64_cfun_incoming_sm_state ()
return aarch64_fntype_sm_state (TREE_TYPE (cfun->decl));
}
+/* Return the state of PSTATE.ZA on entry to the current function
+ (which might be different from the state of PSTATE.ZA in the
+ function body). */
+
+static aarch64_feature_flags
+aarch64_cfun_incoming_za_state ()
+{
+ return aarch64_fntype_za_state (TREE_TYPE (cfun->decl));
+}
+
+/* Return true if the current function creates new ZA state (as opposed
+ to sharing ZA with its callers or ignoring ZA altogether). */
+
+static bool
+aarch64_cfun_has_new_za_state ()
+{
+ return aarch64_fndecl_has_new_za_state (cfun->decl);
+}
+
/* Return true if a call from the current function to a function with
ISA mode CALLEE_MODE would involve a change to PSTATE.SM around
the BL instruction. */
@@ -5678,6 +5794,74 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
factor, nelts_per_vq);
}
+/* Return a constant that represents FACTOR multiplied by the
+ number of 128-bit quadwords in an SME vector. ISA_MODE is the
+ ISA mode in which the calculation is being performed. */
+
+static rtx
+aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor,
+ aarch64_feature_flags isa_mode)
+{
+ gcc_assert (aarch64_sve_rdvl_factor_p (factor));
+ if (isa_mode & AARCH64_FL_SM_ON)
+ /* We're in streaming mode, so we can use normal poly-int values. */
+ return gen_int_mode ({ factor, factor }, mode);
+
+ rtvec vec = gen_rtvec (1, gen_int_mode (factor, SImode));
+ rtx unspec = gen_rtx_UNSPEC (mode, vec, UNSPEC_SME_VQ);
+ return gen_rtx_CONST (mode, unspec);
+}
+
+/* Return true if X is a constant that represents some factor
+   multiplied by the number of quadwords in an SME vector.  Store that
+   factor in *FACTOR if so.  */
+
+static bool
+aarch64_sme_vq_unspec_p (const_rtx x, HOST_WIDE_INT *factor)
+{
+ if (!TARGET_SME || GET_CODE (x) != CONST)
+ return false;
+
+ x = XEXP (x, 0);
+ if (GET_CODE (x) != UNSPEC
+ || XINT (x, 1) != UNSPEC_SME_VQ
+ || XVECLEN (x, 0) != 1)
+ return false;
+
+ x = XVECEXP (x, 0, 0);
+ if (!CONST_INT_P (x))
+ return false;
+
+ *factor = INTVAL (x);
+ return true;
+}
+
+/* Return true if X is a constant that represents some factor
+   multiplied by the number of quadwords in an SME vector, and if
+   that factor is in the range of RDSVL.  */
+
+bool
+aarch64_rdsvl_immediate_p (const_rtx x)
+{
+ HOST_WIDE_INT factor;
+ return (aarch64_sme_vq_unspec_p (x, &factor)
+ && aarch64_sve_rdvl_factor_p (factor));
+}
+
+/* Return the asm string for an RDSVL instruction that calculates X,
+ which is a constant that satisfies aarch64_rdsvl_immediate_p. */
+
+char *
+aarch64_output_rdsvl (const_rtx x)
+{
+ gcc_assert (aarch64_rdsvl_immediate_p (x));
+ static char buffer[sizeof ("rdsvl\t%x0, #-") + 3 * sizeof (int)];
+ x = XVECEXP (XEXP (x, 0), 0, 0);
+ snprintf (buffer, sizeof (buffer), "rdsvl\t%%x0, #%d",
+ (int) INTVAL (x) / 16);
+ return buffer;
+}
+
/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
@@ -7457,6 +7641,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
}
+ if (aarch64_rdsvl_immediate_p (base))
+ {
+ /* We could handle non-constant offsets if they are ever
+ generated. */
+ gcc_assert (const_offset == 0);
+ emit_insn (gen_rtx_SET (dest, imm));
+ return;
+ }
+
sty = aarch64_classify_symbol (base, const_offset);
switch (sty)
{
@@ -8458,7 +8651,7 @@ void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
const_tree fntype,
rtx libname ATTRIBUTE_UNUSED,
- const_tree fndecl ATTRIBUTE_UNUSED,
+ const_tree fndecl,
unsigned n_named ATTRIBUTE_UNUSED,
bool silent_p)
{
@@ -8483,6 +8676,9 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
pcum->aapcs_stack_words = 0;
pcum->aapcs_stack_size = 0;
pcum->silent_p = silent_p;
+ pcum->preserves_za = (fndecl ? aarch64_fndecl_preserves_za (fndecl)
+ : fntype ? aarch64_fntype_preserves_za (fntype)
+ : false);
pcum->num_sme_mode_switch_args = 0;
if (!silent_p
@@ -9015,6 +9211,12 @@ aarch64_layout_frame (void)
frame.wb_push_candidate2 = INVALID_REGNUM;
frame.spare_pred_reg = INVALID_REGNUM;
+ frame.has_new_za_state = (aarch64_cfun_has_new_za_state ()
+ && DF_REG_USE_COUNT (ZA_REGNUM) > 0);
+ if (frame.has_new_za_state)
+ /* Saving any old ZA state involves a call to __arm_tpidr2_save. */
+ df_set_regs_ever_live (R30_REGNUM, true);
+
/* First mark all the registers that really need to be saved... */
for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
@@ -10443,7 +10645,11 @@ aarch64_epilogue_uses (int regno)
{
if (regno == LR_REGNUM)
return 1;
+ if (regno == ZA_REGNUM)
+ return 1;
}
+ if (regno == ZA_REGNUM && aarch64_cfun_incoming_za_state ())
+ return 1;
return 0;
}
@@ -10756,6 +10962,27 @@ aarch64_expand_prologue (void)
emit_move_insn (gen_rtx_REG (DImode, R1_REGNUM), old_r1);
}
}
+
+ if (cfun->machine->frame.has_new_za_state)
+ {
+ /* Commit any uncommitted lazy save and turn ZA on. The sequence is:
+
+ mrs <temp>, tpidr2_el0
+ cbz <temp>, no_save
+ bl __arm_tpidr2_save
+ msr tpidr2_el0, xzr
+ no_save:
+ smstart za */
+ auto label = gen_label_rtx ();
+ auto tmp_reg = gen_rtx_REG (DImode, STACK_CLASH_SVE_CFA_REGNUM);
+ emit_insn (gen_aarch64_read_tpidr2 (tmp_reg));
+ auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (tmp_reg, label));
+ JUMP_LABEL (jump) = label;
+ emit_insn (gen_aarch64_tpidr2_save ());
+ emit_insn (gen_aarch64_clear_tpidr2 ());
+ emit_label (label);
+ emit_insn (gen_aarch64_smstart_za ());
+ }
}
/* Return TRUE if we can use a simple_return insn.
@@ -10829,6 +11056,11 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
= maybe_ne (get_frame_size ()
+ cfun->machine->frame.saved_varargs_size, 0);
+ if (cfun->machine->frame.has_new_za_state)
+ /* Turn ZA off before returning. TPIDR2_EL0 is already null at
+ this point. */
+ emit_insn (gen_aarch64_smstop_za ());
+
/* Emit a barrier to prevent loads from a deallocated stack. */
if (maybe_gt (final_adjust, crtl->outgoing_args_size)
|| cfun->calls_alloca
@@ -11989,6 +12221,66 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
return true;
}
+/* Make the start of the current function allocate a ZA lazy save buffer
+ and associated TPIDR2 block. Also make it initialize the TPIDR2 block
+ to point to the ZA save buffer. */
+
+static void
+aarch64_create_tpidr2_block ()
+{
+ if (cfun->machine->tpidr2_block)
+ return;
+
+ start_sequence ();
+ NO_DEFER_POP;
+
+ /* The TPIDR2 block is 16 bytes in size and must be aligned to a 128-bit
+ boundary. */
+ rtx block = assign_stack_local (V16QImode, 16, 128);
+
+ /* We use the block by moving its address into TPIDR2_EL0, so we need
+ a simple register pointer to it rather than a general address. */
+ rtx ptr = force_reg (Pmode, XEXP (block, 0));
+ cfun->machine->tpidr2_block_ptr = ptr;
+ cfun->machine->tpidr2_block = replace_equiv_address (block, ptr);
+
+ /* The ZA save buffer is SVL.B*SVL.B bytes in size. */
+ rtx svl_bytes = aarch64_sme_vq_immediate (Pmode, 16, AARCH64_ISA_MODE);
+ rtx za_size = expand_simple_binop (Pmode, MULT, svl_bytes, svl_bytes,
+ NULL, 0, OPTAB_LIB_WIDEN);
+ rtx za_save_buffer = allocate_dynamic_stack_space (za_size, 128, 128,
+ -1, true);
+ za_save_buffer = force_reg (Pmode, za_save_buffer);
+ cfun->machine->za_save_buffer = za_save_buffer;
+
+ /* The first word of the block points to the save buffer and the second
+ word is the number of ZA slices to save. */
+ rtx block_0 = adjust_address (block, DImode, 0);
+ rtx block_8 = adjust_address (block, DImode, 8);
+ emit_insn (gen_store_pair_dw_didi (block_0, za_save_buffer,
+ block_8, force_reg (DImode, svl_bytes)));
+
+ OK_DEFER_POP;
+ auto insns = get_insns ();
+ end_sequence ();
+
+ emit_insn_after (insns, parm_birth_insn);
+}
+
+/* Restore the contents of ZA from the lazy save buffer. PSTATE.ZA is
+ known to be 0 and TPIDR2_EL0 is known to be null. */
+
+void
+aarch64_restore_za ()
+{
+ gcc_assert (cfun->machine->tpidr2_block);
+
+ emit_insn (gen_aarch64_smstart_za ());
+ emit_move_insn (gen_rtx_REG (Pmode, R0_REGNUM),
+ cfun->machine->tpidr2_block_ptr);
+ emit_insn (gen_aarch64_tpidr2_restore ());
+}
+
/* Implement TARGET_START_CALL_ARGS. */
static void
@@ -12004,6 +12296,23 @@ aarch64_start_call_args (cumulative_args_t ca_v)
" option %<-march%>, or by using the %<target%>"
" attribute or pragma", "sme");
}
+
+ if (!TARGET_ZA && (ca->isa_mode & AARCH64_FL_ZA_ON))
+ error ("call to an %<arm_shared_za%> function from a function"
+ " that has no ZA state");
+
+ /* Set up a lazy save buffer if the current function has ZA state
+ that is not shared with the callee and if the callee might
+ clobber the state. */
+ if (TARGET_ZA
+ && !(ca->isa_mode & AARCH64_FL_ZA_ON)
+ && !ca->preserves_za)
+ {
+ if (!cfun->machine->tpidr2_block)
+ aarch64_create_tpidr2_block ();
+ emit_insn (gen_aarch64_save_za (cfun->machine->tpidr2_block_ptr,
+ cfun->machine->tpidr2_block));
+ }
}
/* This function is used by the call expanders of the machine description.
@@ -12109,6 +12418,27 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
cfun->machine->call_switches_sm_state = true;
}
+
+ /* If the callee is a shared ZA function, record that it uses the
+ current value of ZA. */
+ if (callee_isa_mode & AARCH64_FL_ZA_ON)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
+ gen_rtx_REG (VNx16BImode, ZA_REGNUM));
+}
+
+/* Implement TARGET_END_CALL_ARGS. */
+
+static void
+aarch64_end_call_args (cumulative_args_t ca_v)
+{
+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
+
+ /* If we set up a ZA lazy save before the call, check whether the save
+     was committed.  Restore the contents of ZA from the buffer if so.  */
+ if (TARGET_ZA
+ && !(ca->isa_mode & AARCH64_FL_ZA_ON)
+ && !ca->preserves_za)
+ emit_insn (gen_aarch64_restore_za ());
}
/* Emit call insn with PAT and do aarch64-specific handling. */
@@ -13246,6 +13576,9 @@ aarch64_regno_regclass (unsigned regno)
if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
return FFR_REGS;
+ if (regno == ZA_REGNUM || regno == OLD_ZA_REGNUM)
+ return ZA_REGS;
+
return NO_REGS;
}
@@ -13601,12 +13934,14 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
return (vec_flags & VEC_ADVSIMD
? CEIL (lowest_size, UNITS_PER_VREG)
: CEIL (lowest_size, UNITS_PER_WORD));
+
case STACK_REG:
case PR_REGS:
case PR_LO_REGS:
case PR_HI_REGS:
case FFR_REGS:
case PR_AND_FFR_REGS:
+ case ZA_REGS:
return 1;
case NO_REGS:
@@ -18570,10 +18905,13 @@ aarch64_override_options_internal (struct gcc_options *opts)
&& !fixed_regs[R18_REGNUM])
error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
- if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
+ if ((opts->x_aarch64_isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON))
&& !(opts->x_aarch64_isa_flags & AARCH64_FL_SME))
{
- error ("streaming functions require the ISA extension %qs", "sme");
+ if (opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
+ error ("streaming functions require the ISA extension %qs", "sme");
+ else
+ error ("functions with ZA state require the ISA extension %qs", "sme");
inform (input_location, "you can enable %qs using the command-line"
" option %<-march%>, or by using the %<target%>"
" attribute or pragma", "sme");
@@ -20900,9 +21238,11 @@ aarch64_conditional_register_usage (void)
call_used_regs[i] = 1;
}
- /* Only allow the FFR and FFRT to be accessed via special patterns. */
+ /* Only allow these registers to be accessed via special patterns. */
CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
+ CLEAR_HARD_REG_BIT (operand_reg_set, ZA_REGNUM);
+ CLEAR_HARD_REG_BIT (operand_reg_set, OLD_ZA_REGNUM);
/* When tracking speculation, we need a couple of call-clobbered registers
to track the speculation state. It would be nice to just use
@@ -22359,6 +22699,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
|| aarch64_sve_rdvl_immediate_p (x)))
return true;
+ if (aarch64_rdsvl_immediate_p (x))
+ return true;
+
return aarch64_classify_symbolic_expression (x)
== SYMBOL_TINY_ABSOLUTE;
}
@@ -27810,9 +28153,36 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2)
return 0;
if (!check_attr ("arm_streaming_compatible"))
return 0;
+ if (!check_attr ("arm_shared_za"))
+ return 0;
+ if (!check_attr ("arm_preserves_za"))
+ return 0;
return 1;
}
+/* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
+
+static tree
+aarch64_merge_decl_attributes (tree olddecl, tree newdecl)
+{
+ tree attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
+ DECL_ATTRIBUTES (newdecl));
+
+ if (DECL_INITIAL (olddecl))
+ for (auto name : { "arm_new_za" })
+ if (!lookup_attribute (name, DECL_ATTRIBUTES (olddecl))
+ && lookup_attribute (name, DECL_ATTRIBUTES (newdecl)))
+ {
+ error ("cannot apply attribute %qs to %q+D after the function"
+ " has been defined", name, newdecl);
+ inform (DECL_SOURCE_LOCATION (olddecl), "%q+D defined here",
+ newdecl);
+ attrs = remove_attribute (name, attrs);
+ }
+
+ return attrs;
+}
+
/* Implement TARGET_GET_MULTILIB_ABI_NAME */
static const char *
@@ -28178,6 +28548,24 @@ aarch64_indirect_call_asm (rtx addr)
return "";
}
+/* Implement TARGET_MD_ASM_ADJUST. */
+
+rtx_insn *
+aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
+ vec<machine_mode> &input_modes,
+ vec<const char *> &constraints,
+ vec<rtx> &uses, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs, location_t loc)
+{
+ /* "za" in the clobber list is defined to mean that the asm can read
+ from and write to ZA. */
+ if (TEST_HARD_REG_BIT (clobbered_regs, ZA_REGNUM))
+ uses.safe_push (gen_rtx_REG (VNx16QImode, ZA_REGNUM));
+
+ return arm_md_asm_adjust (outputs, inputs, input_modes, constraints,
+ uses, clobbers, clobbered_regs, loc);
+}
+
/* If CALL involves a change in PSTATE.SM, emit the instructions needed
to switch to the new mode and the instructions needed to restore the
original mode. Return true if something changed. */
@@ -28565,6 +28953,9 @@ aarch64_run_selftests (void)
#undef TARGET_START_CALL_ARGS
#define TARGET_START_CALL_ARGS aarch64_start_call_args
+#undef TARGET_END_CALL_ARGS
+#define TARGET_END_CALL_ARGS aarch64_end_call_args
+
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
@@ -28926,6 +29317,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
+#undef TARGET_MERGE_DECL_ATTRIBUTES
+#define TARGET_MERGE_DECL_ATTRIBUTES aarch64_merge_decl_attributes
+
#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
@@ -28947,7 +29341,7 @@ aarch64_libgcc_floating_mode_supported_p
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MD_ASM_ADJUST
-#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+#define TARGET_MD_ASM_ADJUST aarch64_md_asm_adjust
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END aarch64_asm_file_end
@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
/* Macros to test ISA flags. */
#define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF)
+#define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON)
#define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES)
#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC)
#define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO)
@@ -259,6 +260,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
#define TARGET_STREAMING_COMPATIBLE \
((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)
+/* PSTATE.ZA is enabled in the current function body. */
+#define TARGET_ZA (AARCH64_ISA_ZA_ON)
+
/* Crypto is an optional extension to AdvSIMD. */
#define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
@@ -445,7 +449,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
1, 1, 1, 1, /* SFP, AP, CC, VG */ \
0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \
0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \
- 1, 1 /* FFR and FFRT */ \
+ 1, 1, /* FFR and FFRT */ \
+ 1, 1 /* TPIDR2 and ZA */ \
}
/* X30 is marked as caller-saved which is in line with regular function call
@@ -455,7 +460,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
true but not until function epilogues have been generated. This ensures
that X30 is available for use in leaf functions if needed. */
-#define CALL_USED_REGISTERS \
+#define CALL_REALLY_USED_REGISTERS \
{ \
1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \
1, 1, 1, 1, 1, 1, 1, 1, /* R8 - R15 */ \
@@ -468,7 +473,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
1, 1, 1, 1, /* SFP, AP, CC, VG */ \
1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \
1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \
- 1, 1 /* FFR and FFRT */ \
+ 1, 1, /* FFR and FFRT */ \
+ 1, 0 /* TPIDR2 and ZA */ \
}
#define REGISTER_NAMES \
@@ -484,7 +490,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
"sfp", "ap", "cc", "vg", \
"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \
- "ffr", "ffrt" \
+ "ffr", "ffrt", \
+ "za", "old_za" \
}
/* Generate the register aliases for core register N */
@@ -533,7 +540,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
#define FRAME_POINTER_REGNUM SFP_REGNUM
#define STACK_POINTER_REGNUM SP_REGNUM
#define ARG_POINTER_REGNUM AP_REGNUM
-#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1)
+#define FIRST_PSEUDO_REGISTER (OLD_ZA_REGNUM + 1)
/* The number of argument registers available for each class. */
#define NUM_ARG_REGS 8
@@ -673,6 +680,7 @@ enum reg_class
PR_REGS,
FFR_REGS,
PR_AND_FFR_REGS,
+ ZA_REGS,
ALL_REGS,
LIM_REG_CLASSES /* Last */
};
@@ -696,6 +704,7 @@ enum reg_class
"PR_REGS", \
"FFR_REGS", \
"PR_AND_FFR_REGS", \
+ "ZA_REGS", \
"ALL_REGS" \
}
@@ -716,6 +725,7 @@ enum reg_class
{ 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \
{ 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \
{ 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \
+ { 0x00000000, 0x00000000, 0x00c00000 }, /* ZA_REGS */ \
{ 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \
}
@@ -889,16 +899,36 @@ struct GTY (()) aarch64_frame
/* True if shadow call stack should be enabled for the current function. */
bool is_scs_enabled;
+
+ /* True if the function has an arm_new_za attribute and if ZA is
+ actually used by the function. */
+ bool has_new_za_state;
};
typedef struct GTY (()) machine_function
{
struct aarch64_frame frame;
+
/* One entry for each hard register. */
bool reg_is_wrapped_separately[LAST_SAVED_REGNUM];
+
/* One entry for each general purpose register. */
rtx call_via[SP_REGNUM];
+
+ /* A MEM for the whole of the function's TPIDR2 block, or null if the
+ function doesn't have a TPIDR2 block. */
+ rtx tpidr2_block;
+
+ /* A pseudo register that points to the function's TPIDR2 block, or null
+ if the function doesn't have a TPIDR2 block. */
+ rtx tpidr2_block_ptr;
+
+ /* A pseudo register that points to the function's ZA save buffer,
+ or null if none. */
+ rtx za_save_buffer;
+
bool label_is_assembled;
+
/* True if we've expanded at least one call to a function that changes
PSTATE.SM. This should only be used for saving compile time: false
guarantees that no such mode switch exists. */
@@ -968,6 +998,9 @@ typedef struct
bool silent_p; /* True if we should act silently, rather than
raise an error for invalid calls. */
+ /* True if the call preserves ZA. */
+ bool preserves_za;
+
/* A list of registers that need to be saved and restored around a
change to PSTATE.SM. An auto_vec would be more convenient, but those
can't be copied. */
@@ -111,6 +111,11 @@ (define_constants
;; "FFR token": a fake register used for representing the scheduling
;; restrictions on FFR-related operations.
(FFRT_REGNUM 85)
+ (ZA_REGNUM 86)
+ ;; Represents a lazy-populated back-up of the ZA contents, as managed
+ ;; by TPIDR2_EL0. Modelling this as a simple register allows the RTL
+ ;; optimizers to remove redundant saves and restores.
+ (OLD_ZA_REGNUM 87)
;; The pair of scratch registers used for stack probing with -fstack-check.
;; Leave R9 alone as a possible choice for the static chain.
;; Note that the use of these registers is mutually exclusive with the use
@@ -303,6 +308,9 @@ (define_c_enum "unspec" [
UNSPEC_TAG_SPACE ; Translate address to MTE tag address space.
UNSPEC_LD1RO
UNSPEC_SALT_ADDR
+ ;; Wraps a constant integer that should be multiplied by the number
+ ;; of quadwords in an SME vector.
+ UNSPEC_SME_VQ
])
(define_c_enum "unspecv" [
@@ -374,7 +382,7 @@ (define_constants
;; As a convenience, "fp_q" means "fp" + the ability to move between
;; Q registers and is equivalent to "simd".
-(define_enum "arches" [any rcpc8_4 fp fp_q base_simd simd sve fp16])
+(define_enum "arches" [any rcpc8_4 fp fp_q base_simd simd sve fp16 sme])
(define_enum_attr "arch" "arches" (const_string "any"))
@@ -412,7 +420,10 @@ (define_attr "arch_enabled" "no,yes"
(match_test "TARGET_FP_F16INST"))
(and (eq_attr "arch" "sve")
- (match_test "TARGET_SVE")))
+ (match_test "TARGET_SVE"))
+
+ (and (eq_attr "arch" "sme")
+ (match_test "TARGET_SME")))
(const_string "yes")
(const_string "no")))
@@ -915,7 +926,7 @@ (define_insn "simple_return"
(set_attr "sls_length" "retbr")]
)
-(define_insn "*cb<optab><mode>1"
+(define_insn "aarch64_cb<optab><mode>1"
[(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
(const_int 0))
(label_ref (match_operand 1 "" ""))
@@ -1268,8 +1279,8 @@ (define_expand "mov<mode>"
)
(define_insn_and_split "*movsi_aarch64"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r, r, r,r,w, m,m, r, r, r, w,r,w, w")
- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,Usr,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r, r, r, r,r,w, m,m, r, r, r, w,r,w, w")
+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,Usr,UsR,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
"@
@@ -1280,6 +1291,7 @@ (define_insn_and_split "*movsi_aarch64"
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
* return aarch64_output_sve_rdvl (operands[1]);
+ * return aarch64_output_rdsvl (operands[1]);
ldr\\t%w0, %1
ldr\\t%s0, %1
str\\t%w1, %0
@@ -1300,17 +1312,17 @@ (define_insn_and_split "*movsi_aarch64"
}"
;; The "mov_imm" type for CNT is just a placeholder.
[(set_attr "type" "mov_reg,mov_reg,mov_reg,
- mov_imm,mov_imm,mov_imm,mov_imm,
+ mov_imm,mov_imm,mov_imm,mov_imm,mov_imm,
load_4,load_4,store_4,store_4,load_4,
adr,adr,f_mcr,f_mrc,fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,sve,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,*, 4, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")
+ (set_attr "arch" "*,*,*,*,*,sve,sve,sme,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+ (set_attr "length" "4,4,4,4,*, 4, 4, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")
]
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r, r,r,w, m,m, r, r, r, w,r,w, w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,Usr,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r, r, r,r,w, m,m, r, r, r, w,r,w, w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,Usr,UsR,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
@@ -1322,6 +1334,7 @@ (define_insn_and_split "*movdi_aarch64"
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
* return aarch64_output_sve_rdvl (operands[1]);
+ * return aarch64_output_rdsvl (operands[1]);
ldr\\t%x0, %1
ldr\\t%d0, %1
str\\t%x1, %0
@@ -1342,11 +1355,11 @@ (define_insn_and_split "*movdi_aarch64"
}"
;; The "mov_imm" type for CNTD is just a placeholder.
[(set_attr "type" "mov_reg,mov_reg,mov_reg,
- mov_imm,mov_imm,mov_imm,mov_imm,mov_imm,
+ mov_imm,mov_imm,mov_imm,mov_imm,mov_imm,mov_imm,
load_8,load_8,store_8,store_8,load_8,
adr,adr,f_mcr,f_mrc,fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,*,sve,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,4,*, 4, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
+ (set_attr "arch" "*,*,*,*,*,*,sve,sve,sme,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+ (set_attr "length" "4,4,4,4,4,*, 4, 4, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
)
(define_insn "insv_imm<mode>"
@@ -215,6 +215,12 @@ (define_constraint "Usr"
(and (match_code "const_poly_int")
(match_test "aarch64_sve_rdvl_immediate_p (op)")))
+(define_constraint "UsR"
+ "@internal
+ A constraint that matches a value produced by RDSVL."
+ (and (match_code "const")
+ (match_test "aarch64_rdsvl_immediate_p (op)")))
+
(define_constraint "Usv"
"@internal
A constraint that matches a VG-based constant that can be loaded by
new file mode 100644
@@ -0,0 +1,102 @@
+// { dg-options "" }
+
+void __attribute__((arm_shared_za)) shared_a ();
+void shared_a (); // { dg-error "conflicting types" }
+
+void shared_b ();
+void __attribute__((arm_shared_za)) shared_b (); // { dg-error "conflicting types" }
+
+void __attribute__((arm_shared_za)) shared_c ();
+void shared_c () {} // Definition inherits arm_shared_za from the earlier declaration (confusingly).
+
+void shared_d ();
+void __attribute__((arm_shared_za)) shared_d () {} // { dg-error "conflicting types" }
+
+void __attribute__((arm_shared_za)) shared_e () {}
+void shared_e (); // { dg-error "conflicting types" }
+
+void shared_f () {}
+void __attribute__((arm_shared_za)) shared_f (); // { dg-error "conflicting types" }
+
+extern void (*shared_g) ();
+extern __attribute__((arm_shared_za)) void (*shared_g) (); // { dg-error "conflicting types" }
+
+extern __attribute__((arm_shared_za)) void (*shared_h) ();
+extern void (*shared_h) (); // { dg-error "conflicting types" }
+
+//----------------------------------------------------------------------------
+
+void __attribute__((arm_preserves_za)) preserves_a ();
+void preserves_a (); // { dg-error "conflicting types" }
+
+void preserves_b ();
+void __attribute__((arm_preserves_za)) preserves_b (); // { dg-error "conflicting types" }
+
+void __attribute__((arm_preserves_za)) preserves_c ();
+void preserves_c () {} // Definition inherits arm_preserves_za from the earlier declaration (confusingly).
+
+void preserves_d ();
+void __attribute__((arm_preserves_za)) preserves_d () {} // { dg-error "conflicting types" }
+
+void __attribute__((arm_preserves_za)) preserves_e () {}
+void preserves_e (); // { dg-error "conflicting types" }
+
+void preserves_f () {}
+void __attribute__((arm_preserves_za)) preserves_f (); // { dg-error "conflicting types" }
+
+extern void (*preserves_g) ();
+extern __attribute__((arm_preserves_za)) void (*preserves_g) (); // { dg-error "conflicting types" }
+
+extern __attribute__((arm_preserves_za)) void (*preserves_h) ();
+extern void (*preserves_h) (); // { dg-error "conflicting types" }
+
+//----------------------------------------------------------------------------
+
+void __attribute__((arm_preserves_za)) mixed_a ();
+void __attribute__((arm_shared_za)) mixed_a (); // { dg-error "conflicting types" }
+
+void __attribute__((arm_shared_za)) mixed_b ();
+void __attribute__((arm_preserves_za)) mixed_b (); // { dg-error "conflicting types" }
+
+void __attribute__((arm_preserves_za)) mixed_c ();
+void __attribute__((arm_shared_za)) mixed_c () {} // { dg-error "conflicting types" }
+
+void __attribute__((arm_shared_za)) mixed_d ();
+void __attribute__((arm_preserves_za)) mixed_d () {} // { dg-error "conflicting types" }
+
+void __attribute__((arm_preserves_za)) mixed_e () {}
+void __attribute__((arm_shared_za)) mixed_e (); // { dg-error "conflicting types" }
+
+void __attribute__((arm_shared_za)) mixed_f () {}
+void __attribute__((arm_preserves_za)) mixed_f (); // { dg-error "conflicting types" }
+
+extern __attribute__((arm_shared_za)) void (*mixed_g) ();
+extern __attribute__((arm_preserves_za)) void (*mixed_g) (); // { dg-error "conflicting types" }
+
+extern __attribute__((arm_preserves_za)) void (*mixed_h) ();
+extern __attribute__((arm_shared_za)) void (*mixed_h) (); // { dg-error "conflicting types" }
+
+//----------------------------------------------------------------------------
+
+void __attribute__((arm_preserves_za, arm_shared_za)) complementary_1();
+void __attribute__((arm_shared_za, arm_preserves_za)) complementary_2();
+
+int __attribute__((arm_shared_za)) int_attr; // { dg-warning "only applies to function types" }
+void *__attribute__((arm_preserves_za)) ptr_attr; // { dg-warning "only applies to function types" }
+
+typedef void __attribute__((arm_preserves_za)) preserves_callback ();
+typedef void __attribute__((arm_shared_za)) shared_callback ();
+
+void (*__attribute__((arm_preserves_za)) preserves_callback_ptr) ();
+void (*__attribute__((arm_shared_za)) shared_callback_ptr) ();
+
+typedef void __attribute__((arm_preserves_za, arm_shared_za)) complementary_callback_1 ();
+typedef void __attribute__((arm_shared_za, arm_preserves_za)) complementary_callback_2 ();
+
+void __attribute__((arm_preserves_za, arm_shared_za)) (*complementary_callback_ptr_1) ();
+void __attribute__((arm_shared_za, arm_preserves_za)) (*complementary_callback_ptr_2) ();
+
+struct s {
+ void __attribute__((arm_preserves_za, arm_shared_za)) (*complementary_callback_ptr_1) ();
+ void __attribute__((arm_shared_za, arm_preserves_za)) (*complementary_callback_ptr_2) ();
+};
new file mode 100644
@@ -0,0 +1,96 @@
+// { dg-options "" }
+
+void __attribute__((arm_new_za)) new_za_a ();
+void new_za_a ();
+
+void new_za_b ();
+void __attribute__((arm_new_za)) new_za_b ();
+
+void __attribute__((arm_new_za)) new_za_c ();
+void new_za_c () {}
+
+void new_za_d ();
+void __attribute__((arm_new_za)) new_za_d () {}
+
+void __attribute__((arm_new_za)) new_za_e () {}
+void new_za_e ();
+
+void new_za_f () {}
+void __attribute__((arm_new_za)) new_za_f (); // { dg-error "cannot apply attribute 'arm_new_za' to 'new_za_f' after the function has been defined" }
+
+extern void (*new_za_g) ();
+extern __attribute__((arm_new_za)) void (*new_za_g) (); // { dg-error "applies only to functions" }
+
+extern __attribute__((arm_new_za)) void (*new_za_h) (); // { dg-error "applies only to functions" }
+extern void (*new_za_h) ();
+
+//----------------------------------------------------------------------------
+
+void __attribute__((arm_new_za)) shared_a ();
+void __attribute__((arm_shared_za)) shared_a (); // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_shared_za)) shared_b ();
+void __attribute__((arm_new_za)) shared_b (); // { dg-error "conflicting types" }
+// { dg-warning "conflicts with attribute" "" { target *-*-* } .-1 }
+
+void __attribute__((arm_new_za)) shared_c ();
+void __attribute__((arm_shared_za)) shared_c () {} // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_shared_za)) shared_d ();
+void __attribute__((arm_new_za)) shared_d () {} // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_new_za)) shared_e () {}
+void __attribute__((arm_shared_za)) shared_e (); // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_shared_za)) shared_f () {}
+void __attribute__((arm_new_za)) shared_f (); // { dg-error "conflicting types" }
+// { dg-warning "conflicts with attribute" "" { target *-*-* } .-1 }
+
+//----------------------------------------------------------------------------
+
+void __attribute__((arm_new_za)) preserves_a ();
+void __attribute__((arm_preserves_za)) preserves_a (); // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_preserves_za)) preserves_b ();
+void __attribute__((arm_new_za)) preserves_b (); // { dg-error "conflicting types" }
+// { dg-warning "conflicts with attribute" "" { target *-*-* } .-1 }
+
+void __attribute__((arm_new_za)) preserves_c ();
+void __attribute__((arm_preserves_za)) preserves_c () {} // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_preserves_za)) preserves_d ();
+void __attribute__((arm_new_za)) preserves_d () {} // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_new_za)) preserves_e () {}
+void __attribute__((arm_preserves_za)) preserves_e (); // { dg-warning "conflicts with attribute" }
+
+void __attribute__((arm_preserves_za)) preserves_f () {}
+void __attribute__((arm_new_za)) preserves_f (); // { dg-error "conflicting types" }
+// { dg-warning "conflicts with attribute" "" { target *-*-* } .-1 }
+
+//----------------------------------------------------------------------------
+
+void __attribute__((arm_new_za, arm_shared_za)) contradiction_1(); // { dg-warning "conflicts with attribute" }
+void __attribute__((arm_shared_za, arm_new_za)) contradiction_2(); // { dg-warning "conflicts with attribute" }
+void __attribute__((arm_new_za, arm_preserves_za)) contradiction_3(); // { dg-warning "conflicts with attribute" }
+void __attribute__((arm_preserves_za, arm_new_za)) contradiction_4(); // { dg-warning "conflicts with attribute" }
+
+int __attribute__((arm_new_za)) int_attr; // { dg-error "applies only to functions" }
+typedef __attribute__((arm_new_za)) int int_typdef; // { dg-error "applies only to functions" }
+typedef void __attribute__((arm_new_za)) new_za_callback (); // { dg-error "applies only to functions" }
+
+//----------------------------------------------------------------------------
+
+void __attribute__((arm_streaming, arm_new_za)) complementary_1 () {}
+void __attribute__((arm_new_za, arm_streaming)) complementary_2 () {}
+void __attribute__((arm_streaming_compatible, arm_new_za)) complementary_3 () {}
+void __attribute__((arm_new_za, arm_streaming_compatible)) complementary_4 () {}
+
+//----------------------------------------------------------------------------
+
+#pragma GCC target "+nosme"
+
+void __attribute__((arm_new_za)) bereft_1 ();
+void __attribute__((arm_new_za)) bereft_2 () {} // { dg-error "functions with ZA state require the ISA extension 'sme'" }
+void __attribute__((arm_shared_za)) bereft_3 ();
+void __attribute__((arm_shared_za)) bereft_4 () {} // { dg-error "functions with ZA state require the ISA extension 'sme'" }
new file mode 100644
@@ -0,0 +1,27 @@
+// { dg-options "" }
+
+void normal_callee ();
+__attribute__((arm_shared_za)) void shared_callee ();
+__attribute__((arm_preserves_za)) void preserves_callee ();
+__attribute__((arm_shared_za, arm_preserves_za)) void shared_preserves_callee ();
+
+struct callbacks {
+ void (*normal_ptr) ();
+ __attribute__((arm_shared_za)) void (*shared_ptr) ();
+ __attribute__((arm_preserves_za)) void (*preserves_ptr) ();
+ __attribute__((arm_shared_za, arm_preserves_za)) void (*shared_preserves_ptr) ();
+};
+
+void
+normal_caller (struct callbacks *c)
+{
+ normal_callee ();
+ shared_callee (); // { dg-error "call to an 'arm_shared_za' function from a function that has no ZA state" }
+ preserves_callee ();
+ shared_preserves_callee (); // { dg-error "call to an 'arm_shared_za' function from a function that has no ZA state" }
+
+ c->normal_ptr ();
+ c->shared_ptr (); // { dg-error "call to an 'arm_shared_za' function from a function that has no ZA state" }
+ c->preserves_ptr ();
+ c->shared_preserves_ptr (); // { dg-error "call to an 'arm_shared_za' function from a function that has no ZA state" }
+}
new file mode 100644
@@ -0,0 +1,277 @@
+// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+void ns_normal_callee ();
+__attribute__((arm_shared_za)) void ns_shared_callee ();
+__attribute__((arm_preserves_za)) void ns_preserves_callee ();
+__attribute__((arm_shared_za, arm_preserves_za)) void ns_shared_preserves_callee ();
+
+__attribute__((arm_streaming)) void s_normal_callee ();
+__attribute__((arm_streaming, arm_shared_za)) void s_shared_callee ();
+__attribute__((arm_streaming, arm_preserves_za)) void s_preserves_callee ();
+__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) void s_shared_preserves_callee ();
+
+__attribute__((arm_streaming_compatible)) void sc_normal_callee ();
+__attribute__((arm_streaming_compatible, arm_shared_za)) void sc_shared_callee ();
+__attribute__((arm_streaming_compatible, arm_preserves_za)) void sc_preserves_callee ();
+__attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za)) void sc_shared_preserves_callee ();
+
+struct callbacks {
+ void (*normal_ptr) ();
+ __attribute__((arm_shared_za)) void (*shared_ptr) ();
+ __attribute__((arm_preserves_za)) void (*preserves_ptr) ();
+ __attribute__((arm_shared_za, arm_preserves_za)) void (*shared_preserves_ptr) ();
+};
+
+/*
+** ns_caller1:
+** ...
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** smstart za
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl ns_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl ns_shared_callee
+** bl ns_preserves_callee
+** bl ns_shared_preserves_callee
+** msr tpidr2_el0, \1
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** smstop za
+** ...
+*/
+void __attribute__((arm_new_za))
+ns_caller1 (struct callbacks *c)
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ c->normal_ptr ();
+ c->shared_ptr ();
+ c->preserves_ptr ();
+ c->shared_preserves_ptr ();
+}
+
+/*
+** ns_caller2:
+** ...
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** msr tpidr2_el0, xzr
+** smstart za
+** bl ns_shared_callee
+** smstop za
+** ...
+*/
+void __attribute__((arm_new_za))
+ns_caller2 (struct callbacks *c)
+{
+ ns_shared_callee ();
+}
+
+/*
+** ns_caller3:
+** ...
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** msr tpidr2_el0, xzr
+** smstart za
+** bl ns_preserves_callee
+** bl ns_shared_callee
+** bl ns_shared_preserves_callee
+** smstop za
+** ...
+*/
+void __attribute__((arm_new_za))
+ns_caller3 (struct callbacks *c)
+{
+ ns_preserves_callee ();
+ ns_shared_callee ();
+ ns_shared_preserves_callee ();
+}
+
+/*
+** ns_caller4:
+** ...
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** smstart za
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** smstart sm
+** bl s_normal_callee
+** smstop sm
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** smstart sm
+** bl s_shared_callee
+** smstop sm
+** smstart sm
+** bl s_preserves_callee
+** smstop sm
+** smstart sm
+** bl s_shared_preserves_callee
+** smstop sm
+** smstop za
+** ...
+*/
+void __attribute__((arm_new_za))
+ns_caller4 (struct callbacks *c)
+{
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+}
+
+/*
+** ns_caller5:
+** ...
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** smstart za
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl sc_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl sc_shared_callee
+** bl sc_preserves_callee
+** bl sc_shared_preserves_callee
+** smstop za
+** ...
+*/
+void __attribute__((arm_new_za))
+ns_caller5 (struct callbacks *c)
+{
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+/*
+** s_caller1:
+** ...
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** smstart za
+** ...
+** add (x[0-9]+), x29, .*
+** cntb (x[0-9]+)
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl s_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl s_shared_callee
+** bl s_preserves_callee
+** bl s_shared_preserves_callee
+** smstop za
+** ...
+*/
+void __attribute__((arm_new_za, arm_streaming))
+s_caller1 (struct callbacks *c)
+{
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+}
+
+/*
+** sc_caller1:
+** ...
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** smstart za
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl sc_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl sc_shared_callee
+** bl sc_preserves_callee
+** bl sc_shared_preserves_callee
+** smstop za
+** ...
+*/
+void __attribute__((arm_new_za, arm_streaming_compatible))
+sc_caller1 (struct callbacks *c)
+{
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
new file mode 100644
@@ -0,0 +1,241 @@
+// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+void ns_normal_callee ();
+__attribute__((arm_shared_za)) void ns_shared_callee ();
+__attribute__((arm_preserves_za)) void ns_preserves_callee ();
+__attribute__((arm_shared_za, arm_preserves_za)) void ns_shared_preserves_callee ();
+
+__attribute__((arm_streaming)) void s_normal_callee ();
+__attribute__((arm_streaming, arm_shared_za)) void s_shared_callee ();
+__attribute__((arm_streaming, arm_preserves_za)) void s_preserves_callee ();
+__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) void s_shared_preserves_callee ();
+
+__attribute__((arm_streaming_compatible)) void sc_normal_callee ();
+__attribute__((arm_streaming_compatible, arm_shared_za)) void sc_shared_callee ();
+__attribute__((arm_streaming_compatible, arm_preserves_za)) void sc_preserves_callee ();
+__attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za)) void sc_shared_preserves_callee ();
+
+struct callbacks {
+ void (*normal_ptr) ();
+ __attribute__((arm_shared_za)) void (*shared_ptr) ();
+ __attribute__((arm_preserves_za)) void (*preserves_ptr) ();
+ __attribute__((arm_shared_za, arm_preserves_za)) void (*shared_preserves_ptr) ();
+};
+
+/*
+** ns_caller1:
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl ns_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl ns_shared_callee
+** bl ns_preserves_callee
+** bl ns_shared_preserves_callee
+** msr tpidr2_el0, \1
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** ldr x[0-9]+, .*
+** blr x[0-9]+
+** ...
+*/
+void __attribute__((arm_shared_za))
+ns_caller1 (struct callbacks *c)
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ c->normal_ptr ();
+ c->shared_ptr ();
+ c->preserves_ptr ();
+ c->shared_preserves_ptr ();
+}
+
+/*
+** ns_caller2:
+** stp x29, x30, \[sp, #?-16\]!
+** mov x29, sp
+** bl ns_shared_callee
+** ldp x29, x30, \[sp\], #?16
+** ret
+*/
+void __attribute__((arm_shared_za))
+ns_caller2 (struct callbacks *c)
+{
+ ns_shared_callee ();
+}
+
+/*
+** ns_caller3:
+** stp x29, x30, \[sp, #?-16\]!
+** mov x29, sp
+** bl ns_preserves_callee
+** bl ns_shared_callee
+** bl ns_shared_preserves_callee
+** ldp x29, x30, \[sp\], #?16
+** ret
+*/
+void __attribute__((arm_shared_za))
+ns_caller3 (struct callbacks *c)
+{
+ ns_preserves_callee ();
+ ns_shared_callee ();
+ ns_shared_preserves_callee ();
+}
+
+/*
+** ns_caller4:
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** smstart sm
+** bl s_normal_callee
+** smstop sm
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** smstart sm
+** bl s_shared_callee
+** smstop sm
+** smstart sm
+** bl s_preserves_callee
+** smstop sm
+** smstart sm
+** bl s_shared_preserves_callee
+** smstop sm
+** ...
+*/
+void __attribute__((arm_shared_za))
+ns_caller4 (struct callbacks *c)
+{
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+}
+
+/*
+** ns_caller5:
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl sc_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl sc_shared_callee
+** bl sc_preserves_callee
+** bl sc_shared_preserves_callee
+** ...
+*/
+void __attribute__((arm_shared_za))
+ns_caller5 (struct callbacks *c)
+{
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+/*
+** s_caller1:
+** ...
+** add (x[0-9]+), x29, .*
+** cntb (x[0-9]+)
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl s_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl s_shared_callee
+** bl s_preserves_callee
+** bl s_shared_preserves_callee
+** ...
+*/
+void __attribute__((arm_shared_za, arm_streaming))
+s_caller1 (struct callbacks *c)
+{
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+}
+
+/*
+** sc_caller1:
+** ...
+** add (x[0-9]+), x29, .*
+** rdsvl (x[0-9]+), #1
+** mov (x[0-9]+), sp
+** msub (x[0-9]+), \2, \2, \3
+** mov sp, \4
+** stp \4, \2, .*
+** msr tpidr2_el0, \1
+** bl sc_normal_callee
+** mrs x16, tpidr2_el0
+** cbnz x16, .*
+** smstart za
+** mov x0, \1
+** bl __arm_tpidr2_restore
+** msr tpidr2_el0, xzr
+** bl sc_shared_callee
+** bl sc_preserves_callee
+** bl sc_shared_preserves_callee
+** ...
+*/
+void __attribute__((arm_shared_za, arm_streaming_compatible))
+sc_caller1 (struct callbacks *c)
+{
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+// { dg-final { scan-assembler-not {\tsmstop\tza} } }
new file mode 100644
@@ -0,0 +1,132 @@
+// { dg-options "-O -fno-optimize-sibling-calls" }
+
+void ns_normal_callee ();
+__attribute__((arm_shared_za)) void ns_shared_callee ();
+__attribute__((arm_preserves_za)) void ns_preserves_callee ();
+__attribute__((arm_shared_za, arm_preserves_za)) void ns_shared_preserves_callee ();
+
+__attribute__((arm_streaming)) void s_normal_callee ();
+__attribute__((arm_streaming, arm_shared_za)) void s_shared_callee ();
+__attribute__((arm_streaming, arm_preserves_za)) void s_preserves_callee ();
+__attribute__((arm_streaming, arm_shared_za, arm_preserves_za)) void s_shared_preserves_callee ();
+
+__attribute__((arm_streaming_compatible)) void sc_normal_callee ();
+__attribute__((arm_streaming_compatible, arm_shared_za)) void sc_shared_callee ();
+__attribute__((arm_streaming_compatible, arm_preserves_za)) void sc_preserves_callee ();
+__attribute__((arm_streaming_compatible, arm_shared_za, arm_preserves_za)) void sc_shared_preserves_callee ();
+
+void __attribute__((arm_new_za))
+caller1 ()
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+void __attribute__((arm_shared_za))
+caller2 ()
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+void __attribute__((arm_new_za, arm_streaming))
+caller3 ()
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+void __attribute__((arm_shared_za, arm_streaming))
+caller4 ()
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+void __attribute__((arm_new_za, arm_streaming_compatible))
+caller5 ()
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+void __attribute__((arm_shared_za, arm_streaming_compatible))
+caller6 ()
+{
+ ns_normal_callee ();
+ ns_shared_callee ();
+ ns_preserves_callee ();
+ ns_shared_preserves_callee ();
+
+ s_normal_callee ();
+ s_shared_callee ();
+ s_preserves_callee ();
+ s_shared_preserves_callee ();
+
+ sc_normal_callee ();
+ sc_shared_callee ();
+ sc_preserves_callee ();
+ sc_shared_preserves_callee ();
+}
+
+// { dg-final { scan-assembler-times {\tmsr\ttpidr2_el0, xzr} 18 } }
new file mode 100644
@@ -0,0 +1,55 @@
+// { dg-options "-O -fno-optimize-sibling-calls -fomit-frame-pointer" }
+// { dg-final { check-function-bodies "**" "" } }
+
+/*
+** za1:
+** mov w0, #?1
+** ret
+*/
+int __attribute__((arm_new_za))
+za1 ()
+{
+ asm ("");
+ return 1;
+}
+
+/*
+** za2:
+** str x30, \[sp, #?-16\]!
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** msr tpidr2_el0, xzr
+** smstart za
+** mov w0, #?1
+** smstop za
+** ldr x30, \[sp\], #?16
+** ret
+*/
+int __attribute__((arm_new_za))
+za2 ()
+{
+ asm ("" ::: "za");
+ return 1;
+}
+
+/*
+** za3:
+** str x30, \[sp, #?-16\]!
+** mrs x11, tpidr2_el0
+** cbz x11, .*
+** bl __arm_tpidr2_save
+** msr tpidr2_el0, xzr
+** smstart za
+** mov w0, w2
+** smstop za
+** ldr x30, \[sp\], #?16
+** ret
+*/
+int __attribute__((arm_new_za))
+za3 ()
+{
+ register int ret asm ("x2");
+ asm ("" : "=r" (ret) :: "za");
+ return ret;
+}