@@ -85,6 +85,7 @@
#include "config/arm/aarch-common.h"
#include "config/arm/aarch-common-protos.h"
#include "ssa.h"
+#include "except.h"
#include "tree-pass.h"
#include "cfgbuild.h"
@@ -4758,6 +4759,8 @@ public:
void add_reg (machine_mode, unsigned int);
void add_call_args (rtx_call_insn *);
void add_call_result (rtx_call_insn *);
+ void add_call_preserved_reg (unsigned int);
+ void add_call_preserved_regs (bitmap);
void emit_prologue ();
void emit_epilogue ();
@@ -4890,6 +4893,46 @@ aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn)
add_reg (GET_MODE (dest), REGNO (dest));
}
+/* REGNO is call-preserved under the current function's ABI. Record that the
+ mode switch must preserve it if it is an FP or predicate register. */
+
+void
+aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno)
+{
+ if (FP_REGNUM_P (regno))
+ switch (crtl->abi->id ()) /* The mode that is recorded depends on the ABI. */
+ {
+ case ARM_PCS_SVE:
+ add_reg (VNx16QImode, regno);
+ break;
+ case ARM_PCS_SIMD:
+ add_reg (V16QImode, regno);
+ break;
+ case ARM_PCS_AAPCS64:
+ add_reg (DImode, regno);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ else if (PR_REGNUM_P (regno))
+ add_reg (VNx16BImode, regno);
+}
+
+/* Record that the hard registers in REGS, which are call-preserved under the
+ current function's ABI, must be preserved around the mode switch. */
+
+void
+aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs)
+{
+ bitmap_iterator bi;
+ unsigned int regno;
+ EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, bi)
+ if (HARD_REGISTER_NUM_P (regno))
+ add_call_preserved_reg (regno);
+ else
+ break; /* Stop at the first entry that is not a hard register. */
+}
+
/* Emit code to save registers before the mode switch. */
void
@@ -7423,6 +7466,23 @@ aarch64_need_old_pstate_sm ()
if (aarch64_cfun_enables_pstate_sm ())
return true;
+ /* Non-local goto receivers are entered with PSTATE.SM equal to 0,
+ but the function needs to return with PSTATE.SM unchanged. */
+ if (nonlocal_goto_handler_labels)
+ return true;
+
+ /* Likewise for exception handlers. */
+ eh_landing_pad lp;
+ for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i)
+ if (lp && lp->post_landing_pad)
+ return true;
+
+ /* Non-local gotos need to set PSTATE.SM to zero. It's possible to call
+ streaming-compatible functions without SME being available, so PSTATE.SM
+ should only be changed if it is currently set to one. */
+ if (crtl->has_nonlocal_goto)
+ return true;
+
if (cfun->machine->call_switches_pstate_sm)
for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (auto *call = dyn_cast<rtx_call_insn *> (insn))
@@ -28323,6 +28383,59 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
return seq;
}
+/* BB is the target of an exception or nonlocal goto edge, so PSTATE.SM is
+ known to be 0 on entry. Emit code at the start of BB to put it into the
+ state that the current function requires. Return true if code was added. */
+
+static bool
+aarch64_switch_pstate_sm_for_landing_pad (basic_block bb)
+{
+ if (TARGET_NON_STREAMING)
+ return false; /* Nothing is emitted in this case. */
+
+ start_sequence ();
+ rtx_insn *guard_label = nullptr;
+ if (TARGET_STREAMING_COMPATIBLE)
+ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
+ AARCH64_FL_SM_OFF);
+ aarch64_sme_mode_switch_regs args_switch;
+ args_switch.add_call_preserved_regs (df_get_live_in (bb)); /* BB's live-in set. */
+ args_switch.emit_prologue ();
+ aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON);
+ args_switch.emit_epilogue ();
+ if (guard_label)
+ emit_label (guard_label); /* NOTE(review): presumably the guard's skip target. */
+ auto seq = get_insns ();
+ end_sequence ();
+
+ emit_insn_after (seq, bb_note (bb));
+ return true;
+}
+
+/* JUMP is a nonlocal goto, whose target requires PSTATE.SM to be 0 on entry.
+ Emit code before JUMP to make it so. Return true if code was added. */
+
+static bool
+aarch64_switch_pstate_sm_for_jump (rtx_insn *jump)
+{
+ if (TARGET_NON_STREAMING)
+ return false; /* Nothing is emitted in this case. */
+
+ start_sequence ();
+ rtx_insn *guard_label = nullptr;
+ if (TARGET_STREAMING_COMPATIBLE)
+ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
+ AARCH64_FL_SM_OFF);
+ aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF);
+ if (guard_label)
+ emit_label (guard_label); /* NOTE(review): presumably the guard's skip target. */
+ auto seq = get_insns ();
+ end_sequence ();
+
+ emit_insn_before (seq, jump);
+ return true;
+}
+
/* If CALL involves a change in PSTATE.SM, emit the instructions needed
to switch to the new mode and the instructions needed to restore the
original mode. Return true if something changed. */
@@ -28406,9 +28519,10 @@ public:
};
bool
-pass_switch_pstate_sm::gate (function *)
+pass_switch_pstate_sm::gate (function *fn)
{
- return cfun->machine->call_switches_pstate_sm;
+ return (aarch64_fndecl_pstate_sm (fn->decl) != AARCH64_FL_SM_OFF
+ || cfun->machine->call_switches_pstate_sm); /* Either condition suffices. */
}
/* Emit any instructions needed to switch PSTATE.SM. */
@@ -28421,11 +28535,24 @@ pass_switch_pstate_sm::execute (function *fn)
bitmap_clear (blocks);
FOR_EACH_BB_FN (bb, fn)
{
- rtx_insn *insn;
- FOR_BB_INSNS (bb, insn)
- if (auto *call = dyn_cast<rtx_call_insn *> (insn))
- if (aarch64_switch_pstate_sm_for_call (call))
- bitmap_set_bit (blocks, bb->index);
+ if (has_abnormal_call_or_eh_pred_edge_p (bb)
+ && aarch64_switch_pstate_sm_for_landing_pad (bb))
+ bitmap_set_bit (blocks, bb->index);
+
+ if (cfun->machine->call_switches_pstate_sm)
+ {
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (auto *call = dyn_cast<rtx_call_insn *> (insn))
+ if (aarch64_switch_pstate_sm_for_call (call))
+ bitmap_set_bit (blocks, bb->index);
+ }
+
+ auto end = BB_END (bb);
+ if (JUMP_P (end)
+ && find_reg_note (end, REG_NON_LOCAL_GOTO, NULL_RTX)
+ && aarch64_switch_pstate_sm_for_jump (end))
+ bitmap_set_bit (blocks, bb->index);
}
find_many_sub_basic_blocks (blocks);
clear_aux_for_blocks ();
new file mode 100644
@@ -0,0 +1,148 @@
+// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+void n_callee();
+void s_callee() __arm_streaming;
+void sc_callee() __arm_streaming_compatible;
+
+void n_callee_ne() noexcept;
+void s_callee_ne() noexcept __arm_streaming;
+void sc_callee_ne() noexcept __arm_streaming_compatible;
+
+void n_caller1() // No smstart/smstop is expected before this function's ret.
+{
+ try
+ {
+ n_callee();
+ sc_callee();
+ }
+ catch (...)
+ {
+ n_callee_ne();
+ sc_callee_ne();
+ }
+}
+// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } }
+
+/*
+** _Z9n_caller2v:
+** ...
+** cntd (x[0-9]+)
+** str \1, [^\n]+
+** ...
+** bl __cxa_begin_catch
+** smstart sm
+** bl _Z11s_callee_nev
+** smstop sm
+** bl __cxa_end_catch
+** ...
+*/
+void n_caller2()
+{
+ try
+ {
+ n_callee();
+ sc_callee();
+ }
+ catch (...)
+ {
+ s_callee_ne(); // Expected: smstart sm before the call, smstop sm after.
+ }
+}
+
+/*
+** _Z9s_caller1v:
+** ...
+** bl __cxa_end_catch
+** smstart sm
+** ...
+*/
+int s_caller1() __arm_streaming
+{
+ try
+ {
+ s_callee();
+ return 1;
+ }
+ catch (...) // Expected: smstart sm after the call to __cxa_end_catch.
+ {
+ return 2;
+ }
+}
+
+/*
+** _Z9s_caller2v:
+** ...
+** bl __cxa_begin_catch
+** smstart sm
+** bl _Z11s_callee_nev
+** smstop sm
+** bl __cxa_end_catch
+** smstart sm
+** ...
+*/
+int s_caller2() __arm_streaming
+{
+ try
+ {
+ n_callee();
+ return 1;
+ }
+ catch (...) // Expected: smstart sm after the call to __cxa_end_catch.
+ {
+ s_callee_ne(); // Expected: smstart sm before the call, smstop sm after.
+ return 2;
+ }
+}
+
+/*
+** _Z10sc_caller1v:
+** ...
+** cntd (x[0-9]+)
+** str \1, [^\n]+
+** mrs (x[0-9]+), svcr
+** str \2, ([^\n]+)
+** ...
+** bl __cxa_end_catch
+** ldr (x[0-9]+), \3
+** tbz \4, 0, [^\n]+
+** smstart sm
+** ...
+*/
+int sc_caller1() __arm_streaming_compatible
+{
+ try
+ {
+ sc_callee();
+ return 1;
+ }
+ catch (...) // Expected: after __cxa_end_catch, a tbz on the saved SVCR guards smstart sm.
+ {
+ return 2;
+ }
+}
+
+/*
+** _Z10ls_caller1v:
+** ...
+** cntd (x[0-9]+)
+** str \1, [^\n]+
+** ...
+** bl __cxa_begin_catch
+** smstart sm
+** bl _Z12sc_callee_nev
+** smstop sm
+** bl __cxa_end_catch
+** ...
+*/
+__arm_locally_streaming void ls_caller1()
+{
+ try
+ {
+ sc_callee();
+ }
+ catch (...)
+ {
+ sc_callee_ne(); // Expected: smstart sm before the call, smstop sm after.
+ }
+}
new file mode 100644
@@ -0,0 +1,58 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** foo:
+** ...
+** mrs x16, svcr
+** ...
+** str x16, (.*)
+** ...
+** ldr x16, \1
+** tbz x16, 0, .*
+** smstop sm
+** bl __clear_cache
+** ldr x16, \1
+** tbz x16, 0, .*
+** smstart sm
+** add x0, .*
+** ldr x16, \1
+** tbz x16, 0, .*
+** smstop sm
+** bl run
+** ldr x16, \1
+** tbz x16, 0, .*
+** smstart sm
+** mov w0, 1
+** ...
+** ret
+** ldr x16, \1
+** tbz x16, 0, .*
+** smstart sm
+** mov w0, 0
+** ...
+*/
+int
+foo (int *ptr) __arm_streaming_compatible
+{
+ __label__ failure;
+
+ void bar () { *ptr += 1; goto failure; } // Nonlocal goto back into foo.
+ run (bar);
+ return 1;
+
+failure: // Expected: a tbz-guarded smstart sm on this path.
+ return 0;
+}
+
+// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
+// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
+// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
+// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
+// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
+// { dg-final { scan-assembler {\tstp\td8, d9,} } }
+// { dg-final { scan-assembler {\tstp\td10, d11,} } }
+// { dg-final { scan-assembler {\tstp\td12, d13,} } }
+// { dg-final { scan-assembler {\tstp\td14, d15,} } }
new file mode 100644
@@ -0,0 +1,44 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** foo:
+** ...
+** smstop sm
+** bl __clear_cache
+** smstart sm
+** add x0, .*
+** smstop sm
+** bl run
+** smstart sm
+** mov w0, 1
+** ...
+** ret
+** smstart sm
+** mov w0, 0
+** ...
+*/
+int
+foo (int *ptr) __arm_streaming
+{
+ __label__ failure;
+
+ void bar () { *ptr += 1; goto failure; } // Nonlocal goto back into foo.
+ run (bar);
+ return 1;
+
+failure: // Expected: smstart sm on this path.
+ return 0;
+}
+
+// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
+// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
+// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
+// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
+// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
+// { dg-final { scan-assembler {\tstp\td8, d9,} } }
+// { dg-final { scan-assembler {\tstp\td10, d11,} } }
+// { dg-final { scan-assembler {\tstp\td12, d13,} } }
+// { dg-final { scan-assembler {\tstp\td14, d15,} } }
new file mode 100644
@@ -0,0 +1,46 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** foo:
+** ...
+** smstart sm
+** ...
+** smstop sm
+** bl __clear_cache
+** smstart sm
+** add x0, .*
+** smstop sm
+** bl run
+** smstart sm
+** mov w0, 1
+** ...
+** smstart sm
+** mov w0, 0
+** smstop sm
+** ...
+*/
+__arm_locally_streaming int
+foo (int *ptr)
+{
+ __label__ failure;
+
+ void bar () { *ptr += 1; goto failure; } // Nonlocal goto back into foo.
+ run (bar);
+ return 1;
+
+failure: // Expected: smstart sm here, then smstop sm after "mov w0, 0".
+ return 0;
+}
+
+// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
+// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
+// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
+// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
+// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
+// { dg-final { scan-assembler {\tstp\td8, d9,} } }
+// { dg-final { scan-assembler {\tstp\td10, d11,} } }
+// { dg-final { scan-assembler {\tstp\td12, d13,} } }
+// { dg-final { scan-assembler {\tstp\td14, d15,} } }
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** bar.0:
+** ...
+** smstart sm
+** ...
+** smstop sm
+** br x[0-9]+
+*/
+int
+foo (int *ptr)
+{
+ __label__ failure;
+
+ __arm_locally_streaming void bar () { *ptr += 1; goto failure; } // Expected: smstop sm before bar.0's br.
+ run (bar);
+ return 1;
+
+failure:
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)() __arm_streaming);
+
+/*
+** bar.0:
+** ...
+** smstop sm
+** br x[0-9]+
+*/
+int
+foo (int *ptr)
+{
+ __label__ failure;
+
+ void bar () __arm_streaming { *ptr += 1; goto failure; } // Expected: smstop sm before bar.0's br.
+ run (bar);
+ return 1;
+
+failure:
+ return 0;
+}
+
+// { dg-final { scan-assembler-not {smstart\t} } }
+// { dg-final { scan-assembler-not {mrs\t} } }
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)() __arm_streaming_compatible);
+
+/*
+** bar.0:
+** ...
+** mrs x16, svcr
+** ...
+** str x16, (.*)
+** ...
+** ldr x16, \1
+** tbz x16, 0, .*
+** smstop sm
+** br x[0-9]+
+*/
+int
+foo (int *ptr)
+{
+ __label__ failure;
+
+ void bar () __arm_streaming_compatible { *ptr += 1; goto failure; } // Expected: tbz-guarded smstop sm before bar.0's br.
+ run (bar);
+ return 1;
+
+failure:
+ return 0;
+}
+
+// { dg-final { scan-assembler-not {smstart\t} } }
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+
+void run(void (*)() __arm_inout("za"));
+void callee () __arm_inout("za");
+
+int
+foo (int *ptr)
+{
+ __label__ failure;
+
+ void bar () __arm_inout("za")
+ {
+ callee ();
+ *ptr += 1;
+ goto failure; // Nonlocal goto; no smstart or smstop is expected in the output.
+ }
+ run (bar);
+ return 1;
+
+failure:
+ return 0;
+}
+
+// { dg-final { scan-assembler-not {\tsmstart\t} } }
+// { dg-final { scan-assembler-not {\tsmstop\t} } }