diff mbox series

[pushed,v2,23/25] aarch64: Handle PSTATE.SM across abnormal edges

Message ID 20231205101323.1914247-24-richard.sandiford@arm.com
State New
Headers show
Series aarch64: Add support for SME | expand

Commit Message

Richard Sandiford Dec. 5, 2023, 10:13 a.m. UTC
PSTATE.SM is always off on entry to an exception handler, and on entry
to a nonlocal goto receiver.  Those entry points need to switch
PSTATE.SM back to the appropriate state for the current function.
In the case of streaming-compatible functions, they need to restore
the mode that the caller was originally using.

The requirement on nonlocal goto receivers means that nonlocal
jumps need to ensure that PSTATE.SM is zero.

gcc/
	* config/aarch64/aarch64.cc: Include except.h.
	(aarch64_sme_mode_switch_regs::add_call_preserved_reg): New function.
	(aarch64_sme_mode_switch_regs::add_call_preserved_regs): Likewise.
	(aarch64_need_old_pstate_sm): Return true if the function has
	a nonlocal-goto or exception receiver.
	(aarch64_switch_pstate_sm_for_landing_pad): New function.
	(aarch64_switch_pstate_sm_for_jump): Likewise.
	(pass_switch_pstate_sm::gate): Enable the pass for all
	streaming and streaming-compatible functions.
	(pass_switch_pstate_sm::execute): Handle non-local gotos and their
	receivers.  Handle exception handler entry points.

gcc/testsuite/
	* g++.target/aarch64/sme/exceptions_2.C: New test.
	* gcc.target/aarch64/sme/nonlocal_goto_1.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_4.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_5.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_6.c: Likewise.
	* gcc.target/aarch64/sme/nonlocal_goto_7.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc                 | 141 ++++++++++++++++-
 .../g++.target/aarch64/sme/exceptions_2.C     | 148 ++++++++++++++++++
 .../gcc.target/aarch64/sme/nonlocal_goto_1.c  |  58 +++++++
 .../gcc.target/aarch64/sme/nonlocal_goto_2.c  |  44 ++++++
 .../gcc.target/aarch64/sme/nonlocal_goto_3.c  |  46 ++++++
 .../gcc.target/aarch64/sme/nonlocal_goto_4.c  |  25 +++
 .../gcc.target/aarch64/sme/nonlocal_goto_5.c  |  26 +++
 .../gcc.target/aarch64/sme/nonlocal_goto_6.c  |  31 ++++
 .../gcc.target/aarch64/sme/nonlocal_goto_7.c  |  25 +++
 9 files changed, 537 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c94016ccdcf..be44e67979f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -85,6 +85,7 @@ 
 #include "config/arm/aarch-common.h"
 #include "config/arm/aarch-common-protos.h"
 #include "ssa.h"
+#include "except.h"
 #include "tree-pass.h"
 #include "cfgbuild.h"
 
@@ -4758,6 +4759,8 @@  public:
   void add_reg (machine_mode, unsigned int);
   void add_call_args (rtx_call_insn *);
   void add_call_result (rtx_call_insn *);
+  void add_call_preserved_reg (unsigned int);
+  void add_call_preserved_regs (bitmap);
 
   void emit_prologue ();
   void emit_epilogue ();
@@ -4890,6 +4893,46 @@  aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn)
     add_reg (GET_MODE (dest), REGNO (dest));
 }
 
+/* REGNO is a register that is call-preserved under the current function's ABI.
+   Record that it must be preserved around the mode switch.  */
+
+void
+aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno)
+{
+  if (FP_REGNUM_P (regno))
+    switch (crtl->abi->id ())
+      {
+      case ARM_PCS_SVE:
+	add_reg (VNx16QImode, regno);
+	break;
+      case ARM_PCS_SIMD:
+	add_reg (V16QImode, regno);
+	break;
+      case ARM_PCS_AAPCS64:
+	add_reg (DImode, regno);
+	break;
+      default:
+	gcc_unreachable ();
+      }
+  else if (PR_REGNUM_P (regno))
+    add_reg (VNx16BImode, regno);
+}
+
+/* The hard registers in REGS are call-preserved under the current function's
+   ABI.  Record that they must be preserved around the mode switch.  */
+
+void
+aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs)
+{
+  bitmap_iterator bi;
+  unsigned int regno;
+  EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, bi)
+    if (HARD_REGISTER_NUM_P (regno))
+      add_call_preserved_reg (regno);
+    else
+      break;
+}
+
 /* Emit code to save registers before the mode switch.  */
 
 void
@@ -7423,6 +7466,23 @@  aarch64_need_old_pstate_sm ()
   if (aarch64_cfun_enables_pstate_sm ())
     return true;
 
+  /* Non-local goto receivers are entered with PSTATE.SM equal to 0,
+     but the function needs to return with PSTATE.SM unchanged.  */
+  if (nonlocal_goto_handler_labels)
+    return true;
+
+  /* Likewise for exception handlers.  */
+  eh_landing_pad lp;
+  for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i)
+    if (lp && lp->post_landing_pad)
+      return true;
+
+  /* Non-local gotos need to set PSTATE.SM to zero.  It's possible to call
+     streaming-compatible functions without SME being available, so PSTATE.SM
+     should only be changed if it is currently set to one.  */
+  if (crtl->has_nonlocal_goto)
+    return true;
+
   if (cfun->machine->call_switches_pstate_sm)
     for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn))
       if (auto *call = dyn_cast<rtx_call_insn *> (insn))
@@ -28323,6 +28383,59 @@  aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
   return seq;
 }
 
+/* BB is the target of an exception or nonlocal goto edge, which means
+   that PSTATE.SM is known to be 0 on entry.  Put it into the state that
+   the current function requires.  */
+
+static bool
+aarch64_switch_pstate_sm_for_landing_pad (basic_block bb)
+{
+  if (TARGET_NON_STREAMING)
+    return false;
+
+  start_sequence ();
+  rtx_insn *guard_label = nullptr;
+  if (TARGET_STREAMING_COMPATIBLE)
+    guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
+						  AARCH64_FL_SM_OFF);
+  aarch64_sme_mode_switch_regs args_switch;
+  args_switch.add_call_preserved_regs (df_get_live_in (bb));
+  args_switch.emit_prologue ();
+  aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON);
+  args_switch.emit_epilogue ();
+  if (guard_label)
+    emit_label (guard_label);
+  auto seq = get_insns ();
+  end_sequence ();
+
+  emit_insn_after (seq, bb_note (bb));
+  return true;
+}
+
+/* JUMP is a nonlocal goto.  Its target requires PSTATE.SM to be 0 on entry,
+   so arrange to make it so.  */
+
+static bool
+aarch64_switch_pstate_sm_for_jump (rtx_insn *jump)
+{
+  if (TARGET_NON_STREAMING)
+    return false;
+
+  start_sequence ();
+  rtx_insn *guard_label = nullptr;
+  if (TARGET_STREAMING_COMPATIBLE)
+    guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
+						  AARCH64_FL_SM_OFF);
+  aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF);
+  if (guard_label)
+    emit_label (guard_label);
+  auto seq = get_insns ();
+  end_sequence ();
+
+  emit_insn_before (seq, jump);
+  return true;
+}
+
 /* If CALL involves a change in PSTATE.SM, emit the instructions needed
    to switch to the new mode and the instructions needed to restore the
    original mode.  Return true if something changed.  */
@@ -28406,9 +28519,10 @@  public:
 };
 
 bool
-pass_switch_pstate_sm::gate (function *)
+pass_switch_pstate_sm::gate (function *fn)
 {
-  return cfun->machine->call_switches_pstate_sm;
+  return (aarch64_fndecl_pstate_sm (fn->decl) != AARCH64_FL_SM_OFF
+	  || cfun->machine->call_switches_pstate_sm);
 }
 
 /* Emit any instructions needed to switch PSTATE.SM.  */
@@ -28421,11 +28535,24 @@  pass_switch_pstate_sm::execute (function *fn)
   bitmap_clear (blocks);
   FOR_EACH_BB_FN (bb, fn)
     {
-      rtx_insn *insn;
-      FOR_BB_INSNS (bb, insn)
-	if (auto *call = dyn_cast<rtx_call_insn *> (insn))
-	  if (aarch64_switch_pstate_sm_for_call (call))
-	    bitmap_set_bit (blocks, bb->index);
+      if (has_abnormal_call_or_eh_pred_edge_p (bb)
+	  && aarch64_switch_pstate_sm_for_landing_pad (bb))
+	bitmap_set_bit (blocks, bb->index);
+
+      if (cfun->machine->call_switches_pstate_sm)
+	{
+	  rtx_insn *insn;
+	  FOR_BB_INSNS (bb, insn)
+	    if (auto *call = dyn_cast<rtx_call_insn *> (insn))
+	      if (aarch64_switch_pstate_sm_for_call (call))
+		bitmap_set_bit (blocks, bb->index);
+	}
+
+      auto end = BB_END (bb);
+      if (JUMP_P (end)
+	  && find_reg_note (end, REG_NON_LOCAL_GOTO, NULL_RTX)
+	  && aarch64_switch_pstate_sm_for_jump (end))
+	bitmap_set_bit (blocks, bb->index);
     }
   find_many_sub_basic_blocks (blocks);
   clear_aux_for_blocks ();
diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C
new file mode 100644
index 00000000000..f791b6ecc54
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C
@@ -0,0 +1,148 @@ 
+// { dg-options "-O -fno-optimize-sibling-calls" }
+// { dg-final { check-function-bodies "**" "" } }
+
+void n_callee();
+void s_callee() __arm_streaming;
+void sc_callee() __arm_streaming_compatible;
+
+void n_callee_ne() noexcept;
+void s_callee_ne() noexcept __arm_streaming;
+void sc_callee_ne() noexcept __arm_streaming_compatible;
+
+void n_caller1()
+{
+  try
+    {
+      n_callee();
+      sc_callee();
+    }
+  catch (...)
+    {
+      n_callee_ne();
+      sc_callee_ne();
+    }
+}
+// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } }
+
+/*
+** _Z9n_caller2v:
+**	...
+**	cntd	(x[0-9]+)
+**	str	\1, [^\n]+
+**	...
+**	bl	__cxa_begin_catch
+**	smstart	sm
+**	bl	_Z11s_callee_nev
+**	smstop	sm
+**	bl	__cxa_end_catch
+**	...
+*/
+void n_caller2()
+{
+  try
+    {
+      n_callee();
+      sc_callee();
+    }
+  catch (...)
+    {
+      s_callee_ne();
+    }
+}
+
+/*
+** _Z9s_caller1v:
+**	...
+**	bl	__cxa_end_catch
+**	smstart	sm
+**	...
+*/
+int s_caller1() __arm_streaming
+{
+  try
+    {
+      s_callee();
+      return 1;
+    }
+  catch (...)
+    {
+      return 2;
+    }
+}
+
+/*
+** _Z9s_caller2v:
+**	...
+**	bl	__cxa_begin_catch
+**	smstart	sm
+**	bl	_Z11s_callee_nev
+**	smstop	sm
+**	bl	__cxa_end_catch
+**	smstart	sm
+**	...
+*/
+int s_caller2() __arm_streaming
+{
+  try
+    {
+      n_callee();
+      return 1;
+    }
+  catch (...)
+    {
+      s_callee_ne();
+      return 2;
+    }
+}
+
+/*
+** _Z10sc_caller1v:
+**	...
+**	cntd	(x[0-9]+)
+**	str	\1, [^\n]+
+**	mrs	(x[0-9]+), svcr
+**	str	\2, ([^\n]+)
+**	...
+**	bl	__cxa_end_catch
+**	ldr	(x[0-9]+), \3
+**	tbz	\4, 0, [^\n]+
+**	smstart	sm
+**	...
+*/
+int sc_caller1() __arm_streaming_compatible
+{
+  try
+    {
+      sc_callee();
+      return 1;
+    }
+  catch (...)
+    {
+      return 2;
+    }
+}
+
+/*
+** _Z10ls_caller1v:
+**	...
+**	cntd	(x[0-9]+)
+**	str	\1, [^\n]+
+**	...
+**	bl	__cxa_begin_catch
+**	smstart	sm
+**	bl	_Z12sc_callee_nev
+**	smstop	sm
+**	bl	__cxa_end_catch
+**	...
+*/
+__arm_locally_streaming void ls_caller1()
+{
+  try
+    {
+      sc_callee();
+    }
+  catch (...)
+    {
+      sc_callee_ne();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c
new file mode 100644
index 00000000000..4e3869fcc9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c
@@ -0,0 +1,58 @@ 
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** foo:
+**	...
+**	mrs	x16, svcr
+**	...
+**	str	x16, (.*)
+**	...
+**	ldr	x16, \1
+**	tbz	x16, 0, .*
+**	smstop	sm
+**	bl	__clear_cache
+**	ldr	x16, \1
+**	tbz	x16, 0, .*
+**	smstart	sm
+**	add	x0, .*
+**	ldr	x16, \1
+**	tbz	x16, 0, .*
+**	smstop	sm
+**	bl	run
+**	ldr	x16, \1
+**	tbz	x16, 0, .*
+**	smstart	sm
+**	mov	w0, 1
+**	...
+**	ret
+**	ldr	x16, \1
+**	tbz	x16, 0, .*
+**	smstart	sm
+**	mov	w0, 0
+**	...
+*/
+int
+foo (int *ptr) __arm_streaming_compatible
+{
+  __label__ failure;
+
+  void bar () { *ptr += 1; goto failure; }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
+
+// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
+// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
+// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
+// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
+// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
+// { dg-final { scan-assembler {\tstp\td8, d9,} } }
+// { dg-final { scan-assembler {\tstp\td10, d11,} } }
+// { dg-final { scan-assembler {\tstp\td12, d13,} } }
+// { dg-final { scan-assembler {\tstp\td14, d15,} } }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c
new file mode 100644
index 00000000000..2a2db72c3a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c
@@ -0,0 +1,44 @@ 
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** foo:
+**	...
+**	smstop	sm
+**	bl	__clear_cache
+**	smstart	sm
+**	add	x0, .*
+**	smstop	sm
+**	bl	run
+**	smstart	sm
+**	mov	w0, 1
+**	...
+**	ret
+**	smstart	sm
+**	mov	w0, 0
+**	...
+*/
+int
+foo (int *ptr) __arm_streaming
+{
+  __label__ failure;
+
+  void bar () { *ptr += 1; goto failure; }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
+
+// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
+// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
+// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
+// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
+// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
+// { dg-final { scan-assembler {\tstp\td8, d9,} } }
+// { dg-final { scan-assembler {\tstp\td10, d11,} } }
+// { dg-final { scan-assembler {\tstp\td12, d13,} } }
+// { dg-final { scan-assembler {\tstp\td14, d15,} } }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c
new file mode 100644
index 00000000000..022b04052c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c
@@ -0,0 +1,46 @@ 
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** foo:
+**	...
+**	smstart	sm
+**	...
+**	smstop	sm
+**	bl	__clear_cache
+**	smstart	sm
+**	add	x0, .*
+**	smstop	sm
+**	bl	run
+**	smstart	sm
+**	mov	w0, 1
+**	...
+**	smstart	sm
+**	mov	w0, 0
+**	smstop	sm
+**	...
+*/
+__arm_locally_streaming int
+foo (int *ptr)
+{
+  __label__ failure;
+
+  void bar () { *ptr += 1; goto failure; }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
+
+// { dg-final { scan-assembler {\tstp\tx19, x20,} } }
+// { dg-final { scan-assembler {\tstp\tx21, x22,} } }
+// { dg-final { scan-assembler {\tstp\tx23, x24,} } }
+// { dg-final { scan-assembler {\tstp\tx25, x26,} } }
+// { dg-final { scan-assembler {\tstp\tx27, x28,} } }
+// { dg-final { scan-assembler {\tstp\td8, d9,} } }
+// { dg-final { scan-assembler {\tstp\td10, d11,} } }
+// { dg-final { scan-assembler {\tstp\td12, d13,} } }
+// { dg-final { scan-assembler {\tstp\td14, d15,} } }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c
new file mode 100644
index 00000000000..0446076286b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c
@@ -0,0 +1,25 @@ 
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)());
+
+/*
+** bar.0:
+**	...
+**	smstart	sm
+**	...
+**	smstop	sm
+**	br	x[0-9]+
+*/
+int
+foo (int *ptr)
+{
+  __label__ failure;
+
+  __arm_locally_streaming void bar () { *ptr += 1; goto failure; }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c
new file mode 100644
index 00000000000..4246aec8b2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c
@@ -0,0 +1,26 @@ 
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)() __arm_streaming);
+
+/*
+** bar.0:
+**	...
+**	smstop	sm
+**	br	x[0-9]+
+*/
+int
+foo (int *ptr)
+{
+  __label__ failure;
+
+  void bar () __arm_streaming { *ptr += 1; goto failure; }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
+
+// { dg-final { scan-assembler-not {smstart\t} } }
+// { dg-final { scan-assembler-not {mrs\t} } }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c
new file mode 100644
index 00000000000..151e2f22dc7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c
@@ -0,0 +1,31 @@ 
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void run(void (*)() __arm_streaming_compatible);
+
+/*
+** bar.0:
+**	...
+**	mrs	x16, svcr
+**	...
+**	str	x16, (.*)
+**	...
+**	ldr	x16, \1
+**	tbz	x16, 0, .*
+**	smstop	sm
+**	br	x[0-9]+
+*/
+int
+foo (int *ptr)
+{
+  __label__ failure;
+
+  void bar () __arm_streaming_compatible { *ptr += 1; goto failure; }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
+
+// { dg-final { scan-assembler-not {smstart\t} } }
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c
new file mode 100644
index 00000000000..9cc3ad5d236
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c
@@ -0,0 +1,25 @@ 
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+
+void run(void (*)() __arm_inout("za"));
+void callee () __arm_inout("za");
+
+int
+foo (int *ptr)
+{
+  __label__ failure;
+
+  void bar () __arm_inout("za")
+  {
+    callee ();
+    *ptr += 1;
+    goto failure;
+  }
+  run (bar);
+  return 1;
+
+failure:
+  return 0;
+}
+
+// { dg-final { scan-assembler-not {\tsmstart\t} } }
+// { dg-final { scan-assembler-not {\tsmstop\t} } }