diff mbox series

[v2,09/21] aarch64: Add GCS support for nonlocal stack save

Message ID 20241031132323.948159-10-yury.khrustalev@arm.com
State New
Headers show
Series aarch64: Add support for Guarded Control Stack extension | expand

Commit Message

Yury Khrustalev Oct. 31, 2024, 1:23 p.m. UTC
From: Szabolcs Nagy <szabolcs.nagy@arm.com>

Nonlocal stack save and restore has to also save and restore the GCS
pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto.

The GCS specific code is only emitted if GCS branch-protection is
enabled and the code always checks at runtime if GCS is enabled.

The new -mbranch-protection=gcs and old -mbranch-protection=none code
are ABI compatible: jmpbuf for __builtin_setjmp has space for 5
pointers, the layout is

  old layout: fp, pc, sp, unused, unused
  new layout: fp, pc, sp, gcsp, unused

Note: the ILP32 code generation is wrong as it saves the pointers with
Pmode (i.e. 8 bytes per pointer), but the user supplied buffer size is
for 5 pointers (4 bytes per pointer).  This patch does not fix that.

The nonlocal goto has no ABI compatibility issues as the goto and its
destination are in the same translation unit.

We use CDImode to allow extra space for GCS without the effect of 16-byte
alignment.

gcc/ChangeLog:

	* config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs.
	* config/aarch64/aarch64.md (save_stack_nonlocal): New.
	(restore_stack_nonlocal): New.
	* tree-nested.cc (get_nl_goto_field): Use fixed_size_mode for the
	nonlocal save area mode.
---
 gcc/config/aarch64/aarch64.h  |  7 +++
 gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++
 gcc/tree-nested.cc            |  4 +-
 3 files changed, 91 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 593319fd472..c3fbe9b464c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1297,6 +1297,13 @@  typedef struct
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 
+/* Have space for both SP and GCSPR in the SAVE_NONLOCAL case in
+   emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp
+   and __builtin_nonlocal_goto; other levels only need Pmode.
+   Note: On ILP32 the documented buf size is not enough (PR84150).  */
+#define STACK_SAVEAREA_MODE(LEVEL)			\
+  ((LEVEL) == SAVE_NONLOCAL ? E_CDImode : Pmode)
+
 #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
 
 #define RETURN_ADDR_RTX aarch64_return_addr
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 46adee5db62..8c5bf820737 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1201,6 +1201,88 @@  (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
+(define_expand "save_stack_nonlocal"
+  [(set (match_operand 0 "memory_operand")
+        (match_operand 1 "register_operand"))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[0], Pmode, 0);
+  emit_move_insn (stack_slot, operands[1]);
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* When GCS is enabled at runtime, save GCSPR after SP with code like
+		mov     x16, 1
+		chkfeat x16
+		tbnz    x16, 0, .L_done
+		mrs     tmp, gcspr_el0
+		str     tmp, [%0, 8]
+	.L_done:  */
+
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs = gen_reg_rtx (Pmode);
+      emit_insn (gen_aarch64_load_gcspr (gcs));
+      emit_move_insn (gcs_slot, gcs);
+      emit_label (done_label);
+    }
+  DONE;
+})
+
+(define_expand "restore_stack_nonlocal"
+  [(set (match_operand 0 "register_operand" "")
+	(match_operand 1 "memory_operand" ""))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[1], Pmode, 0);
+  emit_move_insn (operands[0], stack_slot);
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* Pop GCS entries until GCSPR equals the saved value, with code like
+		mov     x16, 1
+		chkfeat x16
+		tbnz    x16, 0, .L_done
+		ldr     tmp1, [%1, 8]
+		mrs     tmp2, gcspr_el0
+		subs    tmp2, tmp1, tmp2
+		b.eq    .L_done
+	.L_loop:
+		gcspopm
+		subs    tmp2, tmp2, 8
+		b.ne    .L_loop
+	.L_done:  */
+
+      rtx loop_label = gen_label_rtx ();
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs_old = gen_reg_rtx (Pmode);
+      emit_move_insn (gcs_old, gcs_slot);
+      rtx gcs_now = gen_reg_rtx (Pmode);
+      emit_insn (gen_aarch64_load_gcspr (gcs_now));
+      emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
+      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
+      emit_label (loop_label);
+      emit_insn (gen_aarch64_gcspopm_xzr ());
+      emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
+      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
+      emit_label (done_label);
+    }
+  DONE;
+})
+
 ;; -------------------------------------------------------------------
 ;; Subroutine calls and sibcalls
 ;; -------------------------------------------------------------------
diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc
index a54e72c3237..dfbd3432ce1 100644
--- a/gcc/tree-nested.cc
+++ b/gcc/tree-nested.cc
@@ -783,8 +783,8 @@  get_nl_goto_field (struct nesting_info *info)
       else
 	type = lang_hooks.types.type_for_mode (Pmode, 1);
 
-      scalar_int_mode mode
-	= as_a <scalar_int_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
+      fixed_size_mode mode
+	= as_a <fixed_size_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
       size = GET_MODE_SIZE (mode);
       size = size / GET_MODE_SIZE (Pmode);
       size = size + 1;