@@ -1297,6 +1297,13 @@ typedef struct
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+/* The SAVE_NONLOCAL save area (emit_stack_save, __builtin_setjmp,
+   __builtin_longjmp and __builtin_nonlocal_goto) must have room for both
+   SP and GCSPR, so it uses CDImode; every other level uses Pmode.
+   Note: on ILP32 the documented buffer size is too small, see PR84150.  */
+#define STACK_SAVEAREA_MODE(LEVEL) \
+  ((LEVEL) != SAVE_NONLOCAL ? Pmode : E_CDImode)
+
#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
#define RETURN_ADDR_RTX aarch64_return_addr
@@ -1201,6 +1201,88 @@ (define_insn "*cb<optab><mode>1"
(const_int 1)))]
)
+;; Save SP (operand 1) in the nonlocal save area (operand 0); with GCS
+;; branch protection also save GCSPR_EL0 in the following Pmode slot.
+(define_expand "save_stack_nonlocal"
+  [(set (match_operand 0 "memory_operand")
+        (match_operand 1 "register_operand"))]
+  ""
+{
+  /* The first slot of the save area always receives the stack pointer.  */
+  emit_move_insn (adjust_address (operands[0], Pmode, 0), operands[1]);
+  if (!aarch64_gcs_enabled ())
+    DONE;
+
+  /* Save GCSPR_EL0, skipping the save when the run-time check fails:
+         mov     x16, 1
+         chkfeat x16
+         tbnz    x16, 0, .L_skip
+         mrs     tmp, gcspr_el0
+         str     tmp, [%0, 8]
+     .L_skip:  */
+  rtx skip_label = gen_label_rtx ();
+  rtx chk_reg = gen_rtx_REG (DImode, R16_REGNUM);
+  emit_move_insn (chk_reg, const1_rtx);
+  emit_insn (gen_aarch64_chkfeat ());
+  emit_insn (gen_tbranch_neqi3 (chk_reg, const0_rtx, skip_label));
+  rtx gcspr_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
+  rtx gcspr_val = gen_reg_rtx (Pmode);
+  emit_insn (gen_aarch64_load_gcspr (gcspr_val));
+  emit_move_insn (gcspr_slot, gcspr_val);
+  emit_label (skip_label);
+  DONE;
+})
+
+;; Restore SP (operand 0) from the nonlocal save area (operand 1); with GCS
+;; branch protection also pop the GCS back to the GCSPR_EL0 value saved there.
+(define_expand "restore_stack_nonlocal"
+  [(set (match_operand 0 "register_operand" "")
+        (match_operand 1 "memory_operand" ""))]
+  ""
+{
+  emit_move_insn (operands[0], adjust_address (operands[1], Pmode, 0));
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* Restore GCS with code like (tmp1, tmp2 are fresh pseudos)
+             mov     x16, 1
+             chkfeat x16
+             tbnz    x16, 0, .L_done
+             ldr     tmp1, [%1, 8]       ; saved GCSPR
+             mrs     tmp2, gcspr_el0     ; current GCSPR
+             subs    tmp2, tmp1, tmp2    ; distance still to pop
+             b.eq    .L_done
+         .L_loop:
+             gcspopm
+             subs    tmp2, tmp2, 8       ; count down by 8 per pop
+             b.ne    .L_loop
+         .L_done:  */
+      rtx loop_label = gen_label_rtx ();
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs_old = gen_reg_rtx (Pmode);
+      emit_move_insn (gcs_old, gcs_slot);
+      rtx gcs_now = gen_reg_rtx (Pmode);
+      emit_insn (gen_aarch64_load_gcspr (gcs_now));
+      emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
+      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
+      emit_label (loop_label);
+      emit_insn (gen_aarch64_gcspopm_xzr ());
+      emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
+      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
+      emit_label (done_label);
+    }
+  DONE;
+})
+
;; -------------------------------------------------------------------
;; Subroutine calls and sibcalls
;; -------------------------------------------------------------------
@@ -783,8 +783,8 @@ get_nl_goto_field (struct nesting_info *info)
else
type = lang_hooks.types.type_for_mode (Pmode, 1);
- scalar_int_mode mode
- = as_a <scalar_int_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
+ fixed_size_mode mode
+ = as_a <fixed_size_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
size = GET_MODE_SIZE (mode);
size = size / GET_MODE_SIZE (Pmode);
size = size + 1;
From: Szabolcs Nagy <szabolcs.nagy@arm.com> Nonlocal stack save and restore also has to save and restore the GCS pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto. The GCS-specific code is only emitted if GCS branch-protection is enabled, and the code always checks at runtime whether GCS is enabled. The new -mbranch-protection=gcs and old -mbranch-protection=none code are ABI compatible: jmpbuf for __builtin_setjmp has space for 5 pointers, and the layout is: old layout: fp, pc, sp, unused, unused; new layout: fp, pc, sp, gcsp, unused. Note: the ILP32 code generation is wrong as it saves the pointers with Pmode (i.e. 8 bytes per pointer), but the user-supplied buffer size is for 5 pointers (4 bytes per pointer); this is not fixed. The nonlocal goto has no ABI compatibility issues as the goto and its destination are in the same translation unit. We use CDImode to allow extra space for GCS without the effect of 16-byte alignment. gcc/ChangeLog: * config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for GCS. * config/aarch64/aarch64.md (save_stack_nonlocal): New. (restore_stack_nonlocal): New. * tree-nested.cc (get_nl_goto_field): Use fixed_size_mode for the save area mode. --- gcc/config/aarch64/aarch64.h | 7 +++ gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++ gcc/tree-nested.cc | 4 +- 3 files changed, 91 insertions(+), 2 deletions(-)