@@ -8110,6 +8110,11 @@ aarch64_function_ok_for_sibcall (tree, tree exp)
if (crtl->abi->id () != expr_callee_abi (exp).id ())
return false;
+ tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
+ if (aarch64_fntype_sm_state (fntype) & ~aarch64_cfun_incoming_sm_state ())
+ return false;
+ if (aarch64_fntype_za_state (fntype) != aarch64_cfun_incoming_za_state ())
+ return false;
return true;
}
@@ -11236,7 +11241,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
aarch64_isa_flags);
aarch64_sme_mode_switch_regs args_switch;
- if (crtl->return_rtx && REG_P (crtl->return_rtx))
+ if (sibcall)
+ args_switch.add_call_args (sibcall);
+ else if (crtl->return_rtx && REG_P (crtl->return_rtx))
args_switch.add_reg (GET_MODE (crtl->return_rtx),
REGNO (crtl->return_rtx));
args_switch.emit_prologue ();
new file mode 100644
@@ -0,0 +1,129 @@
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+/*
+** test_d0:
+** ...
+** fmov x10, d0
+** smstop sm
+** fmov d0, x10
+** ...
+*/
+void consume_d0 (double d0);
+
+void __attribute__((arm_locally_streaming))
+test_d0 ()
+{
+ consume_d0 (1.0);
+}
+
+/*
+** test_d7:
+** ...
+** fmov x10, d0
+** fmov x11, d1
+** fmov x12, d2
+** fmov x13, d3
+** fmov x14, d4
+** fmov x15, d5
+** fmov x16, d6
+** fmov x17, d7
+** smstop sm
+** fmov d0, x10
+** fmov d1, x11
+** fmov d2, x12
+** fmov d3, x13
+** fmov d4, x14
+** fmov d5, x15
+** fmov d6, x16
+** fmov d7, x17
+** ...
+*/
+void consume_d7 (double d0, double d1, double d2, double d3,
+ double d4, double d5, double d6, double d7);
+void __attribute__((arm_locally_streaming))
+test_d7 ()
+{
+ consume_d7 (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+}
+
+/*
+** test_q7:
+** ...
+** stp q0, q1, \[sp, #?-128\]!
+** stp q2, q3, \[sp, #?32\]
+** stp q4, q5, \[sp, #?64\]
+** stp q6, q7, \[sp, #?96\]
+** smstop sm
+** ldp q2, q3, \[sp, #?32\]
+** ldp q4, q5, \[sp, #?64\]
+** ldp q6, q7, \[sp, #?96\]
+** ldp q0, q1, \[sp\], #?128
+** ...
+*/
+void consume_q7 (int8x16x4_t q0, int8x16x4_t q4);
+
+void __attribute__((arm_locally_streaming))
+test_q7 (int8x16x4_t *ptr)
+{
+ consume_q7 (ptr[0], ptr[1]);
+}
+
+/*
+** test_z7:
+** ...
+** addvl sp, sp, #-8
+** str z0, \[sp\]
+** str z1, \[sp, #1, mul vl\]
+** str z2, \[sp, #2, mul vl\]
+** str z3, \[sp, #3, mul vl\]
+** str z4, \[sp, #4, mul vl\]
+** str z5, \[sp, #5, mul vl\]
+** str z6, \[sp, #6, mul vl\]
+** str z7, \[sp, #7, mul vl\]
+** smstop sm
+** ldr z0, \[sp\]
+** ldr z1, \[sp, #1, mul vl\]
+** ldr z2, \[sp, #2, mul vl\]
+** ldr z3, \[sp, #3, mul vl\]
+** ldr z4, \[sp, #4, mul vl\]
+** ldr z5, \[sp, #5, mul vl\]
+** ldr z6, \[sp, #6, mul vl\]
+** ldr z7, \[sp, #7, mul vl\]
+** addvl sp, sp, #8
+** ...
+*/
+void consume_z7 (svint8x4_t z0, svint8x4_t z4);
+
+void __attribute__((arm_locally_streaming))
+test_z7 (svint8x4_t *ptr1, svint8x4_t *ptr2)
+{
+ consume_z7 (*ptr1, *ptr2);
+}
+
+/*
+** test_p3:
+** ...
+** addvl sp, sp, #-1
+** str p0, \[sp\]
+** str p1, \[sp, #1, mul vl\]
+** str p2, \[sp, #2, mul vl\]
+** str p3, \[sp, #3, mul vl\]
+** smstop sm
+** ldr p0, \[sp\]
+** ldr p1, \[sp, #1, mul vl\]
+** ldr p2, \[sp, #2, mul vl\]
+** ldr p3, \[sp, #3, mul vl\]
+** addvl sp, sp, #1
+** ...
+*/
+void consume_p3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3);
+
+void __attribute__((arm_locally_streaming))
+test_p3 (svbool_t *ptr1, svbool_t *ptr2, svbool_t *ptr3, svbool_t *ptr4)
+{
+ consume_p3 (*ptr1, *ptr2, *ptr3, *ptr4);
+}
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_streaming_compatible))
+sc_to_sc ()
+{
+ sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_s ()
+{
+ s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_n ()
+{
+ n_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_sc_ls ()
+{
+ sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_n_ls ()
+{
+ n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_streaming))
+s_to_sc ()
+{
+ sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_s ()
+{
+ s_callee ();
+}
+/* { dg-final { scan-assembler {\tb\ts_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_n ()
+{
+ n_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_sc_ls ()
+{
+ sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_n_ls ()
+{
+ n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void
+n_to_sc ()
+{
+ sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void
+n_to_s ()
+{
+ s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void
+n_to_n ()
+{
+ n_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_callee} } } */
+
+void
+n_to_sc_ls ()
+{
+ sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void
+n_to_n_ls ()
+{
+ n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_sc ()
+{
+ sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_s ()
+{
+ s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_n ()
+{
+ n_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_sc_ls ()
+{
+ sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_n_ls ()
+{
+ n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */
new file mode 100644
@@ -0,0 +1,45 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_locally_streaming))
+n_to_sc ()
+{
+ sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_s ()
+{
+ s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_n ()
+{
+ n_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_sc_ls ()
+{
+ sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_n_ls ()
+{
+ n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_shared_za)) shared_callee ();
+void __attribute__((noipa, arm_new_za)) new_callee () {}
+void normal_callee ();
+
+void __attribute__((arm_shared_za))
+shared_to_shared ()
+{
+ shared_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tshared_callee} } } */
+
+void __attribute__((arm_shared_za))
+shared_to_new ()
+{
+ new_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tnew_callee} } } */
+
+void __attribute__((arm_shared_za))
+shared_to_normal ()
+{
+ normal_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tnormal_callee} } } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_shared_za)) shared_callee ();
+void __attribute__((noipa, arm_new_za)) new_callee () {}
+void normal_callee ();
+
+void __attribute__((arm_new_za))
+new_to_shared ()
+{
+ shared_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tshared_callee} } } */
+
+void __attribute__((arm_new_za))
+new_to_new ()
+{
+ new_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnew_callee} } } */
+
+void __attribute__((arm_new_za))
+new_to_normal ()
+{
+ normal_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_shared_za)) shared_callee ();
+void __attribute__((noipa, arm_new_za)) new_callee () {}
+void normal_callee ();
+
+void
+normal_to_new ()
+{
+ new_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnew_callee} } } */
+
+void
+normal_to_normal ()
+{
+ normal_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */