@@ -226,6 +226,12 @@
return riscv_gpr_save_operation_p (op);
})
+;; Match a PARALLEL that riscv_gpr_multi_push_operation_p accepts.
+;; The structural checks live in that C helper; this predicate only
+;; restricts the rtx code to PARALLEL before delegating to it.
+(define_special_predicate "gpr_multi_push_operation"
+ (match_code "parallel")
+{
+ return riscv_gpr_multi_push_operation_p (op);
+})
+
;; Predicates for the ZBS extension.
(define_predicate "single_bit_mask_operand"
(and (match_code "const_int")
@@ -56,6 +56,9 @@ extern bool riscv_split_64bit_move_p (rtx, rtx);
extern void riscv_split_doubleword_move (rtx, rtx);
extern const char *riscv_output_move (rtx, rtx);
extern const char *riscv_output_return ();
+extern void riscv_output_gpr_multi_push_pop (const char *, bool, rtx, rtx);
+extern bool riscv_gpr_multi_push_operation_p (rtx);
+extern rtx riscv_gen_multi_push_insn (struct riscv_frame_info *, int, int);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx);
@@ -117,6 +117,14 @@ struct GTY(()) riscv_frame_info {
/* How much the GPR save/restore routines adjust sp (or 0 if unused). */
unsigned save_libcall_adjustment;
+ /* the minimum number of bytes, in multiples of 16-byte address increments,
+ required to cover the registers in a multi push & pop. */
+ unsigned multi_push_adj_base;
+
+ /* The number of additional bytes, a multiple of the 16-byte address increment,
+ allocated for the stack frame in a multi push & pop. */
+ unsigned multi_push_adj_addi;
+
/* Offsets of fixed-point and floating-point save areas from frame bottom */
poly_int64 gp_sp_offset;
poly_int64 fp_sp_offset;
@@ -398,6 +406,8 @@ static const struct attribute_spec riscv_attribute_table[] =
{ NULL, 0, 0, false, false, false, false, NULL, NULL }
};
+
+#define GPR_SAVE_REG_ORDER_SKIP_T0T1 2
/* Order for the CLOBBERs/USEs of gpr_save. */
static const unsigned gpr_save_reg_order[] = {
INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
@@ -3014,6 +3024,47 @@ riscv_output_move (rtx dest, rtx src)
gcc_unreachable ();
}
+/* Output the Zcmp multi push/pop instruction NAME (the callers pass
+   "cm.push", "cm.pop" or "cm.popret").
+
+   RLIST_RTX is a CONST_INT holding the register-list encoding, which
+   must lie in [ZCMP_MIN_RLIST, ZCMP_MAX_RLIST].  SPIMM_RTX is a
+   CONST_INT holding the number of additional ZCMP_SP_INC_STEP-byte
+   stack increments, at most ZCMP_MAX_SPIMM.  The printed stack
+   adjustment is the frame's multi_push_adj_base plus those increments;
+   it is negated unless EPILOGUE_P is true.  */
+void
+riscv_output_gpr_multi_push_pop (const char *name, bool epilogue_p,
+                                 rtx rlist_rtx, rtx spimm_rtx)
+{
+  /* Register lists indexed by RLIST - ZCMP_MIN_RLIST.
+     Note: to include s10, s11 must also be included, so there is no
+     "ra, s0-s10" entry.  */
+  static const char *const reg_lists[] = {
+    "ra",
+    "ra, s0",
+    "ra, s0-s1",
+    "ra, s0-s2",
+    "ra, s0-s3",
+    "ra, s0-s4",
+    "ra, s0-s5",
+    "ra, s0-s6",
+    "ra, s0-s7",
+    "ra, s0-s8",
+    "ra, s0-s9",
+    "ra, s0-s11"
+  };
+
+  unsigned HOST_WIDE_INT rlist = UINTVAL (rlist_rtx);
+  unsigned HOST_WIDE_INT spimm = UINTVAL (spimm_rtx);
+  gcc_assert (rlist >= ZCMP_MIN_RLIST && rlist <= ZCMP_MAX_RLIST);
+  gcc_assert (spimm <= ZCMP_MAX_SPIMM);
+
+  /* Never index the table with an out-of-range encoding, even when
+     assertion checking is compiled out.  */
+  if (rlist < ZCMP_MIN_RLIST
+      || rlist - ZCMP_MIN_RLIST >= ARRAY_SIZE (reg_lists))
+    gcc_unreachable ();
+
+  unsigned HOST_WIDE_INT stack_adj
+    = cfun->machine->frame.multi_push_adj_base + spimm * ZCMP_SP_INC_STEP;
+  rtx op = GEN_INT (stack_adj);
+
+  /* Build "<name>\t{<regs>}, [-]%0" in one bounded step instead of a
+     chain of strcpy/strcat into a fixed-size buffer.  */
+  char pattern[100];
+  int len = snprintf (pattern, sizeof (pattern), "%s\t{%s}, %s%%0",
+                      name, reg_lists[rlist - ZCMP_MIN_RLIST],
+                      epilogue_p ? "" : "-");
+  gcc_assert (len >= 0 && (size_t) len < sizeof (pattern));
+
+  output_asm_insn (pattern, &op);
+}
+
+
const char *
riscv_output_return ()
{
@@ -4844,6 +4895,37 @@ riscv_save_reg_p (unsigned int regno)
return false;
}
+/* Return TRUE when Zcmp multi push and pop insns must not be used for
+   FRAME, FALSE otherwise.
+   Multi push & pop is only usable when every GPR in the save mask can
+   be covered, stack access is SP based, the GPRs are at the top of the
+   stack frame, and there is no conflict in stack allocation with other
+   features.  */
+static bool
+riscv_avoid_multi_push (const struct riscv_frame_info *frame)
+{
+  return (!TARGET_ZCMP
+          || crtl->calls_eh_return
+          || frame_pointer_needed
+          || cfun->machine->interrupt_handler_p
+          || cfun->machine->varargs_size != 0
+          || crtl->args.pretend_args_size != 0
+          || (frame->mask & ~ MULTI_PUSH_GPR_MASK) != 0);
+}
+
+/* Return true if the multi push insn should be used for FRAME: it must
+   not be ruled out by riscv_avoid_multi_push and the frame must have a
+   nonzero multi_push_adj_base.  */
+static bool
+riscv_use_multi_push (const struct riscv_frame_info *frame)
+{
+  return !riscv_avoid_multi_push (frame) && frame->multi_push_adj_base != 0;
+}
+
/* Return TRUE if a libcall to save/restore GPRs should be
avoided. FALSE otherwise. */
static bool
@@ -4881,6 +4963,52 @@ riscv_save_libcall_count (unsigned mask)
abort ();
}
+/* Return the number of s registers covered by a multi push and pop for
+   the save mask MASK.
+   Note that {s0-s10} is not valid in Zcmp, so that count is widened to
+   the {s0-s11} count instead.  */
+static unsigned
+riscv_multi_push_sregs_count (unsigned mask)
+{
+  unsigned count = riscv_save_libcall_count (mask);
+
+  if (count == ZCMP_INVALID_S0S10_SREGS_COUNTS)
+    count = ZCMP_S0S11_SREGS_COUNTS;
+
+  return count;
+}
+
+/* Return the rlist value for the save mask MASK: the s-register count
+   plus ZCMP_RLIST_OFFSET_TO_SREGS_COUNTS, except that the full s0-s11
+   list maps to ZCMP_REG_LIST_RA_S0S11.  */
+static unsigned
+riscv_calculate_rlist (unsigned mask)
+{
+  unsigned sregs = riscv_multi_push_sregs_count (mask);
+
+  return (sregs == ZCMP_S0S11_SREGS_COUNTS
+          ? ZCMP_REG_LIST_RA_S0S11
+          : sregs + ZCMP_RLIST_OFFSET_TO_SREGS_COUNTS);
+}
+
+/* Align VALUE to 16 bytes; poly_int variant.  */
+static poly_int64
+riscv_16bytes_align (poly_int64 value)
+{
+  poly_int64 aligned = aligned_upper_bound (value, 16);
+  return aligned;
+}
+
+/* Align VALUE to 16 bytes; HOST_WIDE_INT variant.  */
+static HOST_WIDE_INT
+riscv_16bytes_align (HOST_WIDE_INT value)
+{
+  HOST_WIDE_INT aligned = ROUND_UP(value, 16);
+  return aligned;
+}
+
+/* Align VALUE to the preferred stack boundary, expressed in bytes;
+   poly_int variant.  */
+static poly_int64
+riscv_stack_align (poly_int64 value)
+{
+  poly_int64 aligned
+    = aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
+  return aligned;
+}
+
+/* Align VALUE with RISCV_STACK_ALIGN; HOST_WIDE_INT variant.  */
+static HOST_WIDE_INT
+riscv_stack_align (HOST_WIDE_INT value)
+{
+  HOST_WIDE_INT aligned = RISCV_STACK_ALIGN (value);
+  return aligned;
+}
+
/* Populate the current function's riscv_frame_info structure.
RISC-V stack frames grown downward. High addresses are at the top.
@@ -4906,7 +5034,7 @@ riscv_save_libcall_count (unsigned mask)
| GPR save area | + UNITS_PER_WORD
| |
+-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
- | | + UNITS_PER_HWVALUE
+ | | + UNITS_PER_FP_REG
| FPR save area |
| |
+-------------------------------+ <-- frame_pointer_rtx (virtual)
@@ -4925,19 +5053,6 @@ riscv_save_libcall_count (unsigned mask)
static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
-/* Handle stack align for poly_int. */
-static poly_int64
-riscv_stack_align (poly_int64 value)
-{
- return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
-}
-
-static HOST_WIDE_INT
-riscv_stack_align (HOST_WIDE_INT value)
-{
- return RISCV_STACK_ALIGN (value);
-}
-
static void
riscv_compute_frame_info (void)
{
@@ -4985,8 +5100,9 @@ riscv_compute_frame_info (void)
if (frame->mask)
{
x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
- unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
+ /* 1 is for ra */
+ unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
/* Only use save/restore routines if they don't alter the stack size. */
if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
&& !riscv_avoid_save_libcall ())
@@ -4998,6 +5114,16 @@ riscv_compute_frame_info (void)
frame->save_libcall_adjustment = x_save_size;
}
+
+ if (!riscv_avoid_multi_push (frame))
+ {
+ /* num(ra, s0-sx) */
+ unsigned num_multi_push =
+ 1 + riscv_multi_push_sregs_count (frame->mask); /* 1 is for ra */
+ x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
+ frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
+ }
+
}
/* At the bottom of the frame are any outgoing stack arguments. */
@@ -5012,7 +5138,15 @@ riscv_compute_frame_info (void)
frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
/* Next are the callee-saved GPRs. */
if (frame->mask)
- offset += x_save_size;
+ {
+ offset += x_save_size;
+ /* Align to 16 bytes and add padding to the GPR part to honor
+ both stack alignment and Zcmp push/pop size alignment. */
+ if (riscv_use_multi_push (frame)
+ && known_lt(offset,
+ frame->multi_push_adj_base + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
+ offset = riscv_16bytes_align (offset);
+ }
frame->gp_sp_offset = offset - UNITS_PER_WORD;
/* The hard frame pointer points above the callee-saved GPRs. */
frame->hard_frame_pointer_offset = offset;
@@ -5356,6 +5490,41 @@ riscv_adjust_libcall_cfi_prologue ()
return dwarf;
}
+/* Build the chain of CFI register notes for a multi push that lowers
+   the stack pointer by SAVED_SIZE bytes.  The chain holds one
+   REG_CFA_OFFSET note per register in the frame's GPR save mask and a
+   final REG_CFA_ADJUST_CFA note for the stack pointer adjustment.
+   Return the head of the chain.  */
+static rtx
+riscv_adjust_multi_push_cfi_prologue (int saved_size)
+{
+  rtx dwarf = NULL_RTX;
+  unsigned int mask = cfun->machine->frame.mask;
+  int saved_cnt = 0;
+
+  /* Zcmp has no {s0-s10} list, so saving s10 also saves s11.  */
+  if (mask & S10_MASK)
+    mask |= S11_MASK;
+
+  /* NOTE(review): slots are assigned by counting the set mask bits, so
+     this presumably relies on the mask having no gaps below its highest
+     s register -- confirm against the register list actually pushed.  */
+  for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
+    if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
+      {
+        /* The save order is ra, s0, s1, s2 to s11
+           from low to high addr.  */
+        int offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
+
+        /* Describe the slot in Pmode rather than a fixed SImode: the
+           slots are UNITS_PER_WORD apart and riscv_gen_multi_push_insn
+           also refers to these registers in Pmode.  */
+        rtx reg = gen_rtx_REG (Pmode, regno);
+        rtx mem = gen_frame_mem (Pmode,
+                                 plus_constant (Pmode, stack_pointer_rtx,
+                                                offset));
+
+        rtx insn = gen_rtx_SET (mem, reg);
+        dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
+      }
+
+  /* Debug info for adjust sp.  */
+  rtx adjust_sp_rtx
+    = gen_rtx_SET (stack_pointer_rtx,
+                   plus_constant (Pmode, stack_pointer_rtx, -saved_size));
+  dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
+  return dwarf;
+}
static void
riscv_emit_stack_tie (void)
{
@@ -5373,7 +5542,8 @@ riscv_expand_prologue (void)
struct riscv_frame_info *frame = &cfun->machine->frame;
poly_int64 remaining_size = frame->total_size;
unsigned mask = frame->mask;
- rtx insn;
+ int spimm, rlist, multi_push_additional;
+ rtx insn, dwarf = NULL_RTX;
if (flag_stack_usage_info)
current_function_static_stack_size = constant_lower_bound (remaining_size);
@@ -5381,8 +5551,35 @@ riscv_expand_prologue (void)
if (cfun->machine->naked_p)
return;
+ /* Prefer multi-push to save-restore libcall. */
+ if (riscv_use_multi_push(frame))
+ {
+ remaining_size -= frame->multi_push_adj_base;
+ if (known_gt(remaining_size, 2 * ZCMP_SP_INC_STEP))
+ spimm = 3;
+ else if (known_gt(remaining_size, ZCMP_SP_INC_STEP))
+ spimm = 2;
+ else if (known_gt(remaining_size, 0))
+ spimm = 1;
+ else
+ spimm = 0;
+
+ multi_push_additional = spimm * ZCMP_SP_INC_STEP;
+ frame->multi_push_adj_addi = multi_push_additional;
+ remaining_size -= multi_push_additional;
+
+ /* emit multi push insn & dwarf along with it. */
+ rlist = riscv_calculate_rlist(frame->mask);
+ dwarf = riscv_adjust_multi_push_cfi_prologue (frame->multi_push_adj_base
+ + multi_push_additional);
+ insn = emit_insn (riscv_gen_multi_push_insn (frame, spimm, rlist));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ REG_NOTES (insn) = dwarf;
+
+ frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
+ }
/* When optimizing for size, call a subroutine to save the registers. */
- if (riscv_use_save_libcall (frame))
+ else if (riscv_use_save_libcall (frame))
{
rtx dwarf = NULL_RTX;
dwarf = riscv_adjust_libcall_cfi_prologue ();
@@ -5398,13 +5595,15 @@ riscv_expand_prologue (void)
/* Save the registers. */
if ((frame->mask | frame->fmask) != 0)
{
- HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
-
- insn = gen_add3_insn (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (-step1));
- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
- remaining_size -= step1;
+ if (known_gt (remaining_size, frame->frame_pointer_offset))
+ {
+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
+ remaining_size -= step1;
+ insn = gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-step1));
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ }
riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
}
@@ -5461,6 +5660,32 @@ riscv_expand_prologue (void)
}
}
+/* Build the chain of CFI register notes for a multi pop that raises
+   the stack pointer by SAVED_SIZE bytes.  The chain holds one
+   REG_CFA_ADJUST_CFA note for the stack pointer adjustment and one
+   REG_CFA_RESTORE note per register in the frame's GPR save mask.
+   Return the head of the chain.  */
+static rtx
+riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
+{
+  rtx dwarf = NULL_RTX;
+  unsigned int mask = cfun->machine->frame.mask;
+
+  /* Zcmp has no {s0-s10} list, so restoring s10 also restores s11.  */
+  if (mask & S10_MASK)
+    mask |= S11_MASK;
+
+  /* Debug info for adjust sp.  */
+  rtx adjust_sp_rtx
+    = gen_rtx_SET (stack_pointer_rtx,
+                   plus_constant (Pmode, stack_pointer_rtx, saved_size));
+  dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
+
+  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (mask, regno - GP_REG_FIRST))
+      {
+        /* Use Pmode rather than a fixed SImode, matching the mode
+           riscv_gen_multi_push_insn uses for the same registers.  */
+        rtx reg = gen_rtx_REG (Pmode, regno);
+        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+      }
+
+  return dwarf;
+}
+
static rtx
riscv_adjust_libcall_cfi_epilogue ()
{
@@ -5500,10 +5725,18 @@ riscv_expand_epilogue (int style)
struct riscv_frame_info *frame = &cfun->machine->frame;
unsigned mask = frame->mask;
HOST_WIDE_INT step2 = 0;
- bool use_restore_libcall = ((style == NORMAL_RETURN)
- && riscv_use_save_libcall (frame));
- unsigned libcall_size = (use_restore_libcall
- ? frame->save_libcall_adjustment : 0);
+ bool use_multi_pop_normal = ((style == NORMAL_RETURN)
+ && riscv_use_multi_push (frame));
+ bool use_multi_pop_sibcall = ((style == SIBCALL_RETURN)
+ && riscv_use_multi_push (frame));
+ bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
+
+ bool use_restore_libcall = !use_multi_pop && ((style == NORMAL_RETURN)
+ && riscv_use_save_libcall (frame));
+ unsigned libcall_size = use_restore_libcall && !use_multi_pop ?
+ frame->save_libcall_adjustment : 0;
+ unsigned multipop_size = use_multi_pop ?
+ frame->multi_push_adj_base + frame->multi_push_adj_addi : 0;
rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
rtx insn;
@@ -5574,18 +5807,25 @@ riscv_expand_epilogue (int style)
REG_NOTES (insn) = dwarf;
}
- if (use_restore_libcall)
- frame->mask = 0; /* Temporarily fib for GPRs. */
+ if (use_restore_libcall || use_multi_pop)
+ frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
/* If we need to restore registers, deallocate as much stack as
possible in the second step without going out of range. */
- if ((frame->mask | frame->fmask) != 0)
+ if (use_multi_pop)
+ {
+ if (frame->fmask
+ && known_gt (frame->total_size - multipop_size,
+ frame->frame_pointer_offset))
+ step2 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
+ }
+ else if ((frame->mask | frame->fmask) != 0)
step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
- if (use_restore_libcall)
+ if (use_restore_libcall || use_multi_pop)
frame->mask = mask; /* Undo the above fib. */
- poly_int64 step1 = frame->total_size - step2 - libcall_size;
+ poly_int64 step1 = frame->total_size - step2 - libcall_size - multipop_size ;
/* Set TARGET to BASE + STEP1. */
if (known_gt (step1, 0))
@@ -5620,7 +5860,7 @@ riscv_expand_epilogue (int style)
adjust));
rtx dwarf = NULL_RTX;
rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (step2));
+ GEN_INT (step2 + libcall_size + multipop_size));
dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
RTX_FRAME_RELATED_P (insn) = 1;
@@ -5635,15 +5875,15 @@ riscv_expand_epilogue (int style)
epilogue_cfa_sp_offset = step2;
}
- if (use_restore_libcall)
+ if (use_restore_libcall || use_multi_pop)
frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
/* Restore the registers. */
- riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
+ riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size - multipop_size,
riscv_restore_reg,
true, style == EXCEPTION_RETURN);
- if (use_restore_libcall)
+ if (use_restore_libcall || use_multi_pop)
frame->mask = mask; /* Undo the above fib. */
if (need_barrier_p)
@@ -5657,14 +5897,30 @@ riscv_expand_epilogue (int style)
rtx dwarf = NULL_RTX;
rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- const0_rtx);
+ GEN_INT (libcall_size + multipop_size));
dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
RTX_FRAME_RELATED_P (insn) = 1;
REG_NOTES (insn) = dwarf;
}
- if (use_restore_libcall)
+ if (use_multi_pop)
+ {
+ int rlist = riscv_calculate_rlist (frame->mask);
+ if (use_multi_pop_normal)
+ insn = emit_jump_insn (gen_gpr_multi_popret (GEN_INT(rlist),
+ GEN_INT(frame->multi_push_adj_addi / ZCMP_SP_INC_STEP)));
+ else
+ insn = emit_insn (gen_gpr_multi_pop (GEN_INT(rlist),
+ GEN_INT(frame->multi_push_adj_addi / ZCMP_SP_INC_STEP)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (frame->multi_push_adj_base +
+ frame->multi_push_adj_addi);
+ REG_NOTES (insn) = dwarf;
+ if (use_multi_pop_normal)
+ return;
+ }
+ else if (use_restore_libcall)
{
rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
@@ -6937,6 +7193,70 @@ riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
return gen_rtx_PARALLEL (VOIDmode, vec);
}
+/* Build the PARALLEL for a multi push insn for FRAME.  Element 0 is an
+   UNSPECV_GPR_MULTI_PUSH unspec_volatile carrying RLIST and SPIMM; the
+   remaining elements are USEs of ra and the pushed s registers, taken
+   from gpr_save_reg_order with its leading entries skipped.  */
+rtx
+riscv_gen_multi_push_insn (struct riscv_frame_info *frame, int spimm, int rlist)
+{
+  unsigned num_sregs = riscv_multi_push_sregs_count (frame->mask);
+  /* One slot for the unspec, one for ra, then the s registers.  */
+  unsigned nelts = 2 + num_sregs;
+  rtvec elts = rtvec_alloc (nelts);
+
+  gcc_assert (nelts <=
+              ARRAY_SIZE (gpr_save_reg_order) - GPR_SAVE_REG_ORDER_SKIP_T0T1);
+
+  rtvec unspec_ops = gen_rtvec (2, GEN_INT (rlist), GEN_INT (spimm));
+  RTVEC_ELT (elts, 0) = gen_rtx_UNSPEC_VOLATILE (VOIDmode, unspec_ops,
+                                                 UNSPECV_GPR_MULTI_PUSH);
+
+  unsigned idx = 1;
+  while (idx < nelts)
+    {
+      unsigned regno = gpr_save_reg_order[idx + GPR_SAVE_REG_ORDER_SKIP_T0T1];
+      RTVEC_ELT (elts, idx) = gen_rtx_USE (Pmode, gen_rtx_REG (Pmode, regno));
+      ++idx;
+    }
+
+  return gen_rtx_PARALLEL (VOIDmode, elts);
+}
+
+
+/* Return true if OP is a valid gpr_multi_push pattern: a PARALLEL whose
+   first element is the UNSPECV_GPR_MULTI_PUSH unspec_volatile with two
+   CONST_INT operands, and whose remaining elements are USEs of the
+   registers listed in gpr_save_reg_order after the skipped leading
+   entries, in that order.  */
+
+bool
+riscv_gpr_multi_push_operation_p (rtx op)
+{
+  unsigned len = XVECLEN (op, 0);
+
+  /* The bound also keeps every gpr_save_reg_order index below in range.  */
+  if (len == 0
+      || len > ARRAY_SIZE (gpr_save_reg_order) - GPR_SAVE_REG_ORDER_SKIP_T0T1)
+    return false;
+
+  /* First element in parallel is unspec.  Check its operand count
+     before reading the second operand.  */
+  rtx unspec = XVECEXP (op, 0, 0);
+  if (GET_CODE (unspec) != UNSPEC_VOLATILE
+      || XINT (unspec, 1) != UNSPECV_GPR_MULTI_PUSH
+      || XVECLEN (unspec, 0) != 2
+      || GET_CODE (XVECEXP (unspec, 0, 0)) != CONST_INT
+      || GET_CODE (XVECEXP (unspec, 0, 1)) != CONST_INT)
+    return false;
+
+  /* USEs, must check the order.  Every remaining element is examined;
+     the register of a USE is its operand 0.  */
+  for (unsigned i = 1; i < len; i++)
+    {
+      rtx elt = XVECEXP (op, 0, i);
+      if (GET_CODE (elt) != USE
+          || !REG_P (XEXP (elt, 0))
+          || (REGNO (XEXP (elt, 0))
+              != gpr_save_reg_order[i + GPR_SAVE_REG_ORDER_SKIP_T0T1]))
+        return false;
+    }
+
+  return true;
+}
+
+
/* Return true if it's valid gpr_save pattern. */
bool
@@ -413,6 +413,32 @@ ASM_MISA_SPEC
#define RISCV_CALL_ADDRESS_TEMP(MODE) \
gen_rtx_REG (MODE, RISCV_CALL_ADDRESS_TEMP_REGNUM)
+/* Single-bit masks for ra and s0-s11, one bit per hard register number.  */
+#define RETURN_ADDR_MASK ( 1 << RETURN_ADDR_REGNUM)
+#define S0_MASK ( 1 << S0_REGNUM)
+#define S1_MASK ( 1 << S1_REGNUM)
+#define S2_MASK ( 1 << S2_REGNUM)
+#define S3_MASK ( 1 << S3_REGNUM)
+#define S4_MASK ( 1 << S4_REGNUM)
+#define S5_MASK ( 1 << S5_REGNUM)
+#define S6_MASK ( 1 << S6_REGNUM)
+#define S7_MASK ( 1 << S7_REGNUM)
+#define S8_MASK ( 1 << S8_REGNUM)
+#define S9_MASK ( 1 << S9_REGNUM)
+#define S10_MASK ( 1 << S10_REGNUM)
+#define S11_MASK ( 1 << S11_REGNUM)
+
+/* Union of the masks above: the GPRs a multi push & pop can cover.  */
+#define MULTI_PUSH_GPR_MASK ( RETURN_ADDR_MASK | S0_MASK | S1_MASK | S2_MASK | S3_MASK \
+ | S4_MASK | S5_MASK | S6_MASK | S7_MASK \
+ | S8_MASK | S9_MASK | S10_MASK | S11_MASK )
+/* Largest spimm operand accepted when outputting a multi push/pop.  */
+#define ZCMP_MAX_SPIMM 3
+/* Bytes of stack adjustment added per spimm step.  */
+#define ZCMP_SP_INC_STEP 16
+/* Inclusive bounds of the rlist operand.  */
+#define ZCMP_MAX_RLIST 15
+#define ZCMP_MIN_RLIST 4
+/* An s-register count of 11 (s0-s10) is not encodable; it is widened to
+   12 (s0-s11).  */
+#define ZCMP_INVALID_S0S10_SREGS_COUNTS 11
+#define ZCMP_S0S11_SREGS_COUNTS 12
+/* rlist value for {ra, s0-s11}.  */
+#define ZCMP_REG_LIST_RA_S0S11 15
+/* Added to the s-register count to form the rlist value for the other
+   register lists.  */
+#define ZCMP_RLIST_OFFSET_TO_SREGS_COUNTS 4
+
#define MCOUNT_NAME "_mcount"
#define NO_PROFILE_COUNTERS 1
@@ -106,6 +106,10 @@
;; Zihintpause unspec
UNSPECV_PAUSE
+ ;; zc unspecs
+ UNSPECV_GPR_MULTI_PUSH
+ UNSPECV_GPR_MULTI_POP
+
;; XTheadFmv unspec
UNSPEC_XTHEADFMV
UNSPEC_XTHEADFMV_HW
@@ -135,6 +139,8 @@
(EXCEPTION_RETURN 2)
(VL_REGNUM 66)
(VTYPE_REGNUM 67)
+ (PROLOGUE 0)
+ (EPILOGUE 1)
])
(include "predicates.md")
@@ -3205,3 +3211,4 @@
(include "sifive-7.md")
(include "thead.md")
(include "vector.md")
+(include "zc.md")
new file mode 100644
@@ -0,0 +1,55 @@
+;; Machine description for RISC-V Zc extension.
+;; Copyright (C) 2011-2023 Free Software Foundation, Inc.
+;; Contributed by Fei Gao (gaofei@eswincomputing.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Multi pop without return, available under TARGET_ZCMP.
+;; Operand 0 is the rlist value and operand 1 the spimm value; both are
+;; passed to riscv_output_gpr_multi_push_pop, which prints the "cm.pop"
+;; instruction itself, so the template returns an empty string.
+(define_insn "gpr_multi_pop"
+ [(unspec_volatile [(match_operand 0 "const_int_operand")
+ (match_operand 1 "const_int_operand")]
+ UNSPECV_GPR_MULTI_POP)]
+ "TARGET_ZCMP"
+ "*
+ riscv_output_gpr_multi_push_pop(\"cm.pop\", EPILOGUE, operands[0], operands[1]);
+ return \"\";
+ "
+)
+;; Multi pop combined with a return, available under TARGET_ZCMP.
+;; Operand 0 is the rlist value and operand 1 the spimm value; the
+;; pattern also contains a (return) and a use of the return address
+;; register.  The "cm.popret" text is printed by
+;; riscv_output_gpr_multi_push_pop, so the template returns an empty string.
+;; NOTE(review): the purpose of the trailing (const_int 0) element is not
+;; evident from this file -- confirm it is intended.
+(define_insn "gpr_multi_popret"
+ [(unspec_volatile [(match_operand 0 "const_int_operand")
+ (match_operand 1 "const_int_operand")]
+ UNSPECV_GPR_MULTI_POP)
+ (return)
+ (use (reg:SI RETURN_ADDR_REGNUM))
+ (const_int 0)]
+ "TARGET_ZCMP"
+ "*
+ riscv_output_gpr_multi_push_pop(\"cm.popret\", EPILOGUE, operands[0], operands[1]);
+ return \"\";
+ "
+)
+
+;; Multi push, available under TARGET_ZCMP.
+;; Operand 2 is the whole PARALLEL, validated by the
+;; gpr_multi_push_operation predicate; its first element is the
+;; unspec_volatile carrying the rlist value (operand 0) and the spimm
+;; value (operand 1).  The "cm.push" text is printed by
+;; riscv_output_gpr_multi_push_pop, so the template returns an empty string.
+(define_insn "gpr_multi_push"
+ [(match_parallel 2 "gpr_multi_push_operation"
+ [(unspec_volatile [(match_operand 0 "const_int_operand")
+ (match_operand 1 "const_int_operand")]
+ UNSPECV_GPR_MULTI_PUSH)])]
+ "TARGET_ZCMP"
+ "*
+ riscv_output_gpr_multi_push_pop(\"cm.push\", PROLOGUE, operands[0], operands[1]);
+ return \"\";
+ "
+)
new file mode 100644
@@ -0,0 +1,239 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -march=rv32e_zca_zcmp -mabi=ilp32e -mcmodel=medlow" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char my_getchar();
+float getf();
+int __attribute__((noinline)) incoming_stack_args
+ (int arg0, int arg1, int arg2, int arg3,
+ int arg4, int arg5, int arg6, int arg7, int arg8);
+int getint();
+void PrintInts (int n, ...); // varargs
+void __attribute__((noinline)) PrintIntsNoVaStart (int n, ...); // varargs
+void PrintInts2 (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5, int n, ...);
+extern void f1(void);
+extern void f2(void);
+
+/*
+**test1:
+** ...
+** cm.push {ra, s0-s1}, -64
+** ...
+** cm.popret {ra, s0-s1}, 64
+** ...
+*/
+int test1()
+{
+ char volatile array[3120];
+ float volatile farray[3120];
+
+ float sum = 0;
+ for (int i = 0; i < 3120; i++)
+ {
+ array[i] = my_getchar();
+ farray[i] = my_getchar() * 1.2;
+ sum += array[i] + farray[i];
+ }
+ return sum;
+}
+
+/*
+**test2_step1_0_size:
+** ...
+** cm.push {ra, s0}, -64
+** ...
+** cm.popret {ra, s0}, 64
+** ...
+*/
+int test2_step1_0_size()
+{
+ int volatile iarray[3120 + 1824/4 -8];
+
+ for (int i = 0; i < 3120 + 1824/4 - 8; i++)
+ {
+ iarray[i] = my_getchar() * 2;
+ }
+ return iarray[0] + iarray[1];
+}
+
+/*
+**test3:
+** ...
+** cm.push {ra, s0-s1}, -64
+** ...
+** cm.popret {ra, s0-s1}, 64
+** ...
+*/
+float test3()
+{
+ char volatile array[3120];
+ float volatile farray[3120];
+
+ float sum = 0, f1 = 0, f2 = 0, f3 = 0, f4 = 0, f5 = 0, f6 = 0, f7 = 0;
+
+ for (int i = 0; i < 3120; i++)
+ {
+ f1 = getf();
+ f2 = getf();
+ f3 = getf();
+ f4 = getf();
+ array[i] = my_getchar();
+ farray[i] = my_getchar() * 1.2;
+ sum += array[i] + farray[i] + f1 + f2 + f3 + f4;
+ }
+ return sum;
+}
+
+/*
+**outgoing_stack_args:
+** ...
+** cm.push {ra, s0}, -32
+** ...
+** cm.popret {ra, s0}, 32
+** ...
+*/
+int outgoing_stack_args()
+{
+ int local = getint();
+ return local +incoming_stack_args(0, 1, 2, 3, 4, 5, 6, 7, 8);
+}
+
+/*
+**callPrintInts:
+** ...
+** cm.push {ra}, -32
+** ...
+** cm.popret {ra}, 32
+** ...
+*/
+float callPrintInts()
+{
+ volatile float f = getf(); // f in local
+ PrintInts(9,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**callPrint:
+** ...
+** cm.push {ra}, -32
+** ...
+** cm.popret {ra}, 32
+** ...
+*/
+float callPrint()
+{
+ volatile float f = getf(); // f in local
+ PrintIntsNoVaStart(0,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**callPrint_S:
+** ...
+** cm.push {ra, s0}, -32
+** ...
+** cm.popret {ra, s0}, 32
+** ...
+*/
+float callPrint_S()
+{
+ float f = getf();
+ PrintIntsNoVaStart(0,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**callPrint_2:
+** ...
+** cm.push {ra, s0}, -32
+** ...
+** cm.popret {ra, s0}, 32
+** ...
+*/
+float callPrint_2()
+{
+ float f = getf();
+ PrintInts2(0,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**test_step1_0bytes_save_restore:
+** ...
+** cm.push {ra}, -16
+** ...
+** cm.popret {ra}, 16
+** ...
+*/
+int test_step1_0bytes_save_restore()
+{
+
+ int a = 9;
+ int b = my_getchar();
+ return a +b;
+}
+
+/*
+**test_s0:
+** ...
+** cm.push {ra, s0}, -16
+** ...
+** cm.popret {ra, s0}, 16
+** ...
+*/
+int test_s0()
+{
+
+ int a = my_getchar();
+ int b = my_getchar();
+ return a +b;
+}
+
+/*
+**test_s1:
+** ...
+** cm.push {ra, s0-s1}, -16
+** ...
+** cm.popret {ra, s0-s1}, 16
+** ...
+*/
+int test_s1()
+{
+
+ int s0 = my_getchar();
+ int s1 = my_getchar();
+ int b = my_getchar();
+ return s1 +s0 +b;
+}
+
+/*
+**test_f0:
+** ...
+** cm.push {ra, s0-s1}, -16
+** ...
+** cm.popret {ra, s0-s1}, 16
+** ...
+*/
+int test_f0()
+{
+
+ int s0 = my_getchar();
+ float f0 = getf();
+ int b = my_getchar();
+ return f0 +s0 +b;
+}
+
+/*
+**foo:
+** cm.push {ra}, -16
+** call f1
+** cm.pop {ra}, 16
+** tail f2
+*/
+void foo(void)
+{
+ f1();
+ f2();
+}
new file mode 100644
@@ -0,0 +1,239 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -march=rv32imaf_zca_zcmp -mabi=ilp32f -mcmodel=medlow" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char my_getchar();
+float getf();
+int __attribute__((noinline)) incoming_stack_args
+ (int arg0, int arg1, int arg2, int arg3,
+ int arg4, int arg5, int arg6, int arg7, int arg8);
+int getint();
+void PrintInts (int n, ...); // varargs
+void __attribute__((noinline)) PrintIntsNoVaStart (int n, ...); // varargs
+void PrintInts2 (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5, int n, ...);
+extern void f1(void);
+extern void f2(void);
+
+/*
+**test1:
+** ...
+** cm.push {ra, s0-s4}, -80
+** ...
+** cm.popret {ra, s0-s4}, 80
+** ...
+*/
+int test1()
+{
+ char volatile array[3120];
+ float volatile farray[3120];
+
+ float sum = 0;
+ for (int i = 0; i < 3120; i++)
+ {
+ array[i] = my_getchar();
+ farray[i] = my_getchar() * 1.2;
+ sum += array[i] + farray[i];
+ }
+ return sum;
+}
+
+/*
+**test2_step1_0_size:
+** ...
+** cm.push {ra, s0-s1}, -64
+** ...
+** cm.popret {ra, s0-s1}, 64
+** ...
+*/
+int test2_step1_0_size()
+{
+ int volatile iarray[3120 + 1824/4 -8];
+
+ for (int i = 0; i < 3120 + 1824/4 - 8; i++)
+ {
+ iarray[i] = my_getchar() * 2;
+ }
+ return iarray[0] + iarray[1];
+}
+
+/*
+**test3:
+** ...
+** cm.push {ra, s0-s4}, -80
+** ...
+** cm.popret {ra, s0-s4}, 80
+** ...
+*/
+float test3()
+{
+ char volatile array[3120];
+ float volatile farray[3120];
+
+ float sum = 0, f1 = 0, f2 = 0, f3 = 0, f4 = 0, f5 = 0, f6 = 0, f7 = 0;
+
+ for (int i = 0; i < 3120; i++)
+ {
+ f1 = getf();
+ f2 = getf();
+ f3 = getf();
+ f4 = getf();
+ array[i] = my_getchar();
+ farray[i] = my_getchar() * 1.2;
+ sum += array[i] + farray[i] + f1 + f2 + f3 + f4;
+ }
+ return sum;
+}
+
+/*
+**outgoing_stack_args:
+** ...
+** cm.push {ra, s0}, -32
+** ...
+** cm.popret {ra, s0}, 32
+** ...
+*/
+int outgoing_stack_args()
+{
+ int local = getint();
+ return local +incoming_stack_args(0, 1, 2, 3, 4, 5, 6, 7, 8);
+}
+
+/*
+**callPrintInts:
+** ...
+** cm.push {ra}, -48
+** ...
+** cm.popret {ra}, 48
+** ...
+*/
+float callPrintInts()
+{
+ volatile float f = getf(); // f in local
+ PrintInts(9,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**callPrint:
+** ...
+** cm.push {ra}, -48
+** ...
+** cm.popret {ra}, 48
+** ...
+*/
+float callPrint()
+{
+ volatile float f = getf(); // f in local
+ PrintIntsNoVaStart(0,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**callPrint_S:
+** ...
+** cm.push {ra}, -48
+** ...
+** cm.popret {ra}, 48
+** ...
+*/
+float callPrint_S()
+{
+ float f = getf();
+ PrintIntsNoVaStart(0,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**callPrint_2:
+** ...
+** cm.push {ra}, -48
+** ...
+** cm.popret {ra}, 48
+** ...
+*/
+float callPrint_2()
+{
+ float f = getf();
+ PrintInts2(0,1,2,3,4,5,6,7,8,9);
+ return f;
+}
+
+/*
+**test_step1_0bytes_save_restore:
+** ...
+** cm.push {ra}, -16
+** ...
+** cm.popret {ra}, 16
+** ...
+*/
+int test_step1_0bytes_save_restore()
+{
+
+ int a = 9;
+ int b = my_getchar();
+ return a +b;
+}
+
+/*
+**test_s0:
+** ...
+** cm.push {ra, s0}, -16
+** ...
+** cm.popret {ra, s0}, 16
+** ...
+*/
+int test_s0()
+{
+
+ int a = my_getchar();
+ int b = my_getchar();
+ return a +b;
+}
+
+/*
+**test_s1:
+** ...
+** cm.push {ra, s0-s1}, -16
+** ...
+** cm.popret {ra, s0-s1}, 16
+** ...
+*/
+int test_s1()
+{
+
+ int s0 = my_getchar();
+ int s1 = my_getchar();
+ int b = my_getchar();
+ return s1 +s0 +b;
+}
+
+/*
+**test_f0:
+** ...
+** cm.push {ra, s0}, -32
+** ...
+** cm.popret {ra, s0}, 32
+** ...
+*/
+int test_f0()
+{
+
+ int s0 = my_getchar();
+ float f0 = getf();
+ int b = my_getchar();
+ return f0 +s0 +b;
+}
+
+/*
+**foo:
+** cm.push {ra}, -16
+** call f1
+** cm.pop {ra}, 16
+** tail f2
+*/
+void foo(void)
+{
+ f1();
+ f2();
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options " -O0 -march=rv32e_zca_zcb_zcmp -mabi=ilp32e -mcmodel=medlow -fomit-frame-pointer" } */
+/* { dg-skip-if "" { *-*-* } {"-O2" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void bar();
+
+/*
+**fool_rv32e:
+** cm.push {ra}, -32
+** ...
+** call bar
+** ...
+** lw a5,32\(sp\)
+** ...
+** cm.popret {ra}, 32
+*/
+int fool_rv32e ( int a0, int a1, int a2, int a3, int a4, int a5,
+ int incoming0)
+{
+ bar();
+ return a0 + a1 + a2 + a3 + a4 + a5 + incoming0;
+}
\ No newline at end of file