@@ -167,6 +167,8 @@ extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
extern enum memmodel riscv_union_memmodels (enum memmodel, enum memmodel);
extern bool riscv_reg_frame_related (rtx);
+extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
+ HOST_WIDE_INT *);
/* Routines implemented in riscv-c.cc. */
void riscv_cpu_cpp_builtins (cpp_reader *);
@@ -4075,6 +4075,32 @@ riscv_split_doubleword_move (rtx dest, rtx src)
riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
}
}
+
+/* Constant VAL is known to be sum of two S12 constants. Break it into
+ comprising BASE and OFF.
+ Numerically S12 is -2048 to 2047, however it uses the more conservative
+ range -2048 to 2032 as offsets pertain to stack related registers. */
+
+void
+riscv_split_sum_of_two_s12 (HOST_WIDE_INT val, HOST_WIDE_INT *base,
+ HOST_WIDE_INT *off)
+{
+ if (SUM_OF_TWO_S12_N (val))
+ {
+ *base = -2048;
+ *off = val - (-2048);
+ }
+ else if (SUM_OF_TWO_S12_P_ALGN (val))
+ {
+ *base = 2032;
+ *off = val - 2032;
+ }
+ else
+ {
+ gcc_unreachable ();
+ }
+}
+
/* Return the appropriate instructions to move SRC into DEST. Assume
that SRC is operand 1 and DEST is operand 0. */
@@ -7864,6 +7890,17 @@ riscv_expand_prologue (void)
GEN_INT (-constant_frame));
RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
}
+ else if (SUM_OF_TWO_S12_ALGN (-constant_frame))
+ {
+ HOST_WIDE_INT one, two;
+ riscv_split_sum_of_two_s12 (-constant_frame, &one, &two);
+ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (one));
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (two));
+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+ }
else
{
riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame));
@@ -8160,10 +8197,21 @@ riscv_expand_epilogue (int style)
/* Get an rtx for STEP1 that we can add to BASE.
Skip if adjust equal to zero. */
- if (step1.to_constant () != 0)
+ HOST_WIDE_INT step1_value = step1.to_constant ();
+ if (step1_value != 0)
{
- rtx adjust = GEN_INT (step1.to_constant ());
- if (!SMALL_OPERAND (step1.to_constant ()))
+ rtx adjust = GEN_INT (step1_value);
+ if (SUM_OF_TWO_S12_ALGN (step1_value))
+ {
+ HOST_WIDE_INT one, two;
+ riscv_split_sum_of_two_s12 (step1_value, &one, &two);
+ insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (one)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ adjust = GEN_INT (two);
+ }
+ else if (!SMALL_OPERAND (step1_value))
{
riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
adjust = RISCV_PROLOGUE_TEMP (Pmode);
@@ -641,6 +641,13 @@ enum reg_class
#define SUM_OF_TWO_S12(VALUE) \
(SUM_OF_TWO_S12_N (VALUE) || SUM_OF_TWO_S12_P (VALUE))
+/* Variant with first value 8 byte aligned if involving stack regs. */
+#define SUM_OF_TWO_S12_P_ALGN(VALUE) \
+ (((VALUE) >= (2032 + 1)) && ((VALUE) <= (2032 * 2)))
+
+#define SUM_OF_TWO_S12_ALGN(VALUE) \
+ (SUM_OF_TWO_S12_N (VALUE) || SUM_OF_TWO_S12_P_ALGN (VALUE))
+
/* If this is a single bit mask, then we can load it with bseti. Special
handling of SImode 0x80000000 on RV64 is done in riscv_build_integer_1. */
#define SINGLE_BIT_MASK_OPERAND(VALUE) \
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options { -march=rv64gcv -mabi=lp64d } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-Os" "-Oz" } } */
+
+#define BUF_SIZE 2064
+
+void
+foo(unsigned long i)
+{
+ volatile char buf[BUF_SIZE];
+
+ buf[i] = 0;
+}
+
+/* { dg-final { scan-assembler-not {li\t[a-x0-9]+,4096} } } */
@@ -129,5 +129,5 @@ spill_12 (int8_t *in, int8_t *out)
/* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
@@ -120,5 +120,5 @@ spill_11 (int16_t *in, int16_t *out)
/* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
@@ -111,5 +111,5 @@ spill_10 (int32_t *in, int32_t *out)
/* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
@@ -102,5 +102,5 @@ spill_9 (int64_t *in, int64_t *out)
/* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
@@ -120,5 +120,5 @@ spill_11 (_Float16 *in, _Float16 *out)
/* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
@@ -111,5 +111,5 @@ spill_10 (float *in, float *out)
/* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
@@ -102,5 +102,5 @@ spill_9 (int64_t *in, int64_t *out)
/* { dg-final { scan-assembler-times {addi\tsp,sp,-256} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-512} 1 } } */
/* { dg-final { scan-assembler-times {addi\tsp,sp,-1024} 1 } } */
-/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 1 } } */
-/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,-4096\s+add\tsp,sp,[a-x0-9]+} 1 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,-2048} 3 } } */
+/* { dg-final { scan-assembler-times {addi\tsp,sp,2032} 1 } } */
Changes since v2: - Broke out the hunk corresponding to alloca in epilogue expansion in a seperate patch. --- If the constant used for stack offset can be expressed as sum of two S12 values, the constant need not be materialized (in a reg) and instead the two S12 bits can be added to instructions involved with frame pointer. This avoids burning a register and more importantly can often get down to be 2 insn vs. 3. The prev patches to generally avoid LUI based const materialization didn't fix this PR and need this directed fix in funcion prologue/epilogue expansion. This fix doesn't move the neddle for SPEC, at all, but it is still a win considering gcc generates one insn fewer than llvm for the test ;-) gcc-13.1 release | gcc 230823 | | | g6619b3d4c15c | This patch | clang/llvm --------------------------------------------------------------------------------- li t0,-4096 | li t0,-4096 | addi sp,sp,-2048 | addi sp,sp,-2048 addi t0,t0,2016 | addi t0,t0,2032 | add sp,sp,-16 | addi sp,sp,-32 li a4,4096 | add sp,sp,t0 | add a5,sp,a0 | add a1,sp,16 add sp,sp,t0 | addi a5,sp,-2032 | sb zero,0(a5) | add a0,a0,a1 li a5,-4096 | add a0,a5,a0 | addi sp,sp,2032 | sb zero,0(a0) addi a4,a4,-2032 | li t0, 4096 | addi sp,sp,32 | addi sp,sp,2032 add a4,a4,a5 | sb zero,2032(a0) | ret | addi sp,sp,48 addi a5,sp,16 | addi t0,t0,-2032 | | ret add a5,a4,a5 | add sp,sp,t0 | add a0,a5,a0 | ret | li t0,4096 | sd a5,8(sp) | sb zero,2032(a0)| addi t0,t0,-2016 | add sp,sp,t0 | ret | gcc/ChangeLog: PR target/105733 * config/riscv/riscv.h: New macros for with aligned offsets. * config/riscv/riscv.cc (riscv_split_sum_of_two_s12): New function to split a sum of two s12 values into constituents. (riscv_expand_prologue): Handle offset being sum of two S12. (riscv_expand_epilogue): Ditto. * config/riscv/riscv-protos.h (riscv_split_sum_of_two_s12): New. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr105733.c: New Test. * gcc.target/riscv/rvv/autovec/vls/spill-1.c: Adjust to not expect LUI 4096. * gcc.target/riscv/rvv/autovec/vls/spill-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/spill-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/spill-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/spill-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/spill-6.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/spill-7.c: Ditto. Signed-off-by: Vineet Gupta <vineetg@rivosinc.com> --- gcc/config/riscv/riscv-protos.h | 2 + gcc/config/riscv/riscv.cc | 54 +++++++++++++++++-- gcc/config/riscv/riscv.h | 7 +++ gcc/testsuite/gcc.target/riscv/pr105733.c | 15 ++++++ .../riscv/rvv/autovec/vls/spill-1.c | 4 +- .../riscv/rvv/autovec/vls/spill-2.c | 4 +- .../riscv/rvv/autovec/vls/spill-3.c | 4 +- .../riscv/rvv/autovec/vls/spill-4.c | 4 +- .../riscv/rvv/autovec/vls/spill-5.c | 4 +- .../riscv/rvv/autovec/vls/spill-6.c | 4 +- .../riscv/rvv/autovec/vls/spill-7.c | 4 +- 11 files changed, 89 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/pr105733.c