Message ID | 20221130083717.14438-1-gaofei@eswincomputing.com |
---|---|
State | New |
Headers | show |
Series | RISC-V: optimize stack manipulation in save-restore | expand |
On Wed, 30 Nov 2022 00:37:17 PST (-0800), gaofei@eswincomputing.com wrote: > The stack that save-restore reserves is not well accumulated in stack allocation and deallocation. > This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled, > and also a much clear logic for save-restore stack manipulation. > > before patch: > bar: > call t0,__riscv_save_4 > addi sp,sp,-64 > ... > li t0,-12288 > addi t0,t0,-1968 # optimized out after patch > add sp,sp,t0 # prologue > ... > li t0,12288 # epilogue > addi t0,t0,2000 # optimized out after patch > add sp,sp,t0 > ... > addi sp,sp,32 > tail __riscv_restore_4 > > after patch: > bar: > call t0,__riscv_save_4 > addi sp,sp,-2032 > ... > li t0,-12288 > add sp,sp,t0 # prologue > ... > li t0,12288 # epilogue > add sp,sp,t0 > ... > addi sp,sp,2032 > tail __riscv_restore_4 > > gcc/ChangeLog: > > * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size. > (riscv_compute_frame_info): adapt new riscv_first_stack_step interface. > (riscv_expand_prologue): consider save-restore in stack allocation. > (riscv_expand_epilogue): consider save-restore in stack deallocation. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/stack_save_restore.c: New test. > --- > gcc/config/riscv/riscv.cc | 58 ++++++++++--------- > .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++ > 2 files changed, 70 insertions(+), 28 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c I guess with the RISC-V backend still being open for things as big as the V port we should probably be taking code like this as well? I wouldn't be opposed to making an exception for the V code and holding everything else back, though. > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > index 05bdba5ab4d..9e92e729a5f 100644 > --- a/gcc/config/riscv/riscv.cc > +++ b/gcc/config/riscv/riscv.cc > @@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask) > They decrease stack_pointer_rtx but leave frame_pointer_rtx and > hard_frame_pointer_rtx unchanged. */ > > -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame); > +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size); > > /* Handle stack align for poly_int. */ > static poly_int64 > @@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void) > save/restore t0. We check for this before clearing the frame struct. */ > if (cfun->machine->interrupt_handler_p) > { > - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); > + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); > if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))) > interrupt_save_prologue_temp = true; > } > @@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem) > without adding extra instructions. */ > > static HOST_WIDE_INT > -riscv_first_stack_step (struct riscv_frame_info *frame) > +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size) > { > - HOST_WIDE_INT frame_total_constant_size; > - if (!frame->total_size.is_constant ()) > - frame_total_constant_size > - = riscv_stack_align (frame->total_size.coeffs[0]) > - - riscv_stack_align (frame->total_size.coeffs[1]); > + HOST_WIDE_INT remaining_const_size; > + if (!remaining_size.is_constant ()) > + remaining_const_size > + = riscv_stack_align (remaining_size.coeffs[0]) > + - riscv_stack_align (remaining_size.coeffs[1]); The alignment looks off here, at least in the email. Worth fixing it up if you're touching the lines anyway. > else > - frame_total_constant_size = frame->total_size.to_constant (); > + remaining_const_size = remaining_size.to_constant (); > > - if (SMALL_OPERAND (frame_total_constant_size)) > - return frame_total_constant_size; > + if (SMALL_OPERAND (remaining_const_size)) > + return remaining_const_size; > > HOST_WIDE_INT min_first_step = > - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant()); > + RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant()); > HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; > - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step; > + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step; > gcc_assert (min_first_step <= max_first_step); > > /* As an optimization, use the least-significant bits of the total frame > size, so that the second adjustment step is just LUI + ADD. */ > if (!SMALL_OPERAND (min_second_step) > - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2 > - && frame_total_constant_size % IMM_REACH >= min_first_step) > - return frame_total_constant_size % IMM_REACH; > + && remaining_const_size % IMM_REACH < IMM_REACH / 2 > + && remaining_const_size % IMM_REACH >= min_first_step) > + return remaining_const_size % IMM_REACH; Looks like this entire frame->total_size -> remaining_size conversion could be done as an independent patch that would change no functionality, that's always a nice way to do things as it makes the code easier to read. I spent a bit poking around here and nothing wrong is jumping out, but trying to keep all these offset differences in my head is a bit tricky. If you have the time to refactor this to be easier to read that'd be great, otherwise hopefully I (or someone else) will have the time to take a look -- probably not today on my end, though, as I've got some Linux backlog to look at. Thanks! > if (TARGET_RVC) > { > @@ -5037,9 +5037,7 @@ riscv_expand_prologue (void) > /* Save the registers. */ > if ((frame->mask | frame->fmask) != 0) > { > - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); > - if (size.is_constant ()) > - step1 = MIN (size.to_constant(), step1); > + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size); > > insn = gen_add3_insn (stack_pointer_rtx, > stack_pointer_rtx, > @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style) > HOST_WIDE_INT step2 = 0; > bool use_restore_libcall = ((style == NORMAL_RETURN) > && riscv_use_save_libcall (frame)); > + unsigned libcall_size = use_restore_libcall ? > + frame->save_libcall_adjustment : 0; > rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); > rtx insn; > > @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style) > REG_NOTES (insn) = dwarf; > } > > + if (use_restore_libcall) > + frame->mask = 0; /* Temporarily fib for GPRs. */ > + > /* If we need to restore registers, deallocate as much stack as > possible in the second step without going out of range. */ > if ((frame->mask | frame->fmask) != 0) > - { > - step2 = riscv_first_stack_step (frame); > - step1 -= step2; > - } > + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); > + > + if (use_restore_libcall) > + frame->mask = mask; /* Undo the above fib. */ > + > + step1 -= step2 + libcall_size; > > /* Set TARGET to BASE + STEP1. */ > if (known_gt (step1, 0)) > @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style) > frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ > > /* Restore the registers. */ > - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, > + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size, > + riscv_restore_reg, > true, style == EXCEPTION_RETURN); > > if (use_restore_libcall) > - { > frame->mask = mask; /* Undo the above fib. */ > - gcc_assert (step2 >= frame->save_libcall_adjustment); > - step2 -= frame->save_libcall_adjustment; > - } > > if (need_barrier_p) > riscv_emit_stack_tie (); > diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c > new file mode 100644 > index 00000000000..4695ef9469a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +char my_getchar(); > +float getf(); > + > +/* > +**bar: > +** call t0,__riscv_save_4 > +** addi sp,sp,-2032 > +** ... > +** li t0,-12288 > +** add sp,sp,t0 > +** ... > +** li t0,12288 > +** add sp,sp,t0 > +** ... > +** addi sp,sp,2032 > +** tail __riscv_restore_4 > +*/ The test needs to actually check this, it can't just be manual. > +int bar() > +{ > + float volatile farray[3568]; > + > + float sum = 0; > + float f1 = getf(); > + float f2 = getf(); > + float f3 = getf(); > + float f4 = getf(); > + > + for (int i = 0; i < 3568; i++) > + { > + farray[i] = my_getchar() * 1.2; > + sum += farray[i]; > + } > + > + return sum + f1 + f2 + f3 + f4; > +} > +
On 2022-12-01 06:50 Palmer Dabbelt <palmer@dabbelt.com> wrote: > >On Wed, 30 Nov 2022 00:37:17 PST (-0800), gaofei@eswincomputing.com wrote: >> The stack that save-restore reserves is not well accumulated in stack allocation and deallocation. >> This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled, >> and also a much clear logic for save-restore stack manipulation. >> >> before patch: >> bar: >> call t0,__riscv_save_4 >> addi sp,sp,-64 >> ... >> li t0,-12288 >> addi t0,t0,-1968 # optimized out after patch >> add sp,sp,t0 # prologue >> ... >> li t0,12288 # epilogue >> addi t0,t0,2000 # optimized out after patch >> add sp,sp,t0 >> ... >> addi sp,sp,32 >> tail __riscv_restore_4 >> >> after patch: >> bar: >> call t0,__riscv_save_4 >> addi sp,sp,-2032 >> ... >> li t0,-12288 >> add sp,sp,t0 # prologue >> ... >> li t0,12288 # epilogue >> add sp,sp,t0 >> ... >> addi sp,sp,2032 >> tail __riscv_restore_4 >> >> gcc/ChangeLog: >> >> * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size. >> (riscv_compute_frame_info): adapt new riscv_first_stack_step interface. >> (riscv_expand_prologue): consider save-restore in stack allocation. >> (riscv_expand_epilogue): consider save-restore in stack deallocation. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/riscv/stack_save_restore.c: New test. >> --- >> gcc/config/riscv/riscv.cc | 58 ++++++++++--------- >> .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++ >> 2 files changed, 70 insertions(+), 28 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c > >I guess with the RISC-V backend still being open for things as big as >the V port we should probably be taking code like this as well? I >wouldn't be opposed to making an exception for the V code and holding >everything else back, though. > >> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc >> index 05bdba5ab4d..9e92e729a5f 100644 >> --- a/gcc/config/riscv/riscv.cc >> +++ b/gcc/config/riscv/riscv.cc >> @@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask) >> They decrease stack_pointer_rtx but leave frame_pointer_rtx and >> hard_frame_pointer_rtx unchanged. */ >> >> -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame); >> +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size); >> >> /* Handle stack align for poly_int. */ >> static poly_int64 >> @@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void) >> save/restore t0. We check for this before clearing the frame struct. */ >> if (cfun->machine->interrupt_handler_p) >> { >> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); >> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); >> if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))) >> interrupt_save_prologue_temp = true; >> } >> @@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem) >> without adding extra instructions. */ >> >> static HOST_WIDE_INT >> -riscv_first_stack_step (struct riscv_frame_info *frame) >> +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size) >> { >> - HOST_WIDE_INT frame_total_constant_size; >> - if (!frame->total_size.is_constant ()) >> - frame_total_constant_size >> - = riscv_stack_align (frame->total_size.coeffs[0]) >> - - riscv_stack_align (frame->total_size.coeffs[1]); >> + HOST_WIDE_INT remaining_const_size; >> + if (!remaining_size.is_constant ()) >> + remaining_const_size >> + = riscv_stack_align (remaining_size.coeffs[0]) >> + - riscv_stack_align (remaining_size.coeffs[1]); > >The alignment looks off here, at least in the email. Worth fixing it up >if you're touching the lines anyway. Sure, i will change RISCV_STACK_ALIGN into riscv_stack_align. > >> else >> - frame_total_constant_size = frame->total_size.to_constant (); >> + remaining_const_size = remaining_size.to_constant (); >> >> - if (SMALL_OPERAND (frame_total_constant_size)) >> - return frame_total_constant_size; >> + if (SMALL_OPERAND (remaining_const_size)) >> + return remaining_const_size; >> >> HOST_WIDE_INT min_first_step = >> - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant()); >> + RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant()); >> HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; >> - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step; >> + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step; >> gcc_assert (min_first_step <= max_first_step); >> >> /* As an optimization, use the least-significant bits of the total frame >> size, so that the second adjustment step is just LUI + ADD. */ >> if (!SMALL_OPERAND (min_second_step) >> - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2 >> - && frame_total_constant_size % IMM_REACH >= min_first_step) >> - return frame_total_constant_size % IMM_REACH; >> + && remaining_const_size % IMM_REACH < IMM_REACH / 2 >> + && remaining_const_size % IMM_REACH >= min_first_step) >> + return remaining_const_size % IMM_REACH; > >Looks like this entire frame->total_size -> remaining_size conversion >could be done as an independent patch that would change no >functionality, that's always a nice way to do things as it makes the >code easier to read. Sure, i will split this patch. > >I spent a bit poking around here and nothing wrong is jumping out, but >trying to keep all these offset differences in my head is a bit tricky. >If you have the time to refactor this to be easier to read that'd be >great, otherwise hopefully I (or someone else) will have the time to >take a look -- probably not today on my end, though, as I've got some >Linux backlog to look at. I'll try it. Also i propose to add step0 for pre-allocated stack cases like save-restore lib call, and the future Zcmp in Zc* extension if needed. So we have a clear logic of manipulate stack step by step. > >Thanks! > >> if (TARGET_RVC) >> { >> @@ -5037,9 +5037,7 @@ riscv_expand_prologue (void) >> /* Save the registers. */ >> if ((frame->mask | frame->fmask) != 0) >> { >> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); >> - if (size.is_constant ()) >> - step1 = MIN (size.to_constant(), step1); >> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size); >> >> insn = gen_add3_insn (stack_pointer_rtx, >> stack_pointer_rtx, >> @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style) >> HOST_WIDE_INT step2 = 0; >> bool use_restore_libcall = ((style == NORMAL_RETURN) >> && riscv_use_save_libcall (frame)); >> + unsigned libcall_size = use_restore_libcall ? >> + frame->save_libcall_adjustment : 0; >> rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); >> rtx insn; >> >> @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style) >> REG_NOTES (insn) = dwarf; >> } >> >> + if (use_restore_libcall) >> + frame->mask = 0; /* Temporarily fib for GPRs. */ >> + >> /* If we need to restore registers, deallocate as much stack as >> possible in the second step without going out of range. */ >> if ((frame->mask | frame->fmask) != 0) >> - { >> - step2 = riscv_first_stack_step (frame); >> - step1 -= step2; >> - } >> + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); >> + >> + if (use_restore_libcall) >> + frame->mask = mask; /* Undo the above fib. */ >> + >> + step1 -= step2 + libcall_size; >> >> /* Set TARGET to BASE + STEP1. */ >> if (known_gt (step1, 0)) >> @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style) >> frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ >> >> /* Restore the registers. */ >> - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, >> + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size, >> + riscv_restore_reg, >> true, style == EXCEPTION_RETURN); >> >> if (use_restore_libcall) >> - { >> frame->mask = mask; /* Undo the above fib. */ >> - gcc_assert (step2 >= frame->save_libcall_adjustment); >> - step2 -= frame->save_libcall_adjustment; >> - } >> >> if (need_barrier_p) >> riscv_emit_stack_tie (); >> diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c >> new file mode 100644 >> index 00000000000..4695ef9469a >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c >> @@ -0,0 +1,40 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */ >> +/* { dg-final { check-function-bodies "**" "" } } */ >> + >> +char my_getchar(); >> +float getf(); >> + >> +/* >> +**bar: >> +** call t0,__riscv_save_4 >> +** addi sp,sp,-2032 >> +** ... >> +** li t0,-12288 >> +** add sp,sp,t0 >> +** ... >> +** li t0,12288 >> +** add sp,sp,t0 >> +** ... >> +** addi sp,sp,2032 >> +** tail __riscv_restore_4 >> +*/ > >The test needs to actually check this, it can't just be manual. I didn't get your point. The { dg-final { check-function-bodies "**" "" } } matches the output with the expected result above automatically. Please let me know your idea. Thanks & BR, Fei > >> +int bar() >> +{ >> + float volatile farray[3568]; >> + >> + float sum = 0; >> + float f1 = getf(); >> + float f2 = getf(); >> + float f3 = getf(); >> + float f4 = getf(); >> + >> + for (int i = 0; i < 3568; i++) >> + { >> + farray[i] = my_getchar() * 1.2; >> + sum += farray[i]; >> + } >> + >> + return sum + f1 + f2 + f3 + f4; >> +} >> +
Hi Palmer and all, I have split the patches and triggerred a new thread. https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg297206.html Could you please review at your convenience? Thanks & BR, Fei On 2022-12-01 11:07 Fei Gao <gaofei@eswincomputing.com> wrote: > >On 2022-12-01 06:50 Palmer Dabbelt <palmer@dabbelt.com> wrote: >> >>On Wed, 30 Nov 2022 00:37:17 PST (-0800), gaofei@eswincomputing.com wrote: >>> The stack that save-restore reserves is not well accumulated in stack allocation and deallocation. >>> This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled, >>> and also a much clear logic for save-restore stack manipulation. >>> >>> before patch: >>> bar: >>> call t0,__riscv_save_4 >>> addi sp,sp,-64 >>> ... >>> li t0,-12288 >>> addi t0,t0,-1968 # optimized out after patch >>> add sp,sp,t0 # prologue >>> ... >>> li t0,12288 # epilogue >>> addi t0,t0,2000 # optimized out after patch >>> add sp,sp,t0 >>> ... >>> addi sp,sp,32 >>> tail __riscv_restore_4 >>> >>> after patch: >>> bar: >>> call t0,__riscv_save_4 >>> addi sp,sp,-2032 >>> ... >>> li t0,-12288 >>> add sp,sp,t0 # prologue >>> ... >>> li t0,12288 # epilogue >>> add sp,sp,t0 >>> ... >>> addi sp,sp,2032 >>> tail __riscv_restore_4 >>> >>> gcc/ChangeLog: >>> >>> * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size. >>> (riscv_compute_frame_info): adapt new riscv_first_stack_step interface. >>> (riscv_expand_prologue): consider save-restore in stack allocation. >>> (riscv_expand_epilogue): consider save-restore in stack deallocation. >>> >>> gcc/testsuite/ChangeLog: >>> >>> * gcc.target/riscv/stack_save_restore.c: New test. >>> --- >>> gcc/config/riscv/riscv.cc | 58 ++++++++++--------- >>> .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++ >>> 2 files changed, 70 insertions(+), 28 deletions(-) >>> create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c >> >>I guess with the RISC-V backend still being open for things as big as >>the V port we should probably be taking code like this as well? I >>wouldn't be opposed to making an exception for the V code and holding >>everything else back, though. >> >>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc >>> index 05bdba5ab4d..9e92e729a5f 100644 >>> --- a/gcc/config/riscv/riscv.cc >>> +++ b/gcc/config/riscv/riscv.cc >>> @@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask) >>> They decrease stack_pointer_rtx but leave frame_pointer_rtx and >>> hard_frame_pointer_rtx unchanged. */ >>> >>> -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame); >>> +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size); >>> >>> /* Handle stack align for poly_int. */ >>> static poly_int64 >>> @@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void) >>> save/restore t0. We check for this before clearing the frame struct. */ >>> if (cfun->machine->interrupt_handler_p) >>> { >>> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); >>> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); >>> if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))) >>> interrupt_save_prologue_temp = true; >>> } >>> @@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem) >>> without adding extra instructions. */ >>> >>> static HOST_WIDE_INT >>> -riscv_first_stack_step (struct riscv_frame_info *frame) >>> +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size) >>> { >>> - HOST_WIDE_INT frame_total_constant_size; >>> - if (!frame->total_size.is_constant ()) >>> - frame_total_constant_size >>> - = riscv_stack_align (frame->total_size.coeffs[0]) >>> - - riscv_stack_align (frame->total_size.coeffs[1]); >>> + HOST_WIDE_INT remaining_const_size; >>> + if (!remaining_size.is_constant ()) >>> + remaining_const_size >>> + = riscv_stack_align (remaining_size.coeffs[0]) >>> + - riscv_stack_align (remaining_size.coeffs[1]); >> >>The alignment looks off here, at least in the email. Worth fixing it up >>if you're touching the lines anyway. > >Sure, i will change RISCV_STACK_ALIGN into riscv_stack_align. > >> >>> else >>> - frame_total_constant_size = frame->total_size.to_constant (); >>> + remaining_const_size = remaining_size.to_constant (); >>> >>> - if (SMALL_OPERAND (frame_total_constant_size)) >>> - return frame_total_constant_size; >>> + if (SMALL_OPERAND (remaining_const_size)) >>> + return remaining_const_size; >>> >>> HOST_WIDE_INT min_first_step = >>> - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant()); >>> + RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant()); >>> HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; >>> - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step; >>> + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step; >>> gcc_assert (min_first_step <= max_first_step); >>> >>> /* As an optimization, use the least-significant bits of the total frame >>> size, so that the second adjustment step is just LUI + ADD. */ >>> if (!SMALL_OPERAND (min_second_step) >>> - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2 >>> - && frame_total_constant_size % IMM_REACH >= min_first_step) >>> - return frame_total_constant_size % IMM_REACH; >>> + && remaining_const_size % IMM_REACH < IMM_REACH / 2 >>> + && remaining_const_size % IMM_REACH >= min_first_step) >>> + return remaining_const_size % IMM_REACH; >> >>Looks like this entire frame->total_size -> remaining_size conversion >>could be done as an independent patch that would change no >>functionality, that's always a nice way to do things as it makes the >>code easier to read. > >Sure, i will split this patch. > >> >>I spent a bit poking around here and nothing wrong is jumping out, but >>trying to keep all these offset differences in my head is a bit tricky. >>If you have the time to refactor this to be easier to read that'd be >>great, otherwise hopefully I (or someone else) will have the time to >>take a look -- probably not today on my end, though, as I've got some >>Linux backlog to look at. > >I'll try it. Also i propose to add step0 for pre-allocated stack cases like save-restore lib call, >and the future Zcmp in Zc* extension if needed. >So we have a clear logic of manipulate stack step by step. > >> >>Thanks! >> >>> if (TARGET_RVC) >>> { >>> @@ -5037,9 +5037,7 @@ riscv_expand_prologue (void) >>> /* Save the registers. */ >>> if ((frame->mask | frame->fmask) != 0) >>> { >>> - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); >>> - if (size.is_constant ()) >>> - step1 = MIN (size.to_constant(), step1); >>> + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size); >>> >>> insn = gen_add3_insn (stack_pointer_rtx, >>> stack_pointer_rtx, >>> @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style) >>> HOST_WIDE_INT step2 = 0; >>> bool use_restore_libcall = ((style == NORMAL_RETURN) >>> && riscv_use_save_libcall (frame)); >>> + unsigned libcall_size = use_restore_libcall ? >>> + frame->save_libcall_adjustment : 0; >>> rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); >>> rtx insn; >>> >>> @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style) >>> REG_NOTES (insn) = dwarf; >>> } >>> >>> + if (use_restore_libcall) >>> + frame->mask = 0; /* Temporarily fib for GPRs. */ >>> + >>> /* If we need to restore registers, deallocate as much stack as >>> possible in the second step without going out of range. */ >>> if ((frame->mask | frame->fmask) != 0) >>> - { >>> - step2 = riscv_first_stack_step (frame); >>> - step1 -= step2; >>> - } >>> + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); >>> + >>> + if (use_restore_libcall) >>> + frame->mask = mask; /* Undo the above fib. */ >>> + >>> + step1 -= step2 + libcall_size; >>> >>> /* Set TARGET to BASE + STEP1. */ >>> if (known_gt (step1, 0)) >>> @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style) >>> frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ >>> >>> /* Restore the registers. */ >>> - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, >>> + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size, >>> + riscv_restore_reg, >>> true, style == EXCEPTION_RETURN); >>> >>> if (use_restore_libcall) >>> - { >>> frame->mask = mask; /* Undo the above fib. */ >>> - gcc_assert (step2 >= frame->save_libcall_adjustment); >>> - step2 -= frame->save_libcall_adjustment; >>> - } >>> >>> if (need_barrier_p) >>> riscv_emit_stack_tie (); >>> diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c >>> new file mode 100644 >>> index 00000000000..4695ef9469a >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c >>> @@ -0,0 +1,40 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */ >>> +/* { dg-final { check-function-bodies "**" "" } } */ >>> + >>> +char my_getchar(); >>> +float getf(); >>> + >>> +/* >>> +**bar: >>> +** call t0,__riscv_save_4 >>> +** addi sp,sp,-2032 >>> +** ... >>> +** li t0,-12288 >>> +** add sp,sp,t0 >>> +** ... >>> +** li t0,12288 >>> +** add sp,sp,t0 >>> +** ... >>> +** addi sp,sp,2032 >>> +** tail __riscv_restore_4 >>> +*/ >> >>The test needs to actually check this, it can't just be manual. > >I didn't get your point. >The { dg-final { check-function-bodies "**" "" } } matches the output with the expected result above automatically. >Please let me know your idea. > >Thanks & BR, >Fei > >> >>> +int bar() >>> +{ >>> + float volatile farray[3568]; >>> + >>> + float sum = 0; >>> + float f1 = getf(); >>> + float f2 = getf(); >>> + float f3 = getf(); >>> + float f4 = getf(); >>> + >>> + for (int i = 0; i < 3568; i++) >>> + { >>> + farray[i] = my_getchar() * 1.2; >>> + sum += farray[i]; >>> + } >>> + >>> + return sum + f1 + f2 + f3 + f4; >>> +} >>> +
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 05bdba5ab4d..9e92e729a5f 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask) They decrease stack_pointer_rtx but leave frame_pointer_rtx and hard_frame_pointer_rtx unchanged. */ -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame); +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size); /* Handle stack align for poly_int. */ static poly_int64 @@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void) save/restore t0. We check for this before clearing the frame struct. */ if (cfun->machine->interrupt_handler_p) { - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))) interrupt_save_prologue_temp = true; } @@ -4913,31 +4913,31 @@ riscv_restore_reg (rtx reg, rtx mem) without adding extra instructions. */ static HOST_WIDE_INT -riscv_first_stack_step (struct riscv_frame_info *frame) +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size) { - HOST_WIDE_INT frame_total_constant_size; - if (!frame->total_size.is_constant ()) - frame_total_constant_size - = riscv_stack_align (frame->total_size.coeffs[0]) - - riscv_stack_align (frame->total_size.coeffs[1]); + HOST_WIDE_INT remaining_const_size; + if (!remaining_size.is_constant ()) + remaining_const_size + = riscv_stack_align (remaining_size.coeffs[0]) + - riscv_stack_align (remaining_size.coeffs[1]); else - frame_total_constant_size = frame->total_size.to_constant (); + remaining_const_size = remaining_size.to_constant (); - if (SMALL_OPERAND (frame_total_constant_size)) - return frame_total_constant_size; + if (SMALL_OPERAND (remaining_const_size)) + return remaining_const_size; HOST_WIDE_INT min_first_step = - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant()); + RISCV_STACK_ALIGN ((remaining_size - frame->frame_pointer_offset).to_constant()); HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step; + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step; gcc_assert (min_first_step <= max_first_step); /* As an optimization, use the least-significant bits of the total frame size, so that the second adjustment step is just LUI + ADD. */ if (!SMALL_OPERAND (min_second_step) - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2 - && frame_total_constant_size % IMM_REACH >= min_first_step) - return frame_total_constant_size % IMM_REACH; + && remaining_const_size % IMM_REACH < IMM_REACH / 2 + && remaining_const_size % IMM_REACH >= min_first_step) + return remaining_const_size % IMM_REACH; if (TARGET_RVC) { @@ -5037,9 +5037,7 @@ riscv_expand_prologue (void) /* Save the registers. */ if ((frame->mask | frame->fmask) != 0) { - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); - if (size.is_constant ()) - step1 = MIN (size.to_constant(), step1); + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, size); insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, @@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style) HOST_WIDE_INT step2 = 0; bool use_restore_libcall = ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame)); + unsigned libcall_size = use_restore_libcall ? + frame->save_libcall_adjustment : 0; rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); rtx insn; @@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style) REG_NOTES (insn) = dwarf; } + if (use_restore_libcall) + frame->mask = 0; /* Temporarily fib for GPRs. */ + /* If we need to restore registers, deallocate as much stack as possible in the second step without going out of range. */ if ((frame->mask | frame->fmask) != 0) - { - step2 = riscv_first_stack_step (frame); - step1 -= step2; - } + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); + + if (use_restore_libcall) + frame->mask = mask; /* Undo the above fib. */ + + step1 -= step2 + libcall_size; /* Set TARGET to BASE + STEP1. */ if (known_gt (step1, 0)) @@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style) frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ /* Restore the registers. */ - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size, + riscv_restore_reg, true, style == EXCEPTION_RETURN); if (use_restore_libcall) - { frame->mask = mask; /* Undo the above fib. */ - gcc_assert (step2 >= frame->save_libcall_adjustment); - step2 -= frame->save_libcall_adjustment; - } if (need_barrier_p) riscv_emit_stack_tie (); diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c new file mode 100644 index 00000000000..4695ef9469a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +char my_getchar(); +float getf(); + +/* +**bar: +** call t0,__riscv_save_4 +** addi sp,sp,-2032 +** ... +** li t0,-12288 +** add sp,sp,t0 +** ... +** li t0,12288 +** add sp,sp,t0 +** ... +** addi sp,sp,2032 +** tail __riscv_restore_4 +*/ +int bar() +{ + float volatile farray[3568]; + + float sum = 0; + float f1 = getf(); + float f2 = getf(); + float f3 = getf(); + float f4 = getf(); + + for (int i = 0; i < 3568; i++) + { + farray[i] = my_getchar() * 1.2; + sum += farray[i]; + } + + return sum + f1 + f2 + f3 + f4; +} +