diff mbox series

[2/4] RISC-V: Implement TARGET_SCHED_PRESSURE_PREFER_NARROW [PR/114729]

Message ID 20241020194018.3051160-3-vineetg@rivosinc.com
State New
Headers show
Series sched1 improvements | expand

Commit Message

Vineet Gupta Oct. 20, 2024, 7:40 p.m. UTC
This inhibits sched1 aggressive spilling on RISC-V (see prev commit for
details of what the hook does).

On RISC-V (BPI-F3) we see good results.
(Build: -Ofast -march=rv64gcv_zba_zbb_zbs)

  Before:
  ------
  Performance counter stats for './cactusBSSN_r_base.rivos spec_ref.par':

      4,769,844.10 msec task-clock:u                     #    1.000 CPUs utilized
             6,029      context-switches:u               #    1.264 /sec
                 0      cpu-migrations:u                 #    0.000 /sec
           201,468      page-faults:u                    #   42.238 /sec
 7,631,707,552,979      cycles:u                         #    1.600 GHz
 2,630,225,489,010      instructions:u                   #    0.34  insn per cycle
    10,592,305,077      branches:u                       #    2.221 M/sec
        16,274,388      branch-misses:u                  #    0.15% of all branches

  After:
  -----
  Performance counter stats for './cactusBSSN_r_base.rivos spec_ref.par':

      4,471,770.20 msec task-clock:u                     #    0.998 CPUs utilized
           159,245      context-switches:u               #   35.611 /sec
                 2      cpu-migrations:u                 #    0.000 /sec
           204,065      page-faults:u                    #   45.634 /sec
 7,153,778,156,281      cycles:u         ( 6% faster)    #    1.600 GHz
 2,143,115,846,207      instructions:u   (18.5% fewer)   #    0.30  insn per cycle
    10,592,316,035      branches:u                       #    2.369 M/sec
        17,229,411      branch-misses:u                  #    0.16% of all branches

Similarly, good results on Cactu on aarch64 as well (qemu dynamic icounts only)
(Build: -march=armv9-a+sve2)

  Before: 1,382,403,783,566
   After: 1,264,869,192,921 (8.5% improv)

gcc/ChangeLog:
	PR target/114729
	* config/riscv/riscv.cc (TARGET_SCHED_PRESSURE_PREFER_NARROW):
	Define to true.

gcc/testsuite/ChangeLog:
	PR target/114729
	* gcc.target/riscv/riscv.exp: Enable new tests to build.
	* gcc.target/riscv/sched1-spills/spill1.cpp: Add new test.

Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
---
 gcc/config/riscv/riscv.cc                     |  3 ++
 gcc/testsuite/gcc.target/riscv/riscv.exp      |  2 ++
 .../gcc.target/riscv/sched1-spills/spill1.cpp | 31 +++++++++++++++++++
 3 files changed, 36 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sched1-spills/spill1.cpp

Comments

Bernhard Reutner-Fischer Oct. 22, 2024, 7:02 p.m. UTC | #1
On 20 October 2024 21:40:16 CEST, Vineet Gupta <vineetg@rivosinc.com> wrote:

>diff --git a/gcc/testsuite/gcc.target/riscv/sched1-spills/spill1.cpp b/gcc/testsuite/gcc.target/riscv/sched1-spills/spill1.cpp
>new file mode 100644
>index 000000000000..47a9d1a01ab4
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/riscv/sched1-spills/spill1.cpp
>

>+/* { dg-final { scan-assembler-times "%sfp" 0 } } */

scan-assembler-not, please
Vineet Gupta Oct. 23, 2024, 4:37 p.m. UTC | #2
On 10/22/24 12:02, rep.dot.nop@gmail.com wrote:
>> +/* { dg-final { scan-assembler-times "%sfp" 0 } } */
> scan-assembler-not, please

Fixed and also in the other patch.

Thx,
-Vineet
Jeff Law Oct. 30, 2024, 8:34 p.m. UTC | #3
On 10/20/24 1:40 PM, Vineet Gupta wrote:
> This inhibits sched1 aggressive spilling on RISC-V (see prev commit for
> details of what the hook does).
> 
> On RISC-V (BPI-F3) we see good results.
> (Build: -Ofast -march=rv64gcv_zba_zbb_zbs)
> 
>    Before:
>    ------
>    Performance counter stats for './cactusBSSN_r_base.rivos spec_ref.par':
> 
>        4,769,844.10 msec task-clock:u                     #    1.000 CPUs utilized
>               6,029      context-switches:u               #    1.264 /sec
>                   0      cpu-migrations:u                 #    0.000 /sec
>             201,468      page-faults:u                    #   42.238 /sec
>   7,631,707,552,979      cycles:u                         #    1.600 GHz
>   2,630,225,489,010      instructions:u                   #    0.34  insn per cycle
>      10,592,305,077      branches:u                       #    2.221 M/sec
>          16,274,388      branch-misses:u                  #    0.15% of all branches
> 
>    After:
>    -----
>    Performance counter stats for './cactusBSSN_r_base.rivos spec_ref.par':
> 
>        4,471,770.20 msec task-clock:u                     #    0.998 CPUs utilized
>             159,245      context-switches:u               #   35.611 /sec
>                   2      cpu-migrations:u                 #    0.000 /sec
>             204,065      page-faults:u                    #   45.634 /sec
>   7,153,778,156,281      cycles:u         ( 6% faster)    #    1.600 GHz
>   2,143,115,846,207      instructions:u   (18.5% fewer)   #    0.30  insn per cycle
>      10,592,316,035      branches:u                       #    2.369 M/sec
>          17,229,411      branch-misses:u                  #    0.16% of all branches
> 
> Similarly, good results on Cactu on aarch64 as well (qemu dynamic icounts only)
> (Build: -march=armv9-a+sve2)
> 
>    Before: 1,382,403,783,566
>     After: 1,264,869,192,921 (8.5% improv)
> 
> gcc/ChangeLog:
> 	PR target/114729
> 	* config/riscv/riscv.cc (TARGET_SCHED_PRESSURE_PREFER_NARROW):
> 	Define to true.
> 
> gcc/testsuite/ChangeLog:
> 	PR target/114729
> 	* gcc.target/riscv/riscv.exp: Enable new tests to build.
> 	* gcc.target/riscv/sched1-spills/spill1.cpp: Add new test.
This is fine once we finalize naming for patch #1 and update this patch 
for whatever final name is selected.

I worry ever-so-slightly about the testcase being overly-sensitive to 
unrelated changes, but not overly so.  If it turns out to require 
regular twiddling, then we'll deal with it at that time.


jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 3ac40234345a..46f775b3cdcb 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12616,6 +12616,9 @@  riscv_stack_clash_protection_alloca_probe_range (void)
 #undef  TARGET_SCHED_ADJUST_COST
 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
 
+#undef  TARGET_SCHED_PRESSURE_PREFER_NARROW
+#define TARGET_SCHED_PRESSURE_PREFER_NARROW hook_bool_void_true
+
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
 
diff --git a/gcc/testsuite/gcc.target/riscv/riscv.exp b/gcc/testsuite/gcc.target/riscv/riscv.exp
index 187eb6640470..3cbbf63b9d0a 100644
--- a/gcc/testsuite/gcc.target/riscv/riscv.exp
+++ b/gcc/testsuite/gcc.target/riscv/riscv.exp
@@ -38,6 +38,8 @@  dg-init
 # Main loop.
 gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
 	"" $DEFAULT_CFLAGS
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sched1-spills/*.{\[cS\],cpp}]] \
+	"" $DEFAULT_CFLAGS
 
 # All done.
 dg-finish
diff --git a/gcc/testsuite/gcc.target/riscv/sched1-spills/spill1.cpp b/gcc/testsuite/gcc.target/riscv/sched1-spills/spill1.cpp
new file mode 100644
index 000000000000..47a9d1a01ab4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sched1-spills/spill1.cpp
@@ -0,0 +1,31 @@ 
+/* Reduced from SPEC2017 Cactu ML_BSSN_Advect.cpp
+   by comparing -fschedule-insn and -fno-schedule-insns builds.
+   Shows up one extra spill (pair of spill markers "sfp") in verbose asm
+   output which the patch fixes.  */
+
+/* { dg-options "-O2 -march=rv64gc -mabi=lp64d -save-temps -fverbose-asm" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-final { scan-assembler-times "%sfp" 0 } } */
+
+void s();
+double b, c, d, e, f, g, h, k, l, m, n, o, p, q, t, u, v;
+int *j;
+double *r, *w;
+long x;
+void y() {
+  double *a((double *)s);
+  for (;;)
+    for (; j[1];)
+      for (int i = 1; i < j[0]; i++) {
+        k = l;
+        m = n;
+        o = p = q;
+        r[0] = t;
+        a[0] = u;
+        x = g;
+        e = f;
+        v = w[x];
+        b = c;
+        d = h;
+        }
+}