diff mbox series

[Committed] RISC-V: Make PHI initial value occupy live V_REG in dynamic LMUL cost model analysis

Message ID 20231222230742.1807755-1-juzhe.zhong@rivai.ai
State New
Headers show
Series [Committed] RISC-V: Make PHI initial value occupy live V_REG in dynamic LMUL cost model analysis | expand

Commit Message

钟居哲 Dec. 22, 2023, 11:07 p.m. UTC
Consider the following case:

foo:
        ble     a0,zero,.L11
        lui     a2,%hi(.LANCHOR0)
        addi    sp,sp,-128
        addi    a2,a2,%lo(.LANCHOR0)
        mv      a1,a0
        vsetvli a6,zero,e32,m8,ta,ma
        vid.v   v8
        vs8r.v  v8,0(sp)                     ---> spill
.L3:
        vl8re32.v       v16,0(sp)            ---> reload
        vsetvli a4,a1,e8,m2,ta,ma
        li      a3,0
        vsetvli a5,zero,e32,m8,ta,ma
        vmv8r.v v0,v16
        vmv.v.x v8,a4
        vmv.v.i v24,0
        vadd.vv v8,v16,v8
        vmv8r.v v16,v24
        vs8r.v  v8,0(sp)                    ---> spill
.L4:
        addiw   a3,a3,1
        vadd.vv v8,v0,v16
        vadd.vi v16,v16,1
        vadd.vv v24,v24,v8
        bne     a0,a3,.L4
        vsetvli zero,a4,e32,m8,ta,ma
        sub     a1,a1,a4
        vse32.v v24,0(a2)
        slli    a4,a4,2
        add     a2,a2,a4
        bne     a1,zero,.L3
        li      a0,0
        addi    sp,sp,128
        jr      ra
.L11:
        li      a0,0
        ret

We pick an unexpected LMUL = 8, which leads to the vector register spills and reloads marked above.

The root cause is that we didn't take the PHI initial value into account in the dynamic LMUL calculation:

  # j_17 = PHI <j_11(9), 0(5)>                       ---> # vect_vec_iv_.8_24 = PHI <_25(9), { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }(5)>

We didn't count { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } as consuming vector registers, but a vector register group is in fact allocated for it.

This patch fixes the missing count. After this patch, we pick the ideal LMUL (LMUL = M4):

foo:
	ble	a0,zero,.L9
	lui	a4,%hi(.LANCHOR0)
	addi	a4,a4,%lo(.LANCHOR0)
	mv	a2,a0
	vsetivli	zero,16,e32,m4,ta,ma
	vid.v	v20
.L3:
	vsetvli	a3,a2,e8,m1,ta,ma
	li	a5,0
	vsetivli	zero,16,e32,m4,ta,ma
	vmv4r.v	v16,v20
	vmv.v.i	v12,0
	vmv.v.x	v4,a3
	vmv4r.v	v8,v12
	vadd.vv	v20,v20,v4
.L4:
	addiw	a5,a5,1
	vmv4r.v	v4,v8
	vadd.vi	v8,v8,1
	vadd.vv	v4,v16,v4
	vadd.vv	v12,v12,v4
	bne	a0,a5,.L4
	slli	a5,a3,2
	vsetvli	zero,a3,e32,m4,ta,ma
	sub	a2,a2,a3
	vse32.v	v12,0(a4)
	add	a4,a4,a5
	bne	a2,zero,.L3
.L9:
	li	a0,0
	ret

Tested with --with-arch=gcv; no regressions.

	PR target/113112

gcc/ChangeLog:

	* config/riscv/riscv-vector-costs.cc (max_number_of_live_regs): Refine dump information.
	(preferred_new_lmul_p): Make PHI initial value into live regs calculation.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c: New test.

---
 gcc/config/riscv/riscv-vector-costs.cc        | 45 ++++++++++++++++---
 .../vect/costmodel/riscv/rvv/pr113112-1.c     | 31 +++++++++++++
 2 files changed, 71 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index a316603e207..946eb4a9fc6 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -355,10 +355,11 @@  max_number_of_live_regs (const basic_block bb,
     }
 
   if (dump_enabled_p ())
-    dump_printf_loc (MSG_NOTE, vect_location,
-		     "Maximum lmul = %d, %d number of live V_REG at program "
-		     "point %d for bb %d\n",
-		     lmul, max_nregs, live_point, bb->index);
+    dump_printf_loc (
+      MSG_NOTE, vect_location,
+      "Maximum lmul = %d, At most %d number of live V_REG at program "
+      "point %d for bb %d\n",
+      lmul, max_nregs, live_point, bb->index);
   return max_nregs;
 }
 
@@ -472,6 +473,41 @@  update_local_live_ranges (
 	      tree def = gimple_phi_arg_def (phi, j);
 	      auto *live_ranges = live_ranges_per_bb.get (bb);
 	      auto *live_range = live_ranges->get (def);
+	      if (poly_int_tree_p (def))
+		{
+		  /* Insert live range of INTEGER_CST or POLY_CST since we will
+		     need to allocate a vector register for it.
+
+		     E.g. # j_17 = PHI <j_11(9), 0(5)> will be transformed
+		     into # vect_vec_iv_.8_24 = PHI <_25(9), { 0, ... }(5)>
+
+		     The live range for such value is short which only lives
+		     from program point 0 to 1.  */
+		  if (live_range)
+		    {
+		      unsigned int start = (*live_range).first;
+		      (*live_range).first = 0;
+		      if (dump_enabled_p ())
+			dump_printf_loc (
+			  MSG_NOTE, vect_location,
+			  "Update %T start point from %d to 0:\n", def, start);
+		    }
+		  else
+		    {
+		      live_ranges->put (def, pair (0, 1));
+		      auto &program_points = (*program_points_per_bb.get (bb));
+		      if (program_points.is_empty ())
+			{
+			  stmt_point info = {1, phi};
+			  program_points.safe_push (info);
+			}
+		      if (dump_enabled_p ())
+			dump_printf_loc (MSG_NOTE, vect_location,
+					 "Add %T start point from 0 to 1:\n",
+					 def);
+		    }
+		  continue;
+		}
 	      if (live_range && flow_bb_inside_loop_p (loop, e->src))
 		{
 		  unsigned int start = (*live_range).first;
@@ -580,7 +616,6 @@  preferred_new_lmul_p (loop_vec_info other_loop_vinfo)
 				       biggest_mode, lmul);
 	  if (nregs > max_nregs)
 	    max_nregs = nregs;
-	  live_ranges_per_bb.empty ();
 	}
       live_ranges_per_bb.empty ();
       return max_nregs > V_REG_NUM;
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c
new file mode 100644
index 00000000000..a44a1c041af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic --param riscv-autovec-preference=fixed-vlmax -fdump-tree-vect-details" } */
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int /* Not inlined; always returns 0.  */
+foo (int n){ /* Fills a[0..n-1]; n is not checked against N here.  */
+  int i,j;
+  int sum,x; /* x is declared but never used.  */
+
+  for (i = 0; i < n; i++) { /* Outer loop: one element of a[] per i.  */
+    sum = 0;
+    for (j = 0; j < n; j++) { /* j restarts at 0 for every i (cf. j_17 = PHI <j_11, 0>).  */
+      sum += (i + j); /* Accumulate i + j over all j in [0, n).  */
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not {jr} } } */
+/* { dg-final { scan-assembler-times {ret} 1 } } */
+/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */
+/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */
+/* { dg-final { scan-tree-dump "At most 8 number of live V_REG at program point 0 for bb 4" "vect" } } */
+/* { dg-final { scan-tree-dump "At most 40 number of live V_REG at program point 0 for bb 3" "vect" } } */
+/* { dg-final { scan-tree-dump "At most 8 number of live V_REG at program point 0 for bb 5" "vect" } } */