diff mbox series

RISC-V: Use VLS modes if the NITERS is known and smaller than VLS mode elements.

Message ID 20231016082715.3417414-1-juzhe.zhong@rivai.ai
State New
Headers show
Series RISC-V: Use VLS modes if the NITERS is known and smaller than VLS mode elements. | expand

Commit Message

钟居哲 Oct. 16, 2023, 8:27 a.m. UTC
void
foo8 (int64_t *restrict a)
{
  for (int i = 0; i < 16; ++i)
    a[i] = a[i]-16;
}

We use VLS modes instead of VLA modes even if dynamic LMUL is specified.

gcc/ChangeLog:

	* config/riscv/riscv-vector-costs.cc (costs::preferred_new_lmul_p): Use VLS modes.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c: New test.

---
 gcc/config/riscv/riscv-vector-costs.cc        | 13 ++--
 .../costmodel/riscv/rvv/no-dynamic-lmul-1.c   | 64 +++++++++++++++++++
 2 files changed, 73 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c

Comments

Robin Dapp Oct. 16, 2023, 9:12 a.m. UTC | #1
Hi Juzhe,

this LGTM.  I was first concerned whether we would want to
stop e.g. at LMUL = 1 and only continue with a specific flag but
actually this should be done via the costs.  If an implementation
wants to penalize or incentivize some behavior it can always
adjust the costs which should be sufficient.

Regards
 Robin
钟居哲 Oct. 16, 2023, 9:18 a.m. UTC | #2
Thanks Robin.

Committed.



juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-10-16 17:12
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Use VLS modes if the NITERS is known and smaller than VLS mode elements.
Hi Juzhe,
 
this LGTM.  I was first concerned whether we would want to
stop e.g. at LMUL = 1 and only continue with a specific flag but
actually this should be done via the costs.  If an implementation
wants to penalize or incentivize some behavior it can always
adjust the costs which should be sufficient.
 
Regards
Robin
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 11257f7c2bd..4482af2e039 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -530,10 +530,6 @@  costs::preferred_new_lmul_p (const vector_costs *uncast_other) const
   auto other_loop_vinfo = as_a<loop_vec_info> (other->m_vinfo);
   class loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo);
 
-  if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (this_loop_vinfo)
-      && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (other_loop_vinfo))
-    return false;
-
   if (loop_autovec_infos.get (loop) && loop_autovec_infos.get (loop)->end_p)
     return false;
   else if (loop_autovec_infos.get (loop))
@@ -567,6 +563,15 @@  costs::preferred_new_lmul_p (const vector_costs *uncast_other) const
   machine_mode biggest_mode
     = compute_local_live_ranges (program_points_per_bb, live_ranges_per_bb);
 
+  /* If we can use simple VLS modes to handle NITERS element.
+     We don't need to use VLA modes with partial vector auto-vectorization.  */
+  if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
+      && known_le (tree_to_poly_int64 (LOOP_VINFO_NITERS (this_loop_vinfo))
+		     * GET_MODE_SIZE (biggest_mode).to_constant (),
+		   (int) RVV_M8 * BYTES_PER_RISCV_VECTOR)
+      && pow2p_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo)))
+    return vector_costs::better_main_loop_than_p (other);
+
   /* Update live ranges according to PHI.  */
   update_local_live_ranges (other->m_vinfo, program_points_per_bb,
 			    live_ranges_per_bb);
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c
new file mode 100644
index 00000000000..7ede148396f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c
@@ -0,0 +1,64 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -fdump-tree-vect-details" } */
+
+#include <stdint-gcc.h>
+
+void
+foo (int8_t *restrict a)
+{
+  for (int i = 0; i < 4096; ++i)
+    a[i] = a[i]-16;
+}
+
+void
+foo2 (int16_t *restrict a)
+{
+  for (int i = 0; i < 2048; ++i)
+    a[i] = a[i]-16;
+}
+
+void
+foo3 (int32_t *restrict a)
+{
+  for (int i = 0; i < 1024; ++i)
+    a[i] = a[i]-16;
+}
+
+void
+foo4 (int64_t *restrict a)
+{
+  for (int i = 0; i < 512; ++i)
+    a[i] = a[i]-16;
+}
+
+void
+foo5 (int8_t *restrict a)
+{
+  for (int i = 0; i < 16; ++i)
+    a[i] = a[i]-16;
+}
+
+void
+foo6 (int16_t *restrict a)
+{
+  for (int i = 0; i < 16; ++i)
+    a[i] = a[i]-16;
+}
+
+void
+foo7 (int32_t *restrict a)
+{
+  for (int i = 0; i < 16; ++i)
+    a[i] = a[i]-16;
+}
+
+void
+foo8 (int64_t *restrict a)
+{
+  for (int i = 0; i < 16; ++i)
+    a[i] = a[i]-16;
+}
+
+/* { dg-final { scan-tree-dump-not "Maximum lmul" "vect" } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 4 } } */