diff mbox series

[v2,1/2] RISC-V: Make vectorized memset handle more cases

Message ID 20241104130943.4041719-2-craig.blackmore@embecosm.com
State New
Headers show
Series RISC-V: Vector memcpy/memset fixes and improvements | expand

Commit Message

Craig Blackmore Nov. 4, 2024, 1:09 p.m. UTC
`expand_vec_setmem` only generated vectorized memset if it fitted into a
single vector store of at least (TARGET_MIN_VLEN / 8) bytes.  Also,
without dynamic LMUL the operation was always TARGET_MAX_LMUL even if it
would have fitted a smaller LMUL.

Allow vectorized memset to be generated for smaller lengths and smaller
LMUL by switching to using use_vector_stringop_p.  Smaller LMUL can be
seen in setmem-3.c:f2.  Smaller lengths will be seen after the second
patch in this series, which selectively disables by-pieces expansion.

gcc/ChangeLog:

	* config/riscv/riscv-string.cc
	(use_vector_stringop_p): Add comment.
	(expand_vec_setmem): Use use_vector_stringop_p instead of
	check_vectorise_memory_operation.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/setmem-3.c: Expect smaller lmul.
---
 gcc/config/riscv/riscv-string.cc              | 37 ++++++++++---------
 .../gcc.target/riscv/rvv/base/setmem-3.c      |  6 +--
 2 files changed, 22 insertions(+), 21 deletions(-)

Comments

Jeff Law Nov. 4, 2024, 8:56 p.m. UTC | #1
On 11/4/24 6:09 AM, Craig Blackmore wrote:
> `expand_vec_setmem` only generated vectorized memset if it fitted into a
> single vector store of at least (TARGET_MIN_VLEN / 8) bytes.  Also,
> without dynamic LMUL the operation was always TARGET_MAX_LMUL even if it
> would have fitted a smaller LMUL.
> 
> Allow vectorized memset to be generated for smaller lengths and smaller
> LMUL by switching to using use_vector_stringop_p.  Smaller LMUL can be
> seen in setmem-3.c:f3.  Smaller lengths will be seen after the second
> patch in this series which selectively disables by pieces.
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-string.cc
> 	(use_vector_stringop_p): Add comment.
> 	(expand_vec_setmem): Use use_vector_stringop_p instead of
> 	check_vectorise_memory_operation.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/base/setmem-3.c: Expect smaller lmul.
Thanks.  I've pushed this to the trunk.
jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 118c02a4021..20395e19c60 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1062,6 +1062,9 @@  struct stringop_info {
 
    MAX_EW is the maximum element width that the caller wants to use and
    LENGTH_IN is the length of the stringop in bytes.
+
+   This is currently used for cpymem and setmem.  If expand_vec_cmpmem switches
+   to using it too then check_vectorise_memory_operation can be removed.
 */
 
 static bool
@@ -1600,41 +1603,39 @@  check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out)
 bool
 expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in)
 {
-  HOST_WIDE_INT lmul;
+  stringop_info info;
+
   /* Check we are able and allowed to vectorise this operation;
      bail if not.  */
-  if (!check_vectorise_memory_operation (length_in, lmul))
+  if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop)
     return false;
 
-  machine_mode vmode
-      = riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul)
-	    .require ();
   rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0));
-  rtx dst = change_address (dst_in, vmode, dst_addr);
+  rtx dst = change_address (dst_in, info.vmode, dst_addr);
 
-  rtx fill_value = gen_reg_rtx (vmode);
+  rtx fill_value = gen_reg_rtx (info.vmode);
   rtx broadcast_ops[] = { fill_value, fill_value_in };
 
   /* If the length is exactly vlmax for the selected mode, do that.
      Otherwise, use a predicated store.  */
-  if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in)))
+  if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl)))
     {
-      emit_vlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
-			  broadcast_ops);
+      emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP,
+		       broadcast_ops);
       emit_move_insn (dst, fill_value);
     }
   else
     {
-      if (!satisfies_constraint_K (length_in))
-	      length_in = force_reg (Pmode, length_in);
-      emit_nonvlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
-			  broadcast_ops, length_in);
+      if (!satisfies_constraint_K (info.avl))
+	info.avl = force_reg (Pmode, info.avl);
+      emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode),
+			  riscv_vector::UNARY_OP, broadcast_ops, info.avl);
       machine_mode mask_mode
-	      = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (vmode))
-		      .require ();
+	= riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode))
+	  .require ();
       rtx mask = CONSTM1_RTX (mask_mode);
-      emit_insn (gen_pred_store (vmode, dst, mask, fill_value, length_in,
-			  get_avl_type_rtx (riscv_vector::NONVLMAX)));
+      emit_insn (gen_pred_store (info.vmode, dst, mask, fill_value, info.avl,
+				 get_avl_type_rtx (riscv_vector::NONVLMAX)));
     }
 
   return true;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
index 25be694d248..52766fece76 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/setmem-3.c
@@ -21,13 +21,13 @@  f1 (void *a, int const b)
   return __builtin_memset (a, b, MIN_VECTOR_BYTES - 1);
 }
 
-/* Vectorise+inline minimum vector register width using requested lmul.
+/* Vectorised code should use smallest lmul known to fit length.
 ** f2:
 **  (
-**  vsetivli\s+zero,\d+,e8,m8,ta,ma
+**  vsetivli\s+zero,\d+,e8,m1,ta,ma
 **  |
 **  li\s+a\d+,\d+
-**  vsetvli\s+zero,a\d+,e8,m8,ta,ma
+**  vsetvli\s+zero,a\d+,e8,m1,ta,ma
 **  )
 **  vmv\.v\.x\s+v\d+,a1
 **  vse8\.v\s+v\d+,0\(a0\)