diff mbox series

RISC-V: Load VLS perm indices directly from memory.

Message ID D5JESY4I2T6F.SPNV2HAO84DX@gmail.com
State New
Headers show
Series RISC-V: Load VLS perm indices directly from memory. | expand

Commit Message

Robin Dapp Nov. 11, 2024, 2:12 p.m. UTC
Hi,

Instead of loading the permutation indices and using vmslt to
determine which elements belong to which source vector, we can compute
the proper mask at compile time.  That way we can emit vlm instead of
vle + vmslt.

Regtested on rv64gcv.

Regards
 Robin

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (shuffle_merge_patterns): Load VLS
	indices directly.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls/merge-1.c: Check for vlm and
	no vmsleu etc.
	* gcc.target/riscv/rvv/autovec/vls/merge-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/merge-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/merge-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/merge-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/merge-6.c: Ditto.
---
 gcc/config/riscv/riscv-v.cc                   | 22 +++++++++++++++++--
 .../riscv/rvv/autovec/vls/merge-1.c           |  2 ++
 .../riscv/rvv/autovec/vls/merge-2.c           |  2 ++
 .../riscv/rvv/autovec/vls/merge-3.c           |  2 ++
 .../riscv/rvv/autovec/vls/merge-4.c           |  2 ++
 .../riscv/rvv/autovec/vls/merge-5.c           |  2 ++
 .../riscv/rvv/autovec/vls/merge-6.c           |  2 ++
 7 files changed, 32 insertions(+), 2 deletions(-)

Comments

Jeff Law Nov. 11, 2024, 7:30 p.m. UTC | #1
On 11/11/24 7:12 AM, Robin Dapp wrote:
> Hi,
> 
> instead of loading the permutation indices and using vmslt in order to
> determine which elements belong to which source vector we can compute
> the proper mask at compile time.  That way we can emit vlm instead of
> vle + vmslt.
> 
> Regtested on rv64gcv.
> 
> Regards
>   Robin
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-v.cc (shuffle_merge_patterns): Load VLS
> 	indices directly.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/autovec/vls/merge-1.c: Check for vlm and
> 	no vmsleu etc.
> 	* gcc.target/riscv/rvv/autovec/vls/merge-2.c: Ditto.
> 	* gcc.target/riscv/rvv/autovec/vls/merge-3.c: Ditto.
> 	* gcc.target/riscv/rvv/autovec/vls/merge-4.c: Ditto.
> 	* gcc.target/riscv/rvv/autovec/vls/merge-5.c: Ditto.
> 	* gcc.target/riscv/rvv/autovec/vls/merge-6.c: Ditto.
OK
jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a0e22b6454b..ee7a0128c0e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3101,9 +3101,27 @@  shuffle_merge_patterns (struct expand_vec_perm_d *d)
   machine_mode mask_mode = get_mask_mode (vmode);
   rtx mask = gen_reg_rtx (mask_mode);
 
-  if (indices_fit_selector_p)
+  if (indices_fit_selector_p && vec_len.is_constant ())
     {
-      /* MASK = SELECTOR < NUNITS ? 1 : 0.  */
+      /* For a constant vector length we can generate the needed mask at
+	 compile time and load it as mask at runtime.
+	 This saves a compare at runtime.  */
+      rtx_vector_builder sel (mask_mode, d->perm.encoding ().npatterns (),
+			      d->perm.encoding ().nelts_per_pattern ());
+      unsigned int encoded_nelts = sel.encoded_nelts ();
+      for (unsigned int i = 0; i < encoded_nelts; i++)
+	sel.quick_push (gen_int_mode (d->perm[i].to_constant ()
+				      < vec_len.to_constant (),
+				      GET_MODE_INNER (mask_mode)));
+      mask = sel.build ();
+    }
+  else if (indices_fit_selector_p)
+    {
+      /* For a dynamic vector length < 256 we keep the permutation
+	 indices in the literal pool, load it at runtime and create the
+	 mask by selecting either OP0 or OP1 by
+
+	    INDICES < NUNITS ? 1 : 0.  */
       rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
       rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode));
       insn_code icode = code_for_pred_cmp_scalar (sel_mode);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
index cd24922d0ad..c34734cff6d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
@@ -4,3 +4,5 @@ 
 #include "../vls-vlmax/merge-1.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
index 52d91244f51..68f7b62e62f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
@@ -4,3 +4,5 @@ 
 #include "../vls-vlmax/merge-2.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
index 4931d2a3604..1250dca65d1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
@@ -4,3 +4,5 @@ 
 #include "../vls-vlmax/merge-3.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
index f22a18f8ef3..1dfd8287b7f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
@@ -4,3 +4,5 @@ 
 #include "../vls-vlmax/merge-4.c"
 
 /* dg-final scan-assembler-times {\tvmerge.vvm} 11 */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c
index cf8d04c4bce..af84a6552c0 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c
@@ -4,3 +4,5 @@ 
 #include "../vls-vlmax/merge-5.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 8 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c
index 3b6f9774d51..45e999823ce 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c
@@ -4,3 +4,5 @@ 
 #include "../vls-vlmax/merge-6.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 5 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 5 } } */