diff mbox series

[4/5] RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}

Message ID 20241023104516.2818244-4-pan2.li@intel.com
State New
Headers show
Series [1/5] Internal-fn: Introduce new IFN MASK_LEN_STRIDED_LOAD{STORE} | expand

Commit Message

Li, Pan2 Oct. 23, 2024, 10:45 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch implements the MASK_LEN_STRIDED_LOAD{STORE} in
the RISC-V backend by leveraging the vector strided load/store insn.

For example:
void foo (int * __restrict a, int * __restrict b, int stride, int n)
{
    for (int i = 0; i < n; i++)
      a[i*stride] = b[i*stride] + 100;
}

Before this patch:
  38   │     vsetvli a5,a3,e32,m1,ta,ma
  39   │     vluxei64.v  v1,(a1),v4
  40   │     mul a4,a2,a5
  41   │     sub a3,a3,a5
  42   │     vadd.vv v1,v1,v2
  43   │     vsuxei64.v  v1,(a0),v4
  44   │     add a1,a1,a4
  45   │     add a0,a0,a4

After this patch:
  33   │     vsetvli a5,a3,e32,m1,ta,ma
  34   │     vlse32.v    v1,0(a1),a2
  35   │     mul a4,a2,a5
  36   │     sub a3,a3,a5
  37   │     vadd.vv v1,v1,v2
  38   │     vsse32.v    v1,0(a0),a2
  39   │     add a1,a1,a4
  40   │     add a0,a0,a4

The following test suite passes with this patch:
* The full RISC-V regression test.

gcc/ChangeLog:

	* config/riscv/autovec.md (mask_len_strided_load_<mode>): Add
	new pattern for MASK_LEN_STRIDED_LOAD.
	(mask_len_strided_store_<mode>): Ditto but for store.
	* config/riscv/riscv-protos.h (expand_strided_load): Add new
	func decl to expand strided load.
	(expand_strided_store): Ditto but for store.
	* config/riscv/riscv-v.cc (expand_strided_load): Add new
	func impl to expand strided load.
	(expand_strided_store): Ditto but for store.

Signed-off-by: Pan Li <pan2.li@intel.com>
Co-Authored-By: Juzhe-Zhong <juzhe.zhong@rivai.ai>
---
 gcc/config/riscv/autovec.md     | 29 ++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |  2 ++
 gcc/config/riscv/riscv-v.cc     | 52 +++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+)

Comments

Robin Dapp Oct. 25, 2024, 3:55 p.m. UTC | #1
> +(define_expand "mask_len_strided_store_<mode>"
> +  [(match_operand       0 "pmode_reg_or_0_operand")
> +   (match_operand       1 "pmode_reg_or_0_operand")
> +   (match_operand:V     2 "register_operand")
> +   (match_operand:<VM>  3 "vector_mask_operand")
> +   (match_operand       4 "autovec_length_operand")
> +   (match_operand       5 "const_0_operand")]
> +  "TARGET_VECTOR"
> +  {
> +    riscv_vector::expand_strided_store(<MODE>mode, operands);

Nit, space before '('.
LGTM with that fixed and once the middle-end changes are in.
Li, Pan2 Oct. 26, 2024, 1:41 a.m. UTC | #2
> Nit, space before '('.
> LGTM with that fixed and once the middle-end changes are in.

Got it, thanks Robin.

Pan

-----Original Message-----
From: Robin Dapp <rdapp.gcc@gmail.com> 
Sent: Friday, October 25, 2024 11:56 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: richard.guenther@gmail.com; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; Robin Dapp <rdapp.gcc@gmail.com>
Subject: Re: [PATCH 4/5] RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}

> +(define_expand "mask_len_strided_store_<mode>"
> +  [(match_operand       0 "pmode_reg_or_0_operand")
> +   (match_operand       1 "pmode_reg_or_0_operand")
> +   (match_operand:V     2 "register_operand")
> +   (match_operand:<VM>  3 "vector_mask_operand")
> +   (match_operand       4 "autovec_length_operand")
> +   (match_operand       5 "const_0_operand")]
> +  "TARGET_VECTOR"
> +  {
> +    riscv_vector::expand_strided_store(<MODE>mode, operands);

Nit, space before '('.
LGTM with that fixed and once the middle-end changes are in.
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a34f63c9651..85a915bd65f 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2855,3 +2855,32 @@  (define_expand "v<bitmanip_optab><mode>3"
     DONE;
   }
 )
+
+;; =========================================================================
+;; == Strided Load/Store
+;; =========================================================================
+(define_expand "mask_len_strided_load_<mode>"
+  [(match_operand:V     0 "register_operand")
+   (match_operand       1 "pmode_reg_or_0_operand")
+   (match_operand       2 "pmode_reg_or_0_operand")
+   (match_operand:<VM>  3 "vector_mask_operand")
+   (match_operand       4 "autovec_length_operand")
+   (match_operand       5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_strided_load (<MODE>mode, operands);
+    DONE;
+  })
+
+(define_expand "mask_len_strided_store_<mode>"
+  [(match_operand       0 "pmode_reg_or_0_operand")
+   (match_operand       1 "pmode_reg_or_0_operand")
+   (match_operand:V     2 "register_operand")
+   (match_operand:<VM>  3 "vector_mask_operand")
+   (match_operand       4 "autovec_length_operand")
+   (match_operand       5 "const_0_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_strided_store (<MODE>mode, operands);
+    DONE;
+  })
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index d690162bb0c..47c9494ff2b 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -696,6 +696,8 @@  bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
 void emit_vec_extract (rtx, rtx, rtx);
 bool expand_vec_setmem (rtx, rtx, rtx);
 bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 630fbd80e94..ae028e8928a 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3833,6 +3833,58 @@  expand_load_store (rtx *ops, bool is_load)
     }
 }
 
+/* Expand MASK_LEN_STRIDED_LOAD; OPS is {dest, base, stride, mask, len}.  */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[0];
+  rtx base = ops[1];
+  rtx stride = ops[2];
+  rtx mask = ops[3];
+  rtx len = ops[4];
+  poly_int64 len_val;
+
+  insn_code icode = code_for_pred_strided_load (mode);
+  rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
+
+  if (poly_int_rtx_p (len, &len_val)
+      && known_eq (len_val, GET_MODE_NUNITS (mode)))
+    emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
+  else
+    {
+      len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+      emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len);
+    }
+}
+
+/* Expand MASK_LEN_STRIDED_STORE; OPS is {base, stride, src, mask, len}.  */
+void
+expand_strided_store (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[2];
+  rtx base = ops[0];
+  rtx stride = ops[1];
+  rtx mask = ops[3];
+  rtx len = ops[4];
+  poly_int64 len_val;
+  rtx vl_type;
+
+  if (poly_int_rtx_p (len, &len_val)
+      && known_eq (len_val, GET_MODE_NUNITS (mode)))
+    {
+      len = gen_reg_rtx (Pmode);
+      emit_vlmax_vsetvl (mode, len);
+      vl_type = get_avl_type_rtx (VLMAX);
+    }
+  else
+    {
+      len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+      vl_type = get_avl_type_rtx (NONVLMAX);
+    }
+
+  emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base),
+				     mask, stride, v_reg, len, vl_type));
+}
 
 /* Return true if the operation is the floating-point operation need FRM.  */
 static bool