diff mbox series

[v1,3/4] RISC-V: Implement vector SAT_SUB for signed integer

Message ID 20241011062245.2486653-3-pan2.li@intel.com
State New
Headers show
Series [v1,1/4] Match: Support form 1 for vector signed integer SAT_SUB | expand

Commit Message

Li, Pan2 Oct. 11, 2024, 6:22 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch implements the sssub standard name (signed saturating subtract)
for vector signed integers.

Form 1:
  #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)                     \
  void __attribute__((noinline))                                       \
  vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
  {                                                                    \
    unsigned i;                                                        \
    for (i = 0; i < limit; i++)                                        \
      {                                                                \
        T x = op_1[i];                                                 \
        T y = op_2[i];                                                 \
        T minus = (UT)x - (UT)y;                                       \
        out[i] = (x ^ y) >= 0                                          \
          ? minus                                                      \
          : (minus ^ x) >= 0                                           \
            ? minus                                                    \
            : x < 0 ? MIN : MAX;                                       \
      }                                                                \
  }

DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  28   │     vle8.v  v1,0(a1)
  29   │     vle8.v  v2,0(a2)
  30   │     sub a3,a3,a5
  31   │     add a1,a1,a5
  32   │     add a2,a2,a5
  33   │     vsra.vi v4,v1,7
  34   │     vsub.vv v3,v1,v2
  35   │     vxor.vv v2,v1,v2
  36   │     vxor.vv v0,v1,v3
  37   │     vmslt.vi    v2,v2,0
  38   │     vmslt.vi    v0,v0,0
  39   │     vmand.mm    v0,v0,v2
  40   │     vxor.vv v3,v4,v5,v0.t
  41   │     vse8.v  v3,0(a0)
  42   │     add a0,a0,a5

After this patch:
  25   │     vle8.v  v1,0(a1)
  26   │     vle8.v  v2,0(a2)
  27   │     sub a3,a3,a5
  28   │     add a1,a1,a5
  29   │     add a2,a2,a5
  30   │     vssub.vv    v1,v1,v2
  31   │     vse8.v  v1,0(a0)
  32   │     add a0,a0,a5

The following test suite passes with this patch.
* The rv64gcv full regression test.

gcc/ChangeLog:

	* config/riscv/autovec.md (sssub<mode>3): Add new pattern for
	signed SAT_SUB.
	* config/riscv/riscv-protos.h (expand_vec_sssub): Add new func
	decl to expand sssub to vssub.
	* config/riscv/riscv-v.cc (expand_vec_sssub): Add new func
	impl to expand sssub to vssub.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md     | 11 +++++++++++
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-v.cc     |  9 +++++++++
 3 files changed, 21 insertions(+)

Comments

钟居哲 Oct. 12, 2024, 12:31 a.m. UTC | #1
LGTM



juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2024-10-11 14:22
To: gcc-patches
CC: richard.guenther; Tamar.Christina; juzhe.zhong; kito.cheng; jeffreyalaw; rdapp.gcc; Pan Li
Subject: [PATCH v1 3/4] RISC-V: Implement vector SAT_SUB for signed integer
From: Pan Li <pan2.li@intel.com>
 
This patch implements the sssub standard name (signed saturating subtract)
for vector signed integers.
 
Form 1:
  #define DEF_VEC_SAT_S_SUB_FMT_1(T, UT, MIN, MAX)                     \
  void __attribute__((noinline))                                       \
  vec_sat_s_sub_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
  {                                                                    \
    unsigned i;                                                        \
    for (i = 0; i < limit; i++)                                        \
      {                                                                \
        T x = op_1[i];                                                 \
        T y = op_2[i];                                                 \
        T minus = (UT)x - (UT)y;                                       \
        out[i] = (x ^ y) >= 0                                          \
          ? minus                                                      \
          : (minus ^ x) >= 0                                           \
            ? minus                                                    \
            : x < 0 ? MIN : MAX;                                       \
      }                                                                \
  }
 
DEF_VEC_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
 
Before this patch:
  28   │     vle8.v  v1,0(a1)
  29   │     vle8.v  v2,0(a2)
  30   │     sub a3,a3,a5
  31   │     add a1,a1,a5
  32   │     add a2,a2,a5
  33   │     vsra.vi v4,v1,7
  34   │     vsub.vv v3,v1,v2
  35   │     vxor.vv v2,v1,v2
  36   │     vxor.vv v0,v1,v3
  37   │     vmslt.vi    v2,v2,0
  38   │     vmslt.vi    v0,v0,0
  39   │     vmand.mm    v0,v0,v2
  40   │     vxor.vv v3,v4,v5,v0.t
  41   │     vse8.v  v3,0(a0)
  42   │     add a0,a0,a5
 
After this patch:
  25   │     vle8.v  v1,0(a1)
  26   │     vle8.v  v2,0(a2)
  27   │     sub a3,a3,a5
  28   │     add a1,a1,a5
  29   │     add a2,a2,a5
  30   │     vssub.vv    v1,v1,v2
  31   │     vse8.v  v1,0(a0)
  32   │     add a0,a0,a5
 
The following test suite passes with this patch.
* The rv64gcv full regression test.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (sssub<mode>3): Add new pattern for
signed SAT_SUB.
* config/riscv/riscv-protos.h (expand_vec_sssub): Add new func
decl to expand sssub to vssub.
* config/riscv/riscv-v.cc (expand_vec_sssub): Add new func
impl to expand sssub to vssub.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md     | 11 +++++++++++
gcc/config/riscv/riscv-protos.h |  1 +
gcc/config/riscv/riscv-v.cc     |  9 +++++++++
3 files changed, 21 insertions(+)
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 836cdd4491f..7dc78a48874 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2734,6 +2734,17 @@ (define_expand "ussub<mode>3"
   }
)
+(define_expand "sssub<mode>3"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")
+   (match_operand:V_VLSI 2 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], <MODE>mode);
+    DONE;
+  }
+)
+
(define_expand "ustrunc<mode><v_double_trunc>2"
   [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
    (match_operand:VWEXTI           1 "register_operand")]
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 1e6d10a1402..b2f5d72f494 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -649,6 +649,7 @@ void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
+void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ca3a80cceb9..fba35652cc2 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4902,6 +4902,15 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
   emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode);
}
+/* Expand the standard name sssub<mode>3 for vector mode,  we can leverage
+   the vector fixed point vector single-width saturating subtract directly.  */
+
+void
+expand_vec_sssub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
+{
+  emit_vec_binary_alu (op_0, op_1, op_2, SS_MINUS, vec_mode);
+}
+
/* Expand the standard name ustrunc<m><n>2 for double vector mode,  like
    DI => SI.  we can leverage the vector fixed point vector narrowing
    fixed-point clip directly.  */
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 836cdd4491f..7dc78a48874 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2734,6 +2734,17 @@  (define_expand "ussub<mode>3"
   }
 )
 
+(define_expand "sssub<mode>3"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")
+   (match_operand:V_VLSI 2 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], <MODE>mode);
+    DONE;
+  }
+)
+
 (define_expand "ustrunc<mode><v_double_trunc>2"
   [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
    (match_operand:VWEXTI           1 "register_operand")]
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 1e6d10a1402..b2f5d72f494 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -649,6 +649,7 @@  void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
 void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
 void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
+void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
 void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
 void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index ca3a80cceb9..fba35652cc2 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4902,6 +4902,15 @@  expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
   emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode);
 }
 
+/* Expand the standard name sssub<mode>3 for vector mode,  we can leverage
+   the vector fixed point vector single-width saturating subtract directly.  */
+
+void
+expand_vec_sssub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
+{
+  emit_vec_binary_alu (op_0, op_1, op_2, SS_MINUS, vec_mode);
+}
+
 /* Expand the standard name ustrunc<m><n>2 for double vector mode,  like
    DI => SI.  we can leverage the vector fixed point vector narrowing
    fixed-point clip directly.  */