diff mbox series

[v1,2/3] RISC-V: Implement scalar SAT_SUB for signed integer

Message ID 20240925025532.1584532-1-pan2.li@intel.com
State New
Headers show
Series [v1,1/3] Match: Support form 1 for scalar signed integer SAT_SUB | expand

Commit Message

Li, Pan2 Sept. 25, 2024, 2:55 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch would like to implement the sssub form 1.  Aka:

Form 1:
  #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))                  \
  sat_s_sub_##T##_fmt_1 (T x, T y)             \
  {                                            \
    T minus = (UT)x - (UT)y;                   \
    return (x ^ y) >= 0                        \
      ? minus                                  \
      : (minus ^ x) >= 0                       \
        ? minus                                \
        : x < 0 ? MIN : MAX;                   \
  }

DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  10   │ sat_s_sub_int8_t_fmt_1:
  11   │     subw    a5,a0,a1
  12   │     slliw   a5,a5,24
  13   │     sraiw   a5,a5,24
  14   │     xor a1,a0,a1
  15   │     xor a4,a0,a5
  16   │     and a1,a1,a4
  17   │     blt a1,zero,.L4
  18   │     mv  a0,a5
  19   │     ret
  20   │ .L4:
  21   │     srai    a0,a0,63
  22   │     xori    a5,a0,127
  23   │     mv  a0,a5
  24   │     ret

After this patch:
  10   │ sat_s_sub_int8_t_fmt_1:
  11   │     sub a4,a0,a1
  12   │     xor a5,a0,a4
  13   │     xor a1,a0,a1
  14   │     and a5,a5,a1
  15   │     srli    a5,a5,7
  16   │     andi    a5,a5,1
  17   │     srai    a0,a0,63
  18   │     xori    a3,a0,127
  19   │     neg a0,a5
  20   │     addi    a5,a5,-1
  21   │     and a3,a3,a0
  22   │     and a0,a4,a5
  23   │     or  a0,a0,a3
  24   │     slliw   a0,a0,24
  25   │     sraiw   a0,a0,24
  26   │     ret

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

	* config/riscv/riscv-protos.h (riscv_expand_sssub): Add new func
	decl for expanding signed SAT_SUB.
	* config/riscv/riscv.cc (riscv_expand_sssub): Add new func impl
	for expanding signed SAT_SUB.
	* config/riscv/riscv.md (sssub<mode>3): Add new pattern sssub
	for scalar signed integer.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc       | 69 +++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv.md       | 11 ++++++
 3 files changed, 81 insertions(+)

Comments

Jeff Law Sept. 29, 2024, 3:29 p.m. UTC | #1
On 9/24/24 8:55 PM, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
> 
> This patch would like to implement the sssub form 1.  Aka:
> 
> Form 1:
>    #define DEF_SAT_S_SUB_FMT_1(T, UT, MIN, MAX) \
>    T __attribute__((noinline))                  \
>    sat_s_sub_##T##_fmt_1 (T x, T y)             \
>    {                                            \
>      T minus = (UT)x - (UT)y;                   \
>      return (x ^ y) >= 0                        \
>        ? minus                                  \
>        : (minus ^ x) >= 0                       \
>          ? minus                                \
>          : x < 0 ? MIN : MAX;                   \
>    }
> 
> DEF_SAT_S_SUB_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
> 
> Before this patch:
>    10   │ sat_s_sub_int8_t_fmt_1:
>    11   │     subw    a5,a0,a1
>    12   │     slliw   a5,a5,24
>    13   │     sraiw   a5,a5,24
>    14   │     xor a1,a0,a1
>    15   │     xor a4,a0,a5
>    16   │     and a1,a1,a4
>    17   │     blt a1,zero,.L4
>    18   │     mv  a0,a5
>    19   │     ret
>    20   │ .L4:
>    21   │     srai    a0,a0,63
>    22   │     xori    a5,a0,127
>    23   │     mv  a0,a5
>    24   │     ret
> 
> After this patch:
>    10   │ sat_s_sub_int8_t_fmt_1:
>    11   │     sub a4,a0,a1
>    12   │     xor a5,a0,a4
>    13   │     xor a1,a0,a1
>    14   │     and a5,a5,a1
>    15   │     srli    a5,a5,7
>    16   │     andi    a5,a5,1
>    17   │     srai    a0,a0,63
>    18   │     xori    a3,a0,127
>    19   │     neg a0,a5
>    20   │     addi    a5,a5,-1
>    21   │     and a3,a3,a0
>    22   │     and a0,a4,a5
>    23   │     or  a0,a0,a3
>    24   │     slliw   a0,a0,24
>    25   │     sraiw   a0,a0,24
>    26   │     ret
> 
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-protos.h (riscv_expand_sssub): Add new func
> 	decl for expanding signed SAT_SUB.
> 	* config/riscv/riscv.cc (riscv_expand_sssub): Add new func impl
> 	for expanding signed SAT_SUB.
> 	* config/riscv/riscv.md (sssub<mode>3): Add new pattern sssub
> 	for scalar signed integer.
OK
jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 07a4d42e3a5..3d8775e582d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -136,6 +136,7 @@  extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_sssub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7be3939a7f9..8708a7b42c6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12329,6 +12329,75 @@  riscv_expand_ussub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Implements the signed saturation sub standard name sssub for int mode.
+
+   z = SAT_SUB(x, y).
+   =>
+   1.  minus = x - y
+   2.  xor_0 = x ^ y
+   3.  xor_1 = x ^ minus
+   4.  lt_0 = xor_0 < 0
+   5.  lt_1 = xor_1 < 0
+   6.  and = lt_0 & lt_1
+   7.  lt = x < 0
+   8.  neg = -lt
+   9.  max = INT_MAX
+   10. max = max ^ neg
+   11. neg = -and
+   12. max = max & neg
+   13. and = and - 1
+   14. z = minus & and
+   15. z = z | max  */
+
+void
+riscv_expand_sssub (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+  rtx shift_bits = GEN_INT (bitsize - 1);
+  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_minus = gen_reg_rtx (Xmode);
+  rtx xmode_xor_0 = gen_reg_rtx (Xmode);
+  rtx xmode_xor_1 = gen_reg_rtx (Xmode);
+  rtx xmode_lt_0 = gen_reg_rtx (Xmode);
+  rtx xmode_lt_1 = gen_reg_rtx (Xmode);
+  rtx xmode_and = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_neg = gen_reg_rtx (Xmode);
+  rtx xmode_max = gen_reg_rtx (Xmode);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+
+  /* Step-1: minus = x - y, xor_0 = x ^ y, xor_1 = x ^ minus.  */
+  riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_minus);
+
+  /* Step-2: and = xor_0 < 0 & xor_1 < 0.  */
+  riscv_emit_binary (LSHIFTRT, xmode_lt_0, xmode_xor_0, shift_bits);
+  riscv_emit_binary (LSHIFTRT, xmode_lt_1, xmode_xor_1, shift_bits);
+  riscv_emit_binary (AND, xmode_and, xmode_lt_0, xmode_lt_1);
+  riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode));
+
+  /* Step-3: lt = x < 0 (in Xmode; assumes x is sign-extended), neg = -lt.  */
+  riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode));
+  riscv_emit_unary (NEG, xmode_neg, xmode_lt);
+
+  /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg.  */
+  riscv_emit_move (xmode_max, riscv_gen_sign_max_cst (mode));
+  riscv_emit_binary (XOR, xmode_max, xmode_max, xmode_neg);
+  riscv_emit_unary (NEG, xmode_neg, xmode_and);
+  riscv_emit_binary (AND, xmode_max, xmode_max, xmode_neg);
+
+  /* Step-5: and = and - 1, dest = minus & and.  */
+  riscv_emit_binary (PLUS, xmode_and, xmode_and, CONSTM1_RTX (Xmode));
+  riscv_emit_binary (AND, xmode_dest, xmode_minus, xmode_and);
+
+  /* Step-6: dest = dest | max.  */
+  riscv_emit_binary (IOR, xmode_dest, xmode_dest, xmode_max);
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
 /* Implement the unsigned saturation truncation for int mode.
 
    b = SAT_TRUNC (a);
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 0410d990ec5..067c2415db1 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4392,6 +4392,17 @@  (define_expand "ussub<mode>3"
   }
 )
 
+(define_expand "sssub<mode>3"
+  [(match_operand:ANYI 0 "register_operand")
+   (match_operand:ANYI 1 "register_operand")
+   (match_operand:ANYI 2 "register_operand")]
+  ""
+  {
+    riscv_expand_sssub (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
+
 (define_expand "ustrunc<mode><anyi_double_truncated>2"
   [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
    (match_operand:ANYI_DOUBLE_TRUNC       1 "register_operand")]