
[v1,3/4] RISC-V: Implement scalar SAT_TRUNC for signed integer

Message ID 20241008083315.190027-1-pan2.li@intel.com
State New
Series [v1,1/4] Match: Support form 1 for scalar signed integer SAT_TRUNC

Commit Message

Li, Pan2 Oct. 8, 2024, 8:33 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch implements the sstrunc expansion for scalar signed
integers.

Form 1:
  #define DEF_SAT_S_TRUNC_FMT_1(WT, NT, NT_MIN, NT_MAX) \
  NT __attribute__((noinline))                          \
  sat_s_trunc_##WT##_to_##NT##_fmt_1 (WT x)             \
  {                                                     \
    NT trunc = (NT)x;                                   \
    return (WT)NT_MIN <= x && x <= (WT)NT_MAX           \
      ? trunc                                           \
      : x < 0 ? NT_MIN : NT_MAX;                        \
  }

DEF_SAT_S_TRUNC_FMT_1(int64_t, int32_t, INT32_MIN, INT32_MAX)
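
As a quick illustration of the intended semantics, a hypothetical test
driver (assuming the macro and the int64_t to int32_t instantiation
above; not part of the patch) behaves as follows:

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    /* In range: plain truncation.  */
    printf ("%d\n", sat_s_trunc_int64_t_to_int32_t_fmt_1 (42));        /* 42 */
    /* Above INT32_MAX: saturate to the narrow upper bound.  */
    printf ("%d\n", sat_s_trunc_int64_t_to_int32_t_fmt_1 (INT64_MAX)); /* 2147483647 */
    /* Below INT32_MIN: saturate to the narrow lower bound.  */
    printf ("%d\n", sat_s_trunc_int64_t_to_int32_t_fmt_1 (INT64_MIN)); /* -2147483648 */
    return 0;
  }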

Before this patch:
  10   │ sat_s_trunc_int64_t_to_int32_t_fmt_1:
  11   │     li  a5,1
  12   │     slli    a5,a5,31
  13   │     li  a4,-1
  14   │     add a5,a0,a5
  15   │     srli    a4,a4,32
  16   │     bgtu    a5,a4,.L2
  17   │     sext.w  a0,a0
  18   │     ret
  19   │ .L2:
  20   │     srai    a5,a0,63
  21   │     li  a0,-2147483648
  22   │     xor a0,a0,a5
  23   │     not a0,a0
  24   │     ret

After this patch:
  10   │ sat_s_trunc_int64_t_to_int32_t_fmt_1:
  11   │     li  a5,-2147483648
  12   │     xori    a3,a5,-1
  13   │     slt a4,a0,a3
  14   │     slt a5,a5,a0
  15   │     and a5,a4,a5
  16   │     srai    a4,a0,63
  17   │     xor a4,a4,a3
  18   │     addi    a3,a5,-1
  19   │     neg a5,a5
  20   │     and a4,a4,a3
  21   │     and a0,a0,a5
  22   │     or  a0,a0,a4
  23   │     sext.w  a0,a0
  24   │     ret
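
The expanded sequence above is branchless: it builds an in-range mask
from the two slt results and then selects between the truncated value
and the saturated bound.  A rough C sketch of that select, mirroring
steps 1-11 of riscv_expand_sstrunc below (illustrative only, not the
actual expander code):

  #include <stdint.h>

  static int32_t
  sat_s_trunc_sketch (int64_t x)
  {
    int64_t max = INT32_MAX, min = INT32_MIN;
    int64_t lt = x < max;            /* slt a4,a0,a3                  */
    int64_t gt = min < x;            /* slt a5,a5,a0                  */
    int64_t mask = lt & gt;          /* 1 when x fits in int32_t      */
    int64_t trunc_mask = -mask;      /* all-ones when in range        */
    int64_t sat_mask = mask - 1;     /* all-ones when out of range    */
    int64_t sign = -(x < 0);         /* srai a4,a0,63                 */
    int64_t sat = sign ^ max;        /* INT32_MIN if x < 0, else MAX  */
    return (int32_t) ((x & trunc_mask) | (sat & sat_mask));
  }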

The below test suites are passed for this patch:
* The rv64gcv full regression test.

gcc/ChangeLog:

	* config/riscv/riscv-protos.h (riscv_expand_sstrunc): Add new
	func decl to expand SAT_TRUNC.
	* config/riscv/riscv.cc (riscv_expand_sstrunc): Add new func
	impl to expand SAT_TRUNC.
	* config/riscv/riscv.md (sstrunc<mode><anyi_double_truncated>2):
	Add new pattern for double truncation.
	(sstrunc<mode><anyi_quad_truncated>2): Ditto but for quad.
	(sstrunc<mode><anyi_oct_truncated>2): Ditto but for oct.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv.cc       | 61 +++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv.md       | 30 ++++++++++++++++
 3 files changed, 92 insertions(+)

Comments

Kito Cheng Oct. 8, 2024, 11:06 a.m. UTC | #1
LGTM


Patch

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 3d8775e582d..1e6d10a1402 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -138,6 +138,7 @@  extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_sssub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
+extern void riscv_expand_sstrunc (rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8708a7b42c6..57f2554d491 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12438,6 +12438,67 @@  riscv_expand_ustrunc (rtx dest, rtx src)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Implement the signed saturation truncation for int mode.
+
+   b = SAT_TRUNC (a);
+   =>
+   1.  lt = a < max
+   2.  gt = min < a
+   3.  mask = lt & gt
+   4.  trunc_mask = -mask
+   5.  sat_mask = mask - 1
+   6.  lt = a < 0
+   7.  neg = -lt
+   8.  sat = neg ^ max
+   9.  trunc = src & trunc_mask
+   10. sat = sat & sat_mask
+   11. dest = trunc | sat  */
+
+void
+riscv_expand_sstrunc (rtx dest, rtx src)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned narrow_prec = GET_MODE_PRECISION (mode).to_constant ();
+  HOST_WIDE_INT narrow_max = ((int64_t)1 << (narrow_prec - 1)) - 1; // 127
+  HOST_WIDE_INT narrow_min = -narrow_max - 1; // -128
+
+  rtx xmode_narrow_max = gen_reg_rtx (Xmode);
+  rtx xmode_narrow_min = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_gt = gen_reg_rtx (Xmode);
+  rtx xmode_src = gen_lowpart (Xmode, src);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+  rtx xmode_mask = gen_reg_rtx (Xmode);
+  rtx xmode_sat = gen_reg_rtx (Xmode);
+  rtx xmode_trunc = gen_reg_rtx (Xmode);
+  rtx xmode_sat_mask = gen_reg_rtx (Xmode);
+  rtx xmode_trunc_mask = gen_reg_rtx (Xmode);
+
+  /* Step-1: lt = src < max, gt = min < src, mask = lt & gt  */
+  emit_move_insn (xmode_narrow_min, gen_int_mode (narrow_min, Xmode));
+  emit_move_insn (xmode_narrow_max, gen_int_mode (narrow_max, Xmode));
+  riscv_emit_binary (LT, xmode_lt, xmode_src, xmode_narrow_max);
+  riscv_emit_binary (LT, xmode_gt, xmode_narrow_min, xmode_src);
+  riscv_emit_binary (AND, xmode_mask, xmode_lt, xmode_gt);
+
+  /* Step-2: sat_mask = mask - 1, trunc_mask = -mask  */
+  riscv_emit_binary (PLUS, xmode_sat_mask, xmode_mask, CONSTM1_RTX (Xmode));
+  riscv_emit_unary (NEG, xmode_trunc_mask, xmode_mask);
+
+  /* Step-3: lt = src < 0, lt = -lt, sat = lt ^ narrow_max  */
+  riscv_emit_binary (LT, xmode_lt, xmode_src, CONST0_RTX (Xmode));
+  riscv_emit_unary (NEG, xmode_lt, xmode_lt);
+  riscv_emit_binary (XOR, xmode_sat, xmode_lt, xmode_narrow_max);
+
+  /* Step-4: xmode_dest = (src & trunc_mask) | (sat & sat_mask)  */
+  riscv_emit_binary (AND, xmode_trunc, xmode_src, xmode_trunc_mask);
+  riscv_emit_binary (AND, xmode_sat, xmode_sat, xmode_sat_mask);
+  riscv_emit_binary (IOR, xmode_dest, xmode_trunc, xmode_sat);
+
+  /* Step-5: dest = xmode_dest  */
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode for
    TI_LONG_DOUBLE_TYPE which is for long double type, go with the
    default one for the others.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 067c2415db1..688c07df46c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4413,6 +4413,16 @@  (define_expand "ustrunc<mode><anyi_double_truncated>2"
   }
 )
 
+(define_expand "sstrunc<mode><anyi_double_truncated>2"
+  [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_DOUBLE_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_sstrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 (define_expand "ustrunc<mode><anyi_quad_truncated>2"
   [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
    (match_operand:ANYI_QUAD_TRUNC       1 "register_operand")]
@@ -4423,6 +4433,16 @@  (define_expand "ustrunc<mode><anyi_quad_truncated>2"
   }
 )
 
+(define_expand "sstrunc<mode><anyi_quad_truncated>2"
+  [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_QUAD_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_sstrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 (define_expand "ustrunc<mode><anyi_oct_truncated>2"
   [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
    (match_operand:ANYI_OCT_TRUNC       1 "register_operand")]
@@ -4433,6 +4453,16 @@  (define_expand "ustrunc<mode><anyi_oct_truncated>2"
   }
 )
 
+(define_expand "sstrunc<mode><anyi_oct_truncated>2"
+  [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_OCT_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_sstrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; These are forms of (x << C1) + C2, potentially canonicalized from
 ;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
 ;; want to go ahead and recognize this form as C2 may be cheaper to