diff mbox series

[v3] RISC-V: Implement the .SAT_TRUNC for scalar

Message ID 20240715103528.2326447-1-pan2.li@intel.com
State New
Headers show
Series [v3] RISC-V: Implement the .SAT_TRUNC for scalar | expand

Commit Message

Li, Pan2 July 15, 2024, 10:35 a.m. UTC
From: Pan Li <pan2.li@intel.com>

Update in v3:
* Rebase the upstream.
* Adjust asm check.

Original log:
This patch implements the simple .SAT_TRUNC pattern in the RISC-V
backend, namely:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
  NT __attribute__((noinline))             \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {                                        \
    bool overflow = x > (WT)(NT)(-1);      \
    return ((NT)x) | (NT)-overflow;        \
  }

DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  _Bool overflow;
  unsigned char _1;
  unsigned char _2;
  unsigned char _3;
  uint8_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  overflow_5 = x_4(D) > 255;
  _1 = (unsigned char) x_4(D);
  _2 = (unsigned char) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;    succ:       EXIT

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  uint8_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;    succ:       EXIT

}

The following test suites pass with this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

	* config/riscv/iterators.md (ANYI_DOUBLE_TRUNC): Add new iterator
	for int double truncation.
	(ANYI_DOUBLE_TRUNCATED): Add new attr for int double truncation.
	(anyi_double_truncated): Ditto but for lowercase.
	* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
	func decl for expanding ustrunc
	* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
	impl to expand ustrunc.
	* config/riscv/riscv.md (ustrunc<mode><anyi_double_truncated>2): Impl
	the new pattern ustrunc<m><n>2 for int.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Adjust
	asm check times from 2 to 4.
	* gcc.target/riscv/sat_arith.h: Add test helper macro.
	* gcc.target/riscv/sat_arith_data.h: New test.
	* gcc.target/riscv/sat_u_trunc-1.c: New test.
	* gcc.target/riscv/sat_u_trunc-2.c: New test.
	* gcc.target/riscv/sat_u_trunc-3.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
	* gcc.target/riscv/scalar_sat_unary.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/iterators.md                 | 10 ++++
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv.cc                     | 40 +++++++++++++
 gcc/config/riscv/riscv.md                     | 10 ++++
 .../rvv/autovec/unop/vec_sat_u_trunc-1.c      |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h    | 16 ++++++
 .../gcc.target/riscv/sat_arith_data.h         | 56 +++++++++++++++++++
 .../gcc.target/riscv/sat_u_trunc-1.c          | 17 ++++++
 .../gcc.target/riscv/sat_u_trunc-2.c          | 20 +++++++
 .../gcc.target/riscv/sat_u_trunc-3.c          | 19 +++++++
 .../gcc.target/riscv/sat_u_trunc-run-1.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-2.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-3.c      | 16 ++++++
 .../gcc.target/riscv/scalar_sat_unary.h       | 22 ++++++++
 14 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_arith_data.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h

Comments

Li, Pan2 July 22, 2024, 8:44 a.m. UTC | #1
Kindly ping.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Monday, July 15, 2024 6:35 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v3] RISC-V: Implement the .SAT_TRUNC for scalar

From: Pan Li <pan2.li@intel.com>

Update in v3:
* Rebase the upstream.
* Adjust asm check.

Original log:
This patch implements the simple .SAT_TRUNC pattern in the RISC-V
backend, namely:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
  NT __attribute__((noinline))             \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {                                        \
    bool overflow = x > (WT)(NT)(-1);      \
    return ((NT)x) | (NT)-overflow;        \
  }

DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  _Bool overflow;
  unsigned char _1;
  unsigned char _2;
  unsigned char _3;
  uint8_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  overflow_5 = x_4(D) > 255;
  _1 = (unsigned char) x_4(D);
  _2 = (unsigned char) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;    succ:       EXIT

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  uint8_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;    succ:       EXIT

}

The following test suites pass with this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

	* config/riscv/iterators.md (ANYI_DOUBLE_TRUNC): Add new iterator
	for int double truncation.
	(ANYI_DOUBLE_TRUNCATED): Add new attr for int double truncation.
	(anyi_double_truncated): Ditto but for lowercase.
	* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
	func decl for expanding ustrunc
	* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
	impl to expand ustrunc.
	* config/riscv/riscv.md (ustrunc<mode><anyi_double_truncated>2): Impl
	the new pattern ustrunc<m><n>2 for int.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Adjust
	asm check times from 2 to 4.
	* gcc.target/riscv/sat_arith.h: Add test helper macro.
	* gcc.target/riscv/sat_arith_data.h: New test.
	* gcc.target/riscv/sat_u_trunc-1.c: New test.
	* gcc.target/riscv/sat_u_trunc-2.c: New test.
	* gcc.target/riscv/sat_u_trunc-3.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
	* gcc.target/riscv/scalar_sat_unary.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/iterators.md                 | 10 ++++
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv.cc                     | 40 +++++++++++++
 gcc/config/riscv/riscv.md                     | 10 ++++
 .../rvv/autovec/unop/vec_sat_u_trunc-1.c      |  2 +-
 gcc/testsuite/gcc.target/riscv/sat_arith.h    | 16 ++++++
 .../gcc.target/riscv/sat_arith_data.h         | 56 +++++++++++++++++++
 .../gcc.target/riscv/sat_u_trunc-1.c          | 17 ++++++
 .../gcc.target/riscv/sat_u_trunc-2.c          | 20 +++++++
 .../gcc.target/riscv/sat_u_trunc-3.c          | 19 +++++++
 .../gcc.target/riscv/sat_u_trunc-run-1.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-2.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-3.c      | 16 ++++++
 .../gcc.target/riscv/scalar_sat_unary.h       | 22 ++++++++
 14 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_arith_data.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index d61ed53a8b1..734da041f0c 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -65,6 +65,16 @@ (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")])
 ;; Iterator for hardware-supported integer modes.
 (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
+
+(define_mode_attr ANYI_DOUBLE_TRUNCATED [
+  (HI "QI") (SI "HI") (DI "SI")
+])
+
+(define_mode_attr anyi_double_truncated [
+  (HI "qi") (SI "hi") (DI "si")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
 			    (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 7c0ea1b445b..ce5e38d3dbb 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -135,6 +135,7 @@ riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d50ac611e1a..cc991ee8bab 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11603,6 +11603,46 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
 }
 
+/* Implement the unsigned saturation truncation for int mode.
+
+   b = SAT_TRUNC (a);
+   =>
+   1. max = maximum value of the truncated (narrower) mode
+   2. lt = a < max
+   3. lt = lt - 1 (0 if a < max, else -1; a == max still yields max)
+   4. d = a | lt
+   5. b = (trunc)d  */
+
+void
+riscv_expand_ustrunc (rtx dest, rtx src)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx xmode_max = gen_reg_rtx (Xmode);
+  unsigned precision = GET_MODE_PRECISION (mode).to_constant ();
+
+  gcc_assert (precision < 64);
+
+  uint64_t max = ((uint64_t)1u << precision) - 1u;
+  rtx xmode_src = gen_lowpart (Xmode, src);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+
+  /* Step-1: max = half truncated max  */
+  emit_move_insn (xmode_max, gen_int_mode (max, Xmode));
+
+  /* Step-2: lt = src < max  */
+  riscv_emit_binary (LTU, xmode_lt, xmode_src, xmode_max);
+
+  /* Step-3: lt = lt - 1  */
+  riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode));
+
+  /* Step-4: xmode_dest = lt | src  */
+  riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_src);
+
+  /* Step-5: dest = xmode_dest  */
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode for
    TI_LONG_DOUBLE_TYPE which is for long double type, go with the
    default one for the others.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 5dee837a587..2963cfa1a86 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4301,6 +4301,16 @@ (define_expand "ussub<mode>3"
   }
 )
 
+(define_expand "ustrunc<mode><anyi_double_truncated>2"
+  [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_DOUBLE_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; These are forms of (x << C1) + C2, potentially canonicalized from
 ;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
 ;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
index dc9653947fc..1812828afe8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
@@ -16,4 +16,4 @@
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 75442c94dc1..37e0a60f21b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -214,4 +214,20 @@ sat_u_sub_##T##_fmt_12 (T x, T y)                      \
 #define RUN_SAT_U_SUB_FMT_11(T, x, y) sat_u_sub_##T##_fmt_11(x, y)
 #define RUN_SAT_U_SUB_FMT_12(T, x, y) sat_u_sub_##T##_fmt_12(x, y)
 
+/******************************************************************************/
+/* Saturation Truncate (unsigned and signed)                                  */
+/******************************************************************************/
+
+#define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
+NT __attribute__((noinline))             \
+sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
+{                                        \
+  bool overflow = x > (WT)(NT)(-1);      \
+  return ((NT)x) | (NT)-overflow;        \
+}
+#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT)
+
+#define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x)
+#define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
new file mode 100644
index 00000000000..b991f8aa955
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -0,0 +1,56 @@
+#ifndef HAVE_DEFINED_SAT_ARITH_DATA_H
+#define HAVE_DEFINED_SAT_ARITH_DATA_H
+
+#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s
+#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2)
+#define TEST_UNARY_STRUCT(T1, T2)       \
+  struct TEST_UNARY_STRUCT_NAME(T1, T2) \
+    {                                   \
+      T1 to;                            \
+      T2 from;                          \
+    };
+
+#define TEST_UNARY_DATA(T1, T2)      t_##T1##_##T2##_s
+#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
+
+TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint32_t, uint64_t)
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
+  TEST_UNARY_DATA(uint8_t, uint16_t)[] =
+{
+  {  0,     0},
+  {  2,     2},
+  {254,   254},
+  {255,   255},
+  {255,   256},
+  {255, 65534},
+  {255, 65535},
+};
+
+TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
+  TEST_UNARY_DATA(uint16_t, uint32_t)[] =
+{
+  {    0,          0},
+  {    5,          5},
+  {65534,      65534},
+  {65535,      65535},
+  {65535,      65536},
+  {65535, 4294967294},
+  {65535, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
+  TEST_UNARY_DATA(uint32_t, uint64_t)[] =
+{
+  {    0,                          0},
+  {    9,                          9},
+  {4294967294,            4294967294},
+  {4294967295,            4294967295},
+  {4294967295,            4294967296},
+  {4294967295, 18446744073709551614u},
+  {4294967295, 18446744073709551615u},
+};
+
+#endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
new file mode 100644
index 00000000000..354831005b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint16_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
new file mode 100644
index 00000000000..0001d8a9ed9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
new file mode 100644
index 00000000000..4359935a9d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint32_t_fmt_1:
+** li\s+[atx][0-9]+,\s*-1
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
new file mode 100644
index 00000000000..39a5ce2b675
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint16_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
new file mode 100644
index 00000000000..b98114a7dfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
new file mode 100644
index 00000000000..8a92a8c1f55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint32_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h b/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h
new file mode 100644
index 00000000000..2ae058724b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h
@@ -0,0 +1,22 @@
+#ifndef HAVE_DEFINED_SCALAR_SAT_UNARY
+#define HAVE_DEFINED_SCALAR_SAT_UNARY
+
+int
+main ()
+{
+  unsigned i;
+  T d;
+
+  for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++)
+    {
+      d = DATA[i];
+
+      if (RUN_UNARY (d.from) != d.to)
+	__builtin_abort ();
+    }
+
+  return 0;
+}
+
+#endif
+
Robin Dapp July 22, 2024, 3:27 p.m. UTC | #2
LGTM.
Li, Pan2 July 23, 2024, 1:24 a.m. UTC | #3
Committed, thanks Robin.

Pan

-----Original Message-----
From: Robin Dapp <rdapp.gcc@gmail.com> 
Sent: Monday, July 22, 2024 11:27 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com
Subject: Re: [PATCH v3] RISC-V: Implement the .SAT_TRUNC for scalar

LGTM.
diff mbox series

Patch

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index d61ed53a8b1..734da041f0c 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -65,6 +65,16 @@  (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")])
 ;; Iterator for hardware-supported integer modes.
 (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
+
+(define_mode_attr ANYI_DOUBLE_TRUNCATED [
+  (HI "QI") (SI "HI") (DI "SI")
+])
+
+(define_mode_attr anyi_double_truncated [
+  (HI "qi") (SI "hi") (DI "si")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
 			    (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 7c0ea1b445b..ce5e38d3dbb 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -135,6 +135,7 @@  riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d50ac611e1a..cc991ee8bab 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11603,6 +11603,46 @@  riscv_expand_ussub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
 }
 
+/* Implement the unsigned saturation truncation for int mode.
+
+   b = SAT_TRUNC (a);
+   =>
+   1. max = maximum value of the truncated (narrower) mode
+   2. lt = a < max
+   3. lt = lt - 1 (0 if a < max, else -1; a == max still yields max)
+   4. d = a | lt
+   5. b = (trunc)d  */
+
+void
+riscv_expand_ustrunc (rtx dest, rtx src)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx xmode_max = gen_reg_rtx (Xmode);
+  unsigned precision = GET_MODE_PRECISION (mode).to_constant ();
+
+  gcc_assert (precision < 64);
+
+  uint64_t max = ((uint64_t)1u << precision) - 1u;
+  rtx xmode_src = gen_lowpart (Xmode, src);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+
+  /* Step-1: max = half truncated max  */
+  emit_move_insn (xmode_max, gen_int_mode (max, Xmode));
+
+  /* Step-2: lt = src < max  */
+  riscv_emit_binary (LTU, xmode_lt, xmode_src, xmode_max);
+
+  /* Step-3: lt = lt - 1  */
+  riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode));
+
+  /* Step-4: xmode_dest = lt | src  */
+  riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_src);
+
+  /* Step-5: dest = xmode_dest  */
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode for
    TI_LONG_DOUBLE_TYPE which is for long double type, go with the
    default one for the others.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 5dee837a587..2963cfa1a86 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4301,6 +4301,16 @@  (define_expand "ussub<mode>3"
   }
 )
 
+(define_expand "ustrunc<mode><anyi_double_truncated>2"
+  [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_DOUBLE_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; These are forms of (x << C1) + C2, potentially canonicalized from
 ;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
 ;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
index dc9653947fc..1812828afe8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
@@ -16,4 +16,4 @@ 
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint16_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 75442c94dc1..37e0a60f21b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -214,4 +214,20 @@  sat_u_sub_##T##_fmt_12 (T x, T y)                      \
 #define RUN_SAT_U_SUB_FMT_11(T, x, y) sat_u_sub_##T##_fmt_11(x, y)
 #define RUN_SAT_U_SUB_FMT_12(T, x, y) sat_u_sub_##T##_fmt_12(x, y)
 
+/******************************************************************************/
+/* Saturation Truncate (unsigned and signed)                                  */
+/******************************************************************************/
+
+#define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
+NT __attribute__((noinline))             \
+sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
+{                                        \
+  bool overflow = x > (WT)(NT)(-1);      \
+  return ((NT)x) | (NT)-overflow;        \
+}
+#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT)
+
+#define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x)
+#define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
new file mode 100644
index 00000000000..b991f8aa955
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -0,0 +1,56 @@ 
+#ifndef HAVE_DEFINED_SAT_ARITH_DATA_H
+#define HAVE_DEFINED_SAT_ARITH_DATA_H
+
+#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s
+#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2)
+#define TEST_UNARY_STRUCT(T1, T2)       \
+  struct TEST_UNARY_STRUCT_NAME(T1, T2) \
+    {                                   \
+      T1 to;                            \
+      T2 from;                          \
+    };
+
+#define TEST_UNARY_DATA(T1, T2)      t_##T1##_##T2##_s
+#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
+
+TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint32_t, uint64_t)
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
+  TEST_UNARY_DATA(uint8_t, uint16_t)[] =
+{
+  {  0,     0},
+  {  2,     2},
+  {254,   254},
+  {255,   255},
+  {255,   256},
+  {255, 65534},
+  {255, 65535},
+};
+
+TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
+  TEST_UNARY_DATA(uint16_t, uint32_t)[] =
+{
+  {    0,          0},
+  {    5,          5},
+  {65534,      65534},
+  {65535,      65535},
+  {65535,      65536},
+  {65535, 4294967294},
+  {65535, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
+  TEST_UNARY_DATA(uint32_t, uint64_t)[] =
+{
+  {    0,                          0},
+  {    9,                          9},
+  {4294967294,            4294967294},
+  {4294967295,            4294967295},
+  {4294967295,            4294967296},
+  {4294967295, 18446744073709551614u},
+  {4294967295, 18446744073709551615u},
+};
+
+#endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
new file mode 100644
index 00000000000..354831005b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint16_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
new file mode 100644
index 00000000000..0001d8a9ed9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
new file mode 100644
index 00000000000..4359935a9d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint32_t_fmt_1:
+** li\s+[atx][0-9]+,\s*-1
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
new file mode 100644
index 00000000000..39a5ce2b675
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint16_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
new file mode 100644
index 00000000000..b98114a7dfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
new file mode 100644
index 00000000000..8a92a8c1f55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint32_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h b/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h
new file mode 100644
index 00000000000..2ae058724b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h
@@ -0,0 +1,22 @@ 
+#ifndef HAVE_DEFINED_SCALAR_SAT_UNARY
+#define HAVE_DEFINED_SCALAR_SAT_UNARY
+
+int
+main ()
+{
+  unsigned i;
+  T d;
+
+  for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++)
+    {
+      d = DATA[i];
+
+      if (RUN_UNARY (d.from) != d.to)
+	__builtin_abort ();
+    }
+
+  return 0;
+}
+
+#endif
+