diff mbox series

[v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar

Message ID 20240723050601.1391636-1-pan2.li@intel.com
State New
Headers show
Series [v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar | expand

Commit Message

Li, Pan2 July 23, 2024, 5:06 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch implements the quad and oct .SAT_TRUNC patterns in the
RISC-V backend, i.e.:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
  NT __attribute__((noinline))             \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {                                        \
    bool overflow = x > (WT)(NT)(-1);      \
    return ((NT)x) | (NT)-overflow;        \
  }

DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   _Bool overflow;
   8   │   short unsigned int _1;
   9   │   short unsigned int _2;
  10   │   short unsigned int _3;
  11   │   uint16_t _6;
  12   │
  13   │ ;;   basic block 2, loop depth 0
  14   │ ;;    pred:       ENTRY
  15   │   overflow_5 = x_4(D) > 65535;
  16   │   _1 = (short unsigned int) x_4(D);
  17   │   _2 = (short unsigned int) overflow_5;
  18   │   _3 = -_2;
  19   │   _6 = _1 | _3;
  20   │   return _6;
  21   │ ;;    succ:       EXIT
  22   │
  23   │ }

After this patch:
   3   │
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   uint16_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;    pred:       ENTRY
  11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _6;
  13   │ ;;    succ:       EXIT
  14   │
  15   │ }

The test suites below passed with this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

	* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
	quad truncation.
	(ANYI_OCT_TRUNC): New iterator for oct truncation.
	(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
	(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
	(anyi_quad_truncated): Ditto but for lower case.
	(anyi_oct_truncated): Ditto but for lower case.
	* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
	Add new pattern for quad truncation.
	(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
	the expand dump check times.
	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
	* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
	* gcc.target/riscv/sat_u_trunc-4.c: New test.
	* gcc.target/riscv/sat_u_trunc-5.c: New test.
	* gcc.target/riscv/sat_u_trunc-6.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-6.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/iterators.md                 | 20 ++++++++
 gcc/config/riscv/riscv.md                     | 20 ++++++++
 .../rvv/autovec/unop/vec_sat_u_trunc-2.c      |  2 +-
 .../rvv/autovec/unop/vec_sat_u_trunc-3.c      |  2 +-
 .../gcc.target/riscv/sat_arith_data.h         | 51 +++++++++++++++++++
 .../gcc.target/riscv/sat_u_trunc-4.c          | 17 +++++++
 .../gcc.target/riscv/sat_u_trunc-5.c          | 17 +++++++
 .../gcc.target/riscv/sat_u_trunc-6.c          | 20 ++++++++
 .../gcc.target/riscv/sat_u_trunc-run-4.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-5.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-6.c      | 16 ++++++
 11 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c

Comments

Li, Pan2 July 31, 2024, 1:12 a.m. UTC | #1
Kindly ping.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Tuesday, July 23, 2024 1:06 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar

From: Pan Li <pan2.li@intel.com>

This patch implements the quad and oct .SAT_TRUNC patterns in the
RISC-V backend, i.e.:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
  NT __attribute__((noinline))             \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {                                        \
    bool overflow = x > (WT)(NT)(-1);      \
    return ((NT)x) | (NT)-overflow;        \
  }

DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   _Bool overflow;
   8   │   short unsigned int _1;
   9   │   short unsigned int _2;
  10   │   short unsigned int _3;
  11   │   uint16_t _6;
  12   │
  13   │ ;;   basic block 2, loop depth 0
  14   │ ;;    pred:       ENTRY
  15   │   overflow_5 = x_4(D) > 65535;
  16   │   _1 = (short unsigned int) x_4(D);
  17   │   _2 = (short unsigned int) overflow_5;
  18   │   _3 = -_2;
  19   │   _6 = _1 | _3;
  20   │   return _6;
  21   │ ;;    succ:       EXIT
  22   │
  23   │ }

After this patch:
   3   │
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   uint16_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;    pred:       ENTRY
  11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _6;
  13   │ ;;    succ:       EXIT
  14   │
  15   │ }

The test suites below passed with this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

	* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
	quad truncation.
	(ANYI_OCT_TRUNC): New iterator for oct truncation.
	(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
	(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
	(anyi_quad_truncated): Ditto but for lower case.
	(anyi_oct_truncated): Ditto but for lower case.
	* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
	Add new pattern for quad truncation.
	(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
	the expand dump check times.
	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
	* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
	* gcc.target/riscv/sat_u_trunc-4.c: New test.
	* gcc.target/riscv/sat_u_trunc-5.c: New test.
	* gcc.target/riscv/sat_u_trunc-6.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-6.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/iterators.md                 | 20 ++++++++
 gcc/config/riscv/riscv.md                     | 20 ++++++++
 .../rvv/autovec/unop/vec_sat_u_trunc-2.c      |  2 +-
 .../rvv/autovec/unop/vec_sat_u_trunc-3.c      |  2 +-
 .../gcc.target/riscv/sat_arith_data.h         | 51 +++++++++++++++++++
 .../gcc.target/riscv/sat_u_trunc-4.c          | 17 +++++++
 .../gcc.target/riscv/sat_u_trunc-5.c          | 17 +++++++
 .../gcc.target/riscv/sat_u_trunc-6.c          | 20 ++++++++
 .../gcc.target/riscv/sat_u_trunc-run-4.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-5.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-6.c      | 16 ++++++
 11 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 734da041f0c..bdcdb8babc8 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,14 +67,34 @@ (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
 (define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
 (define_mode_attr ANYI_DOUBLE_TRUNCATED [
   (HI "QI") (SI "HI") (DI "SI")
 ])
 
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+  (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+  (DI "QI")
+])
+
 (define_mode_attr anyi_double_truncated [
   (HI "qi") (SI "hi") (DI "si")
 ])
 
+(define_mode_attr anyi_quad_truncated [
+  (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+  (DI "qi")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
 			    (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index d9f6c1765d0..cab643f7d82 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4325,6 +4325,26 @@ (define_expand "ustrunc<mode><anyi_double_truncated>2"
   }
 )
 
+(define_expand "ustrunc<mode><anyi_quad_truncated>2"
+  [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_QUAD_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
+(define_expand "ustrunc<mode><anyi_oct_truncated>2"
+  [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_OCT_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; These are forms of (x << C1) + C2, potentially canonicalized from
 ;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
 ;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
index f1b1cc7e5d9..2516468fd16 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
@@ -18,4 +18,4 @@
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
index 30e4a15eca0..5df05f72cbb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
@@ -20,4 +20,4 @@
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index b991f8aa955..52e4e2b5f9f 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -14,7 +14,10 @@
 #define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
 
 TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint8_t, uint32_t)
+TEST_UNARY_STRUCT (uint8_t, uint64_t)
 TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint16_t, uint64_t)
 TEST_UNARY_STRUCT (uint32_t, uint64_t)
 
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
@@ -29,6 +32,39 @@ TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   {255, 65535},
 };
 
+TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \
+  TEST_UNARY_DATA(uint8_t, uint32_t)[] =
+{
+  {  0,          0},
+  {  2,          2},
+  {254,        254},
+  {255,        255},
+  {255,        256},
+  {255,      65534},
+  {255,      65535},
+  {255,      65536},
+  {255, 4294967294},
+  {255, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \
+  TEST_UNARY_DATA(uint8_t, uint64_t)[] =
+{
+  {  0,                     0},
+  {  2,                     2},
+  {254,                   254},
+  {255,                   255},
+  {255,                   256},
+  {255,                 65534},
+  {255,                 65535},
+  {255,                 65536},
+  {255,            4294967294},
+  {255,            4294967295},
+  {255,            4294967296},
+  {255, 18446744073709551614u},
+  {255, 18446744073709551615u},
+};
+
 TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
   TEST_UNARY_DATA(uint16_t, uint32_t)[] =
 {
@@ -41,6 +77,21 @@ TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
   {65535, 4294967295},
 };
 
+TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \
+  TEST_UNARY_DATA(uint16_t, uint64_t)[] =
+{
+  {    0,                     0},
+  {    5,                     5},
+  {65534,                 65534},
+  {65535,                 65535},
+  {65535,                 65536},
+  {65535,            4294967294},
+  {65535,            4294967295},
+  {65535,            4294967296},
+  {65535, 18446744073709551614u},
+  {65535, 18446744073709551615u},
+};
+
 TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
   TEST_UNARY_DATA(uint32_t, uint64_t)[] =
 {
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
new file mode 100644
index 00000000000..7d84b2689ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
new file mode 100644
index 00000000000..87e6c649fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
new file mode 100644
index 00000000000..af410916fb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
new file mode 100644
index 00000000000..609f3540555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
new file mode 100644
index 00000000000..8629b3e956e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
new file mode 100644
index 00000000000..6d25869d239
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
Li, Pan2 Aug. 8, 2024, 1:26 a.m. UTC | #2
Kindly ping++.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Wednesday, July 31, 2024 9:12 AM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: RE: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar

Kindly ping.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Tuesday, July 23, 2024 1:06 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar

From: Pan Li <pan2.li@intel.com>

This patch implements the quad and oct .SAT_TRUNC patterns in the
RISC-V backend, i.e.:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
  NT __attribute__((noinline))             \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {                                        \
    bool overflow = x > (WT)(NT)(-1);      \
    return ((NT)x) | (NT)-overflow;        \
  }

DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)

Before this patch:
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   _Bool overflow;
   8   │   short unsigned int _1;
   9   │   short unsigned int _2;
  10   │   short unsigned int _3;
  11   │   uint16_t _6;
  12   │
  13   │ ;;   basic block 2, loop depth 0
  14   │ ;;    pred:       ENTRY
  15   │   overflow_5 = x_4(D) > 65535;
  16   │   _1 = (short unsigned int) x_4(D);
  17   │   _2 = (short unsigned int) overflow_5;
  18   │   _3 = -_2;
  19   │   _6 = _1 | _3;
  20   │   return _6;
  21   │ ;;    succ:       EXIT
  22   │
  23   │ }

After this patch:
   3   │
   4   │ __attribute__((noinline))
   5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
   6   │ {
   7   │   uint16_t _6;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;    pred:       ENTRY
  11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
  12   │   return _6;
  13   │ ;;    succ:       EXIT
  14   │
  15   │ }

The test suites below passed with this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

	* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
	quad truncation.
	(ANYI_OCT_TRUNC): New iterator for oct truncation.
	(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
	(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
	(anyi_quad_truncated): Ditto but for lower case.
	(anyi_oct_truncated): Ditto but for lower case.
	* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
	Add new pattern for quad truncation.
	(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
	the expand dump check times.
	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
	* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
	* gcc.target/riscv/sat_u_trunc-4.c: New test.
	* gcc.target/riscv/sat_u_trunc-5.c: New test.
	* gcc.target/riscv/sat_u_trunc-6.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-6.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/iterators.md                 | 20 ++++++++
 gcc/config/riscv/riscv.md                     | 20 ++++++++
 .../rvv/autovec/unop/vec_sat_u_trunc-2.c      |  2 +-
 .../rvv/autovec/unop/vec_sat_u_trunc-3.c      |  2 +-
 .../gcc.target/riscv/sat_arith_data.h         | 51 +++++++++++++++++++
 .../gcc.target/riscv/sat_u_trunc-4.c          | 17 +++++++
 .../gcc.target/riscv/sat_u_trunc-5.c          | 17 +++++++
 .../gcc.target/riscv/sat_u_trunc-6.c          | 20 ++++++++
 .../gcc.target/riscv/sat_u_trunc-run-4.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-5.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-6.c      | 16 ++++++
 11 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 734da041f0c..bdcdb8babc8 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,14 +67,34 @@ (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
 (define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
 (define_mode_attr ANYI_DOUBLE_TRUNCATED [
   (HI "QI") (SI "HI") (DI "SI")
 ])
 
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+  (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+  (DI "QI")
+])
+
 (define_mode_attr anyi_double_truncated [
   (HI "qi") (SI "hi") (DI "si")
 ])
 
+(define_mode_attr anyi_quad_truncated [
+  (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+  (DI "qi")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
 			    (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index d9f6c1765d0..cab643f7d82 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4325,6 +4325,26 @@ (define_expand "ustrunc<mode><anyi_double_truncated>2"
   }
 )
 
+(define_expand "ustrunc<mode><anyi_quad_truncated>2"
+  [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_QUAD_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
+(define_expand "ustrunc<mode><anyi_oct_truncated>2"
+  [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_OCT_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; These are forms of (x << C1) + C2, potentially canonicalized from
 ;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
 ;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
index f1b1cc7e5d9..2516468fd16 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
@@ -18,4 +18,4 @@
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
index 30e4a15eca0..5df05f72cbb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
@@ -20,4 +20,4 @@
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index b991f8aa955..52e4e2b5f9f 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -14,7 +14,10 @@
 #define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
 
 TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint8_t, uint32_t)
+TEST_UNARY_STRUCT (uint8_t, uint64_t)
 TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint16_t, uint64_t)
 TEST_UNARY_STRUCT (uint32_t, uint64_t)
 
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
@@ -29,6 +32,39 @@ TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   {255, 65535},
 };
 
+TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \
+  TEST_UNARY_DATA(uint8_t, uint32_t)[] =
+{
+  {  0,          0},
+  {  2,          2},
+  {254,        254},
+  {255,        255},
+  {255,        256},
+  {255,      65534},
+  {255,      65535},
+  {255,      65536},
+  {255, 4294967294},
+  {255, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \
+  TEST_UNARY_DATA(uint8_t, uint64_t)[] =
+{
+  {  0,                     0},
+  {  2,                     2},
+  {254,                   254},
+  {255,                   255},
+  {255,                   256},
+  {255,                 65534},
+  {255,                 65535},
+  {255,                 65536},
+  {255,            4294967294},
+  {255,            4294967295},
+  {255,            4294967296},
+  {255, 18446744073709551614u},
+  {255, 18446744073709551615u},
+};
+
 TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
   TEST_UNARY_DATA(uint16_t, uint32_t)[] =
 {
@@ -41,6 +77,21 @@ TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
   {65535, 4294967295},
 };
 
+TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \
+  TEST_UNARY_DATA(uint16_t, uint64_t)[] =
+{
+  {    0,                     0},
+  {    5,                     5},
+  {65534,                 65534},
+  {65535,                 65535},
+  {65535,                 65536},
+  {65535,            4294967294},
+  {65535,            4294967295},
+  {65535,            4294967296},
+  {65535, 18446744073709551614u},
+  {65535, 18446744073709551615u},
+};
+
 TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
   TEST_UNARY_DATA(uint32_t, uint64_t)[] =
 {
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
new file mode 100644
index 00000000000..7d84b2689ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
new file mode 100644
index 00000000000..87e6c649fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
new file mode 100644
index 00000000000..af410916fb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
new file mode 100644
index 00000000000..609f3540555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
new file mode 100644
index 00000000000..8629b3e956e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
new file mode 100644
index 00000000000..6d25869d239
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
Jeff Law Aug. 17, 2024, 6:20 p.m. UTC | #3
On 7/22/24 11:06 PM, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
> 
> This patch would like to implement the quad and oct .SAT_TRUNC pattern
> in the riscv backend. Aka:
> 
> Form 1:
>    #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
>    NT __attribute__((noinline))             \
>    sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
>    {                                        \
>      bool overflow = x > (WT)(NT)(-1);      \
>      return ((NT)x) | (NT)-overflow;        \
>    }
> 
> DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
> 
> Before this patch:
>     4   │ __attribute__((noinline))
>     5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
>     6   │ {
>     7   │   _Bool overflow;
>     8   │   short unsigned int _1;
>     9   │   short unsigned int _2;
>    10   │   short unsigned int _3;
>    11   │   uint16_t _6;
>    12   │
>    13   │ ;;   basic block 2, loop depth 0
>    14   │ ;;    pred:       ENTRY
>    15   │   overflow_5 = x_4(D) > 65535;
>    16   │   _1 = (short unsigned int) x_4(D);
>    17   │   _2 = (short unsigned int) overflow_5;
>    18   │   _3 = -_2;
>    19   │   _6 = _1 | _3;
>    20   │   return _6;
>    21   │ ;;    succ:       EXIT
>    22   │
>    23   │ }
> 
> After this patch:
>     3   │
>     4   │ __attribute__((noinline))
>     5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
>     6   │ {
>     7   │   uint16_t _6;
>     8   │
>     9   │ ;;   basic block 2, loop depth 0
>    10   │ ;;    pred:       ENTRY
>    11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
>    12   │   return _6;
>    13   │ ;;    succ:       EXIT
>    14   │
>    15   │ }
> 
> The below tests suites are passed for this patch
> 1. The rv64gcv fully regression test.
> 2. The rv64gcv build with glibc
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
> 	quad truncation.
> 	(ANYI_OCT_TRUNC): New iterator for oct truncation.
> 	(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
> 	(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
> 	(anyi_quad_truncated): Ditto but for lower case.
> 	(anyi_oct_truncated): Ditto but for lower case.
> 	* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
> 	Add new pattern for quad truncation.
> 	(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
> 	the expand dump check times.
> 	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
> 	* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
> 	* gcc.target/riscv/sat_u_trunc-4.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-5.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-6.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-6.c: New test.
OK.  Sorry for the delays here.  I wanted to make sure we had the issues 
WRT operand extension resolved before diving into this.  But in 
retrospect, this probably could have moved forward independently.

Jeff
Li, Pan2 Aug. 18, 2024, 1:08 a.m. UTC | #4
> OK.  Sorry for the delays here.  I wanted to make sure we had the issues 
> WRT operand extension resolved before diving into this.  But in 
> retrospect, this probably could have moved forward independently.

That makes much sense to me, thanks a lot.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Sunday, August 18, 2024 2:21 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] RISC-V: Implement the quad and oct .SAT_TRUNC for scalar



On 7/22/24 11:06 PM, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
> 
> This patch would like to implement the quad and oct .SAT_TRUNC pattern
> in the riscv backend. Aka:
> 
> Form 1:
>    #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
>    NT __attribute__((noinline))             \
>    sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
>    {                                        \
>      bool overflow = x > (WT)(NT)(-1);      \
>      return ((NT)x) | (NT)-overflow;        \
>    }
> 
> DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
> 
> Before this patch:
>     4   │ __attribute__((noinline))
>     5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
>     6   │ {
>     7   │   _Bool overflow;
>     8   │   short unsigned int _1;
>     9   │   short unsigned int _2;
>    10   │   short unsigned int _3;
>    11   │   uint16_t _6;
>    12   │
>    13   │ ;;   basic block 2, loop depth 0
>    14   │ ;;    pred:       ENTRY
>    15   │   overflow_5 = x_4(D) > 65535;
>    16   │   _1 = (short unsigned int) x_4(D);
>    17   │   _2 = (short unsigned int) overflow_5;
>    18   │   _3 = -_2;
>    19   │   _6 = _1 | _3;
>    20   │   return _6;
>    21   │ ;;    succ:       EXIT
>    22   │
>    23   │ }
> 
> After this patch:
>     3   │
>     4   │ __attribute__((noinline))
>     5   │ uint16_t sat_u_truc_uint64_t_to_uint16_t_fmt_1 (uint64_t x)
>     6   │ {
>     7   │   uint16_t _6;
>     8   │
>     9   │ ;;   basic block 2, loop depth 0
>    10   │ ;;    pred:       ENTRY
>    11   │   _6 = .SAT_TRUNC (x_4(D)); [tail call]
>    12   │   return _6;
>    13   │ ;;    succ:       EXIT
>    14   │
>    15   │ }
> 
> The below tests suites are passed for this patch
> 1. The rv64gcv fully regression test.
> 2. The rv64gcv build with glibc
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/iterators.md (ANYI_QUAD_TRUNC): New iterator for
> 	quad truncation.
> 	(ANYI_OCT_TRUNC): New iterator for oct truncation.
> 	(ANYI_QUAD_TRUNCATED): New attr for truncated quad modes.
> 	(ANYI_OCT_TRUNCATED): New attr for truncated oct modes.
> 	(anyi_quad_truncated): Ditto but for lower case.
> 	(anyi_oct_truncated): Ditto but for lower case.
> 	* config/riscv/riscv.md (ustrunc<mode><anyi_quad_truncated>2):
> 	Add new pattern for quad truncation.
> 	(ustrunc<mode><anyi_oct_truncated>2): Ditto but for oct.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Adjust
> 	the expand dump check times.
> 	* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
> 	* gcc.target/riscv/sat_arith_data.h: Add test helper macros.
> 	* gcc.target/riscv/sat_u_trunc-4.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-5.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-6.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-4.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-5.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-6.c: New test.
OK.  Sorry for the delays here.  I wanted to make sure we had the issues 
WRT operand extension resolved before diving into this.  But in 
retrospect, this probably could have moved forward independently.

Jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 734da041f0c..bdcdb8babc8 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,14 +67,34 @@  (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
 (define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_QUAD_TRUNC [SI (DI "TARGET_64BIT")])
+
+(define_mode_iterator ANYI_OCT_TRUNC [(DI "TARGET_64BIT")])
+
 (define_mode_attr ANYI_DOUBLE_TRUNCATED [
   (HI "QI") (SI "HI") (DI "SI")
 ])
 
+(define_mode_attr ANYI_QUAD_TRUNCATED [
+  (SI "QI") (DI "HI")
+])
+
+(define_mode_attr ANYI_OCT_TRUNCATED [
+  (DI "QI")
+])
+
 (define_mode_attr anyi_double_truncated [
   (HI "qi") (SI "hi") (DI "si")
 ])
 
+(define_mode_attr anyi_quad_truncated [
+  (SI "qi") (DI "hi")
+])
+
+(define_mode_attr anyi_oct_truncated [
+  (DI "qi")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
 			    (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index d9f6c1765d0..cab643f7d82 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4325,6 +4325,26 @@  (define_expand "ustrunc<mode><anyi_double_truncated>2"
   }
 )
 
+(define_expand "ustrunc<mode><anyi_quad_truncated>2"
+  [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_QUAD_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
+(define_expand "ustrunc<mode><anyi_oct_truncated>2"
+  [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+   (match_operand:ANYI_OCT_TRUNC       1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; These are forms of (x << C1) + C2, potentially canonicalized from
 ;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
 ;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
index f1b1cc7e5d9..2516468fd16 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c
@@ -18,4 +18,4 @@ 
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint32_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
index 30e4a15eca0..5df05f72cbb 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c
@@ -20,4 +20,4 @@ 
 */
 DEF_VEC_SAT_U_TRUNC_FMT_1 (uint8_t, uint64_t)
 
-/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 4 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index b991f8aa955..52e4e2b5f9f 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -14,7 +14,10 @@ 
 #define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
 
 TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint8_t, uint32_t)
+TEST_UNARY_STRUCT (uint8_t, uint64_t)
 TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint16_t, uint64_t)
 TEST_UNARY_STRUCT (uint32_t, uint64_t)
 
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
@@ -29,6 +32,39 @@  TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   {255, 65535},
 };
 
+TEST_UNARY_STRUCT_DECL(uint8_t, uint32_t) \
+  TEST_UNARY_DATA(uint8_t, uint32_t)[] =
+{
+  {  0,          0},
+  {  2,          2},
+  {254,        254},
+  {255,        255},
+  {255,        256},
+  {255,      65534},
+  {255,      65535},
+  {255,      65536},
+  {255, 4294967294},
+  {255, 4294967295},
+};
+
+TEST_UNARY_STRUCT_DECL(uint8_t, uint64_t) \
+  TEST_UNARY_DATA(uint8_t, uint64_t)[] =
+{
+  {  0,                     0},
+  {  2,                     2},
+  {254,                   254},
+  {255,                   255},
+  {255,                   256},
+  {255,                 65534},
+  {255,                 65535},
+  {255,                 65536},
+  {255,            4294967294},
+  {255,            4294967295},
+  {255,            4294967296},
+  {255, 18446744073709551614u},
+  {255, 18446744073709551615u},
+};
+
 TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
   TEST_UNARY_DATA(uint16_t, uint32_t)[] =
 {
@@ -41,6 +77,21 @@  TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
   {65535, 4294967295},
 };
 
+TEST_UNARY_STRUCT_DECL(uint16_t, uint64_t) \
+  TEST_UNARY_DATA(uint16_t, uint64_t)[] =
+{
+  {    0,                     0},
+  {    5,                     5},
+  {65534,                 65534},
+  {65535,                 65535},
+  {65535,                 65536},
+  {65535,            4294967294},
+  {65535,            4294967295},
+  {65535,            4294967296},
+  {65535, 18446744073709551614u},
+  {65535, 18446744073709551615u},
+};
+
 TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
   TEST_UNARY_DATA(uint32_t, uint64_t)[] =
 {
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
new file mode 100644
index 00000000000..7d84b2689ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-4.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint32_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
new file mode 100644
index 00000000000..87e6c649fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-5.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
new file mode 100644
index 00000000000..af410916fb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-6.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_truc_uint64_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
new file mode 100644
index 00000000000..609f3540555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-4.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint32_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
new file mode 100644
index 00000000000..8629b3e956e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-5.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
new file mode 100644
index 00000000000..6d25869d239
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-6.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t
+#define T2 uint64_t
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+
+#include "scalar_sat_unary.h"