Message ID: 20240829062542.3451179-1-pan2.li@intel.com
State: New
Series: [v1] RISC-V: Support form 1 of integer scalar .SAT_ADD
On 8/29/24 12:25 AM, pan2.li@intel.com wrote:
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the scalar signed ssadd pattern
> for the RISC-V backend.  Aka
>
> Form 1:
> #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
> T __attribute__((noinline))                  \
> sat_s_add_##T##_fmt_1 (T x, T y)             \
> {                                            \
>   T sum = (UT)x + (UT)y;                     \
>   return (x ^ y) < 0                         \
>     ? sum                                    \
>     : (sum ^ x) >= 0                         \
>       ? sum                                  \
>       : x < 0 ? MIN : MAX;                   \
> }
>
> DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
>
> Before this patch:
>   10 │ sat_s_add_int64_t_fmt_1:
>   11 │     mv    a5,a0
>   12 │     add   a0,a0,a1
>   13 │     xor   a1,a5,a1
>   14 │     not   a1,a1
>   15 │     xor   a4,a5,a0
>   16 │     and   a1,a1,a4
>   17 │     blt   a1,zero,.L5
>   18 │     ret
>   19 │ .L5:
>   20 │     srai  a5,a5,63
>   21 │     li    a0,-1
>   22 │     srli  a0,a0,1
>   23 │     xor   a0,a5,a0
>   24 │     ret
>
> After this patch:
>   10 │ sat_s_add_int64_t_fmt_1:
>   11 │     add   a2,a0,a1
>   12 │     xor   a1,a0,a1
>   13 │     xor   a5,a0,a2
>   14 │     srli  a5,a5,63
>   15 │     srli  a1,a1,63
>   16 │     xori  a1,a1,1
>   17 │     and   a5,a5,a1
>   18 │     srai  a4,a0,63
>   19 │     li    a3,-1
>   20 │     srli  a3,a3,1
>   21 │     xor   a3,a3,a4
>   22 │     neg   a4,a5
>   23 │     and   a3,a3,a4
>   24 │     addi  a5,a5,-1
>   25 │     and   a0,a2,a5
>   26 │     or    a0,a0,a3
>   27 │     ret
>
> The below test suites are passed for this patch:
> 1. The rv64gcv fully regression test.
>
> gcc/ChangeLog:
>
> 	* config/riscv/riscv-protos.h (riscv_expand_ssadd): Add new func
> 	decl for expanding ssadd.
> 	* config/riscv/riscv.cc (riscv_gen_sign_max_cst): Add new func
> 	impl to gen the max int rtx.
> 	(riscv_expand_ssadd): Add new func impl to expand the ssadd.
> 	* config/riscv/riscv.md (ssadd<mode>3): Add new pattern for
> 	signed integer .SAT_ADD.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/riscv/sat_arith.h: Add test helper macros.
> 	* gcc.target/riscv/sat_arith_data.h: Add test data.
> 	* gcc.target/riscv/sat_s_add-1.c: New test.
> 	* gcc.target/riscv/sat_s_add-2.c: New test.
> 	* gcc.target/riscv/sat_s_add-3.c: New test.
> 	* gcc.target/riscv/sat_s_add-4.c: New test.
> 	* gcc.target/riscv/sat_s_add-run-1.c: New test.
> 	* gcc.target/riscv/sat_s_add-run-2.c: New test.
> 	* gcc.target/riscv/sat_s_add-run-3.c: New test.
> 	* gcc.target/riscv/sat_s_add-run-4.c: New test.
> 	* gcc.target/riscv/scalar_sat_binary_run_xxx.h: New test.

OK.  Presumably the code you're getting here is more efficient than
whatever standard expansion would provide?  If so, should we be looking
at moving some of this stuff into generic expanders?  I don't really see
anything all that target specific here.

jeff
Thanks Jeff for comments.

> OK.  Presumably the code you're getting here is more efficient than
> whatever standard expansion would provide?  If so, should we be looking
> at moving some of this stuff into generic expanders?  I don't really see
> anything all that target specific here.

Mostly because we can eliminate the branch for .SAT_ADD in scalar, given we
don't have a single SAT_ADD insn like the RVV vsadd.vv/vx/vi.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com>
Sent: Sunday, September 1, 2024 11:35 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD
On 9/1/24 8:50 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
>
>> OK.  Presumably the code you're getting here is more efficient than
>> whatever standard expansion would provide?  If so, should we be looking
>> at moving some of this stuff into generic expanders?  I don't really see
>> anything all that target specific here.
>
> Mostly because we can eliminate the branch for .SAT_ADD in scalar, given we
> don't have a single SAT_ADD insn like the RVV vsadd.vv/vx/vi.

But I would expect that may be beneficial on other targets as well.  It's
not conceptually a lot different than what we do for basic arithmetic with
overflow, which has a generic expansion that can be overridden by target
specific expanders.  See expand_addsub_overflow.

Again, I think this is OK, but I'm thinking we probably want something
more generic in the longer term.

The other question, which I think Robin initially raised to me privately,
is whether or not the sequences we're generating are well suited for
zicond.  If not, we might want to consider adjustments to either generate
zicond if-then-else constructs during initial code generation or bias the
initial code generator towards sequences that ifcvt & combine can turn
into zicond.

But again, not strictly necessary for this patch to go forward; more a
potential avenue for further improvements.
Thanks Jeff.

> But I would expect that may be beneficial on other targets as well.

I think x86 has similar insns for saturation, for example paddsw in the
below link.

https://www.felixcloutier.com/x86/paddsb:paddsw

And I bet the x86 backend has implemented some of them already, like
usadd and ussub.

> The other question, which I think Robin initially raised to me privately,
> is whether or not the sequences we're generating are well suited for
> zicond.

Got it, a cmov-like insn is well suited for such cases.  We can consider
the best practice to leverage the zicond extension in further
improvements.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com>
Sent: Monday, September 2, 2024 11:32 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] RISC-V: Support form 1 of integer scalar .SAT_ADD
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 926899ccad6..3358e3887b9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -134,6 +134,7 @@ extern bool riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
+extern void riscv_expand_ssadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e9b1b9bc3ad..e2b28a278f6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11947,6 +11947,96 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
 }
 
+/* Return a new const RTX of MAX value based on given mode.  Only
+   int scalar mode is allowed.  */
+
+static rtx
+riscv_gen_sign_max_cst (machine_mode mode)
+{
+  switch (mode)
+    {
+    case QImode:
+      return GEN_INT (INT8_MAX);
+    case HImode:
+      return GEN_INT (INT16_MAX);
+    case SImode:
+      return GEN_INT (INT32_MAX);
+    case DImode:
+      return GEN_INT (INT64_MAX);
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Implements the signed saturation add standard name ssadd for int mode.
+
+   z = SAT_ADD(x, y).
+   =>
+   1.  sum = x + y
+   2.  xor_0 = x ^ y
+   3.  xor_1 = x ^ sum
+   4.  lt = xor_1 < 0
+   5.  ge = xor_0 >= 0
+   6.  and = ge & lt
+   7.  lt = x < 0
+   8.  neg = -lt
+   9.  max = INT_MAX
+   10. max = max ^ neg
+   11. neg = -and
+   12. max = max & neg
+   13. and = and - 1
+   14. z = sum & and
+   15. z = z | max  */
+
+void
+riscv_expand_ssadd (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+  rtx shift_bits = GEN_INT (bitsize - 1);
+  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_sum = gen_reg_rtx (Xmode);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+  rtx xmode_xor_0 = gen_reg_rtx (Xmode);
+  rtx xmode_xor_1 = gen_reg_rtx (Xmode);
+  rtx xmode_ge = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_neg = gen_reg_rtx (Xmode);
+  rtx xmode_and = gen_reg_rtx (Xmode);
+  rtx xmode_max = gen_reg_rtx (Xmode);
+
+  /* Step-1: sum = x + y, xor_0 = x ^ y, xor_1 = x ^ sum.  */
+  riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
+  riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_sum);
+
+  /* Step-2: lt = xor_1 < 0, ge = xor_0 >= 0, and = ge & lt.  */
+  riscv_emit_binary (LSHIFTRT, xmode_lt, xmode_xor_1, shift_bits);
+  riscv_emit_binary (LSHIFTRT, xmode_ge, xmode_xor_0, shift_bits);
+  riscv_emit_binary (XOR, xmode_ge, xmode_ge, CONST1_RTX (Xmode));
+  riscv_emit_binary (AND, xmode_and, xmode_lt, xmode_ge);
+  riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode));
+
+  /* Step-3: lt = x < 0, neg = -lt.  */
+  riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode));
+  riscv_emit_unary (NEG, xmode_neg, xmode_lt);
+
+  /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg.  */
+  riscv_emit_move (xmode_max, riscv_gen_sign_max_cst (mode));
+  riscv_emit_binary (XOR, xmode_max, xmode_max, xmode_neg);
+  riscv_emit_unary (NEG, xmode_neg, xmode_and);
+  riscv_emit_binary (AND, xmode_max, xmode_max, xmode_neg);
+
+  /* Step-5: and = and - 1, dest = sum & and.  */
+  riscv_emit_binary (PLUS, xmode_and, xmode_and, CONSTM1_RTX (Xmode));
+  riscv_emit_binary (AND, xmode_dest, xmode_sum, xmode_and);
+
+  /* Step-6: xmode_dest = xmode_dest | xmode_max, dest = xmode_dest.  */
+  riscv_emit_binary (IOR, xmode_dest, xmode_dest, xmode_max);
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
 /* Generate a REG rtx of Xmode from the given rtx and mode.
    The rtx x can be REG (QI/HI/SI/DI) or const_int.
    The machine_mode mode is the original mode from define pattern.
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 3289ed2155a..789faf66cb8 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4367,6 +4367,17 @@ (define_expand "usadd<mode>3"
   }
 )
 
+(define_expand "ssadd<mode>3"
+  [(match_operand:ANYI 0 "register_operand")
+   (match_operand:ANYI 1 "register_operand")
+   (match_operand:ANYI 2 "register_operand")]
+  ""
+  {
+    riscv_expand_ssadd (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
+
 (define_expand "ussub<mode>3"
   [(match_operand:ANYI 0 "register_operand")
    (match_operand:ANYI 1 "reg_or_int_operand")
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index a899979904b..86cd6bea8df 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -118,6 +118,23 @@ sat_u_add_imm_type_check##_##T##_fmt_2 (T x) \
 #define RUN_SAT_U_ADD_IMM_FMT_4(T, x, IMM, expect) \
   if (sat_u_add_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort ()
 
+#define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
+T __attribute__((noinline))                  \
+sat_s_add_##T##_fmt_1 (T x, T y)             \
+{                                            \
+  T sum = (UT)x + (UT)y;                     \
+  return (x ^ y) < 0                         \
+    ? sum                                    \
+    : (sum ^ x) >= 0                         \
+      ? sum                                  \
+      : x < 0 ? MIN : MAX;                   \
+}
+#define DEF_SAT_S_ADD_FMT_1_WRAP(T, UT, MIN, MAX) \
+  DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX)
+
+#define RUN_SAT_S_ADD_FMT_1(T, x, y) sat_s_add_##T##_fmt_1(x, y)
+#define RUN_SAT_S_ADD_FMT_1_WRAP(T, x, y) RUN_SAT_S_ADD_FMT_1(T, x, y)
+
 /******************************************************************************/
 /* Saturation Sub (Unsigned and Signed)                                       */
 /******************************************************************************/
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
index 52e4e2b5f9f..75037c5d806 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -10,9 +10,21 @@
     T2 from; \
   };
 
+#define TEST_BINARY_STRUCT_NAME(T, NAME) test_##T##_##NAME##_s
+#define TEST_BINARY_STRUCT_DECL(T, NAME) struct TEST_BINARY_STRUCT_NAME(T, NAME)
+#define TEST_BINARY_STRUCT(T, NAME)       \
+  struct TEST_BINARY_STRUCT_NAME(T, NAME) \
+  {                                       \
+    T a, b;                               \
+    T expect;                             \
+  };
+
 #define TEST_UNARY_DATA(T1, T2) t_##T1##_##T2##_s
 #define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
 
+#define TEST_BINARY_DATA(T, NAME) t_##T##_##NAME##_s
+#define TEST_BINARY_DATA_WRAP(T, NAME) TEST_BINARY_DATA(T, NAME)
+
 TEST_UNARY_STRUCT (uint8_t, uint16_t)
 TEST_UNARY_STRUCT (uint8_t, uint32_t)
 TEST_UNARY_STRUCT (uint8_t, uint64_t)
@@ -20,6 +32,11 @@ TEST_UNARY_STRUCT (uint16_t, uint32_t)
 TEST_UNARY_STRUCT (uint16_t, uint64_t)
 TEST_UNARY_STRUCT (uint32_t, uint64_t)
 
+TEST_BINARY_STRUCT (int8_t, ssadd)
+TEST_BINARY_STRUCT (int16_t, ssadd)
+TEST_BINARY_STRUCT (int32_t, ssadd)
+TEST_BINARY_STRUCT (int64_t, ssadd)
+
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
 TEST_UNARY_DATA(uint8_t, uint16_t)[] =
 {
@@ -104,4 +121,72 @@ TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
   {4294967295, 18446744073709551615u},
 };
 
+TEST_BINARY_STRUCT_DECL(int8_t, ssadd) TEST_BINARY_DATA(int8_t, ssadd)[] =
+{
+  {    0,    0,    0},
+  {    2,    2,    4},
+  {  126,    1,  127},
+  {  127,    1,  127},
+  {  127,  127,  127},
+  {   -7,   -4,  -11},
+  { -128,   -1, -128},
+  { -127,   -1, -128},
+  { -128, -128, -128},
+  { -128,  127,   -1},
+  { -127,  127,    0},
+  { -122,  105,  -17},
+  { -122,  125,    3},
+};
+
+TEST_BINARY_STRUCT_DECL(int16_t, ssadd) TEST_BINARY_DATA(int16_t, ssadd)[] =
+{
+  {      0,      0,      0},
+  {      2,      2,      4},
+  {  32766,      1,  32767},
+  {  32767,      1,  32767},
+  {  32767,  32767,  32767},
+  {     -7,     -4,    -11},
+  { -32768,     -1, -32768},
+  { -32767,     -1, -32768},
+  { -32768, -32768, -32768},
+  { -32768,  32767,     -1},
+  { -32767,  32767,      0},
+  { -32732,  32712,    -20},
+  { -32732,  32734,      2},
+};
+
+TEST_BINARY_STRUCT_DECL(int32_t, ssadd) TEST_BINARY_DATA(int32_t, ssadd)[] =
+{
+  {           0,           0,           0},
+  {           2,           2,           4},
+  {  2147483646,           1,  2147483647},
+  {  2147483647,           1,  2147483647},
+  {  2147483647,  2147483647,  2147483647},
+  {          -7,          -4,         -11},
+  { -2147483648,          -1, -2147483648},
+  { -2147483647,          -1, -2147483648},
+  { -2147483648, -2147483648, -2147483648},
+  { -2147483648,  2147483647,          -1},
+  { -2147483647,  2147483647,           0},
+  { -2147483613,  2147483601,         -12},
+  { -2147483613,  2147483637,          24},
+};
+
+TEST_BINARY_STRUCT_DECL(int64_t, ssadd) TEST_BINARY_DATA(int64_t, ssadd)[] =
+{
+  { 0, 0, 0},
+  { 2, 2, 4},
+  { 9223372036854775806ll, 1, 9223372036854775807ll},
+  { 9223372036854775807ll, 1, 9223372036854775807ll},
+  { 9223372036854775807ll, 9223372036854775807ll, 9223372036854775807ll},
+  { -7, -4, -11},
+  {-9223372036854775808ull, -1, -9223372036854775808ull},
+  {-9223372036854775807ll, -1, -9223372036854775808ull},
+  {-9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull},
+  {-9223372036854775808ull, 9223372036854775807ll, -1},
+  {-9223372036854775807ll, 9223372036854775807ll, 0},
+  {-9223372036854775803ll, 9223372036854775800ll, -3},
+  {-9223372036854775803ll, 9223372036854775805ll, 2},
+};
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-1.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-1.c
new file mode 100644
index 00000000000..f85675c1a05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_add_int8_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slliw\s+a0,\s*a0,\s*24
+** sraiw\s+a0,\s*a0,\s*24
+** ret
+*/
+DEF_SAT_S_ADD_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-2.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-2.c
new file mode 100644
index 00000000000..6a2f8d8a11e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_add_int16_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*15
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** li\s+[atx][0-9]+,\s*32768
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slliw\s+a0,\s*a0,\s*16
+** sraiw\s+a0,\s*a0,\s*16
+** ret
+*/
+DEF_SAT_S_ADD_FMT_1(int16_t, uint16_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-3.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-3.c
new file mode 100644
index 00000000000..adfed8361d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-3.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_add_int32_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*31
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** li\s+[atx][0-9]+,\s*-2147483648
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext\.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_S_ADD_FMT_1(int32_t, uint32_t, INT32_MIN, INT32_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
new file mode 100644
index 00000000000..f85675c1a05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_s_add_int8_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*a1
+** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1
+** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63
+** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slliw\s+a0,\s*a0,\s*24
+** sraiw\s+a0,\s*a0,\s*24
+** ret
+*/
+DEF_SAT_S_ADD_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c
new file mode 100644
index 00000000000..9a4ce338d0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-1.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 int8_t
+#define T2 uint8_t
+
+DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT8_MIN, INT8_MAX)
+
+#define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
+#define T TEST_BINARY_STRUCT_DECL(T1, ssadd)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+
+#include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c
new file mode 100644
index 00000000000..34459b85e2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 int16_t
+#define T2 uint16_t
+
+DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT16_MIN, INT16_MAX)
+
+#define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
+#define T TEST_BINARY_STRUCT_DECL(T1, ssadd)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+
+#include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c
new file mode 100644
index 00000000000..4d4841f4066
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-3.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 int32_t
+#define T2 uint32_t
+
+DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT32_MIN, INT32_MAX)
+
+#define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
+#define T TEST_BINARY_STRUCT_DECL(T1, ssadd)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+
+#include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c
new file mode 100644
index 00000000000..df818879628
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-run-4.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 int64_t
+#define T2 uint64_t
+
+DEF_SAT_S_ADD_FMT_1_WRAP(T1, T2, INT64_MIN, INT64_MAX)
+
+#define DATA TEST_BINARY_DATA_WRAP(T1, ssadd)
+#define T TEST_BINARY_STRUCT_DECL(T1, ssadd)
+#define RUN_BINARY(x, y) RUN_SAT_S_ADD_FMT_1_WRAP(T1, x, y)
+
+#include "scalar_sat_binary_run_xxx.h"
diff --git a/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h b/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h
new file mode 100644
index 00000000000..7578453e944
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/scalar_sat_binary_run_xxx.h
@@ -0,0 +1,26 @@
+#ifndef HAVE_DEFINED_SCALAR_SAT_BINARY_RUN_XXX
+#define HAVE_DEFINED_SCALAR_SAT_BINARY_RUN_XXX
+
+#include <stdio.h>
+
+int
+main ()
+{
+  unsigned i;
+  T d;
+
+  for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++)
+    {
+      d = DATA[i];
+
+      if (RUN_BINARY (d.a, d.b) != d.expect)
+	{
+	  printf ("%d + %d = %d, but %d\n", d.a, d.b, d.expect, RUN_BINARY (d.a, d.b));
+	  __builtin_abort ();
+	}
+    }
+
+  return 0;
+}
+
+#endif