diff mbox series

[v2] RISC-V: Implement the .SAT_TRUNC for scalar

Message ID 20240702063304.3200322-1-pan2.li@intel.com
State New
Headers show
Series [v2] RISC-V: Implement the .SAT_TRUNC for scalar | expand

Commit Message

Li, Pan2 July 2, 2024, 6:33 a.m. UTC
From: Pan Li <pan2.li@intel.com>

Update in v2:
Rebase onto upstream.

Log in v1:
This patch implements the simple .SAT_TRUNC pattern
in the riscv backend, i.e.:

Form 1:
  #define DEF_SAT_U_TRUC_FMT_1(NT, WT)     \
  NT __attribute__((noinline))             \
  sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
  {                                        \
    bool overflow = x > (WT)(NT)(-1);      \
    return ((NT)x) | (NT)-overflow;        \
  }

DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  _Bool overflow;
  unsigned char _1;
  unsigned char _2;
  unsigned char _3;
  uint8_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  overflow_5 = x_4(D) > 255;
  _1 = (unsigned char) x_4(D);
  _2 = (unsigned char) overflow_5;
  _3 = -_2;
  _6 = _1 | _3;
  return _6;
;;    succ:       EXIT

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  uint8_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
;;    succ:       EXIT

}

The test suites below pass with this patch:
1. The rv64gcv full regression test.
2. The rv64gcv build with glibc.

gcc/ChangeLog:

	* config/riscv/iterators.md (TARGET_64BIT): Add new iterator
	and related attr(s).
	* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
	func decl for expanding ustrunc
	* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
	impl to expand ustrunc.
	* config/riscv/riscv.md (ustrunc<mode><anyi_narrowed>2): Add
	new pattern ustrunc<m><n>2.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/sat_arith.h: Add test helper macro.
	* gcc.target/riscv/sat_arith_data.h: New test.
	* gcc.target/riscv/sat_u_trunc-1.c: New test.
	* gcc.target/riscv/sat_u_trunc-2.c: New test.
	* gcc.target/riscv/sat_u_trunc-3.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
	* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
	* gcc.target/riscv/scalar_sat_unary.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/iterators.md                 | 10 ++++
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv.cc                     | 40 +++++++++++++
 gcc/config/riscv/riscv.md                     | 10 ++++
 gcc/testsuite/gcc.target/riscv/sat_arith.h    | 16 ++++++
 .../gcc.target/riscv/sat_arith_data.h         | 56 +++++++++++++++++++
 .../gcc.target/riscv/sat_u_trunc-1.c          | 17 ++++++
 .../gcc.target/riscv/sat_u_trunc-2.c          | 20 +++++++
 .../gcc.target/riscv/sat_u_trunc-3.c          | 19 +++++++
 .../gcc.target/riscv/sat_u_trunc-run-1.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-2.c      | 16 ++++++
 .../gcc.target/riscv/sat_u_trunc-run-3.c      | 16 ++++++
 .../gcc.target/riscv/scalar_sat_unary.h       | 22 ++++++++
 13 files changed, 259 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_arith_data.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h

Comments

Jeff Law July 3, 2024, 12:30 a.m. UTC | #1
On 7/2/24 12:33 AM, pan2.li@intel.com wrote:

> 
> The below tests suites are passed for this patch
> 1. The rv64gcv fully regression test.
> 2. The rv64gcv build with glibc
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/iterators.md (TARGET_64BIT): Add new iterator
> 	and related attr(s).
Rather than reference TARGET_64BIT, you should reference the new 
iterators names.

> 	* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
> 	func decl for expanding ustrunc
> 	* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
> 	impl to expand ustrunc.
> 	* config/riscv/riscv.md (ustrunc<mode><anyi_narrowed>2): Add
> 	new pattern ustrunc<m><n>2.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/sat_arith.h: Add test helper macro.
> 	* gcc.target/riscv/sat_arith_data.h: New test.
> 	* gcc.target/riscv/sat_u_trunc-1.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-2.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-3.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
> 	* gcc.target/riscv/scalar_sat_unary.h: New test.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---

>   
> +/* Implement the unsigned saturation truncation for int mode.
> +
> +   b = SAT_TRUNC (a);
> +   =>
> +   1. max = half truncated max
> +   2. lt = a < max
> +   3. lt = lt - 1 (lt 0, ge -1)
> +   4. d = a | lt
> +   5. b = (trunc)d  */
> +
> +void
> +riscv_expand_ustrunc (rtx dest, rtx src)
> +{
> +  machine_mode omode = GET_MODE (dest);
> +  rtx pmode_max = gen_reg_rtx (Pmode);
> +  unsigned precision = GET_MODE_PRECISION (omode).to_constant ();
> +
> +  gcc_assert (precision < 64);
> +
> +  uint64_t max = ((uint64_t)1u << precision) - 1u;
> +  rtx pmode_src = gen_lowpart (Pmode, src);
> +  rtx pmode_dest = gen_reg_rtx (Pmode);
> +  rtx pmode_lt = gen_reg_rtx (Pmode);
> +
> +  /* Step-1: max = half truncated max  */
> +  emit_move_insn (pmode_max, GEN_INT (max));
> +
> +  /* Step-2: lt = src < max  */
> +  riscv_emit_binary (LTU, pmode_lt, pmode_src, pmode_max);
> +
> +  /* Step-3: lt = lt - 1  */
> +  riscv_emit_binary (PLUS, pmode_lt, pmode_lt, CONSTM1_RTX (Pmode));
> +
> +  /* Step-4: pmode_dest = lt | src  */
> +  riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_src);
> +
> +  /* Step-5: dest = pmode_dest  */
> +  emit_move_insn (dest, gen_lowpart (omode, pmode_dest));
> +}
You probably want gen_int_mode rather than GEN_INT.

Why are you using Pmode?  Pmode is for pointers.  This stuff looks like 
basic integer ops, so I don't see why Pmode is appropriate.


jeff
Li, Pan2 July 3, 2024, 1:16 a.m. UTC | #2
Thanks Jeff for comments.

> Rather than reference TARGET_64BIT, you should reference the new 
> iterators names.	

Got it, the generated ChangeLog needs some manual adjustment.

> You probably want gen_int_mode rather than GEN_INT.

Sure.

> Why are you using Pmode?  Pmode is for pointers.  This stuff looks like 
> basic integer ops, so I don't see why Pmode is appropriate.

The incoming operand may be HI/QI/SImode, so we need to promote the mode.
So should we use Xmode there instead? Will update in v2.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Wednesday, July 3, 2024 8:30 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v2] RISC-V: Implement the .SAT_TRUNC for scalar



On 7/2/24 12:33 AM, pan2.li@intel.com wrote:

> 
> The below tests suites are passed for this patch
> 1. The rv64gcv fully regression test.
> 2. The rv64gcv build with glibc
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/iterators.md (TARGET_64BIT): Add new iterator
> 	and related attr(s).
Rather than reference TARGET_64BIT, you should reference the new 
iterators names.

> 	* config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new
> 	func decl for expanding ustrunc
> 	* config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func
> 	impl to expand ustrunc.
> 	* config/riscv/riscv.md (ustrunc<mode><anyi_narrowed>2): Add
> 	new pattern ustrunc<m><n>2.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/sat_arith.h: Add test helper macro.
> 	* gcc.target/riscv/sat_arith_data.h: New test.
> 	* gcc.target/riscv/sat_u_trunc-1.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-2.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-3.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-1.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-2.c: New test.
> 	* gcc.target/riscv/sat_u_trunc-run-3.c: New test.
> 	* gcc.target/riscv/scalar_sat_unary.h: New test.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---

>   
> +/* Implement the unsigned saturation truncation for int mode.
> +
> +   b = SAT_TRUNC (a);
> +   =>
> +   1. max = half truncated max
> +   2. lt = a < max
> +   3. lt = lt - 1 (lt 0, ge -1)
> +   4. d = a | lt
> +   5. b = (trunc)d  */
> +
> +void
> +riscv_expand_ustrunc (rtx dest, rtx src)
> +{
> +  machine_mode omode = GET_MODE (dest);
> +  rtx pmode_max = gen_reg_rtx (Pmode);
> +  unsigned precision = GET_MODE_PRECISION (omode).to_constant ();
> +
> +  gcc_assert (precision < 64);
> +
> +  uint64_t max = ((uint64_t)1u << precision) - 1u;
> +  rtx pmode_src = gen_lowpart (Pmode, src);
> +  rtx pmode_dest = gen_reg_rtx (Pmode);
> +  rtx pmode_lt = gen_reg_rtx (Pmode);
> +
> +  /* Step-1: max = half truncated max  */
> +  emit_move_insn (pmode_max, GEN_INT (max));
> +
> +  /* Step-2: lt = src < max  */
> +  riscv_emit_binary (LTU, pmode_lt, pmode_src, pmode_max);
> +
> +  /* Step-3: lt = lt - 1  */
> +  riscv_emit_binary (PLUS, pmode_lt, pmode_lt, CONSTM1_RTX (Pmode));
> +
> +  /* Step-4: pmode_dest = lt | src  */
> +  riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_src);
> +
> +  /* Step-5: dest = pmode_dest  */
> +  emit_move_insn (dest, gen_lowpart (omode, pmode_dest));
> +}
You probably want gen_int_mode rather than GEN_INT.

Why are you using Pmode?  Pmode is for pointers.  This stuff looks like 
basic integer ops, so I don't see why Pmode is appropriate.


jeff
Jeff Law July 3, 2024, 3:14 p.m. UTC | #3
On 7/2/24 7:16 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> Why are you using Pmode?  Pmode is for pointers.  This stuff looks like
>> basic integer ops, so I don't see why Pmode is appropriate.
> 
> The incoming operand may be HI/QI/SImode, so we need to prompt the mode.
> So there we should take Xmode? Will update in v2.
I would expect that QI/HI shouldn't be happening in practice due to the 
definition of WORD_REGISTER_OPERATIONS.

For rv32 I would expect to just see SI.  For rv64 we're likely to see 
both SI and DI and I would expect that you can just use GET_MODE (src) 
to get that input mode -- unless the input is a constant.

Note that since you're ultimately generating an IOR, if you've got an SI 
input on rv64, then you're going to need to either extend the input or 
wrap it in a suitable widening subreg.

If we allow constants, then we probably need further adjustments.


Jeff
Li, Pan2 July 4, 2024, 12:48 a.m. UTC | #4
Thanks Jeff for comments.

> I would expect that QI/HI shouldn't be happening in practice due to the 
> definition of WORD_REGISTER_OPERATIONS.

Sorry, I don't get the point here. I suppose there may be 6 kinds of truncation for scalar:

uint64_t => uint32_t	
uint64_t => uint16_t
uint64_t => uint8_t
uint32_t => uint16_t
uint32_t => uint8_t
uint16_t => uint8_t

Take uint16_t to uint8_t as example:

uint8_t   test (uint16_t x) 
{                                        
  bool overflow = x > (uint16_t)(uint8_t)(-1);     
  return ((uint8_t)x) | (uint8_t)-overflow;       
}

Will be expanded to:

uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
{
  uint8_t _6;
  _6 = .SAT_TRUNC (x_4(D)); [tail call]
  return _6;
}

Then we will have HImode as the src and QImode as the dest when entering riscv_expand_ustrunc.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Wednesday, July 3, 2024 11:14 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v2] RISC-V: Implement the .SAT_TRUNC for scalar



On 7/2/24 7:16 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> Why are you using Pmode?  Pmode is for pointers.  This stuff looks like
>> basic integer ops, so I don't see why Pmode is appropriate.
> 
> The incoming operand may be HI/QI/SImode, so we need to prompt the mode.
> So there we should take Xmode? Will update in v2.
I would expect that QI/HI shouldn't be happening in practice due to the 
definition of WORD_REGISTER_OPERATIONS.

For rv32 I would expect to just see SI.  For rv64 we're likely to see 
both SI and DI and I would expect that you can just use GET_MODE (src) 
to get that input mode -- unless the input is a constant.

Note that since you're ultimately generating an IOR, if you've got an SI 
input on rv64, then you're going to need to either extend the input or 
wrap it in a suitable widening subreg.

If we allow constants, then we probably need further adjustments.


Jeff
Jeff Law July 4, 2024, 1:52 a.m. UTC | #5
On 7/3/24 6:48 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> I would expect that QI/HI shouldn't be happening in practice due to the
>> definition of WORD_REGISTER_OPERATIONS.
> 
> Sorry I don't get the point here, I suppose there may be 6 kinds of truncation for scalar.
> 
> uint64_t => uint32_t	
> uint64_t => uint16_t
> uint64_t => uint8_t
> uint32_t => uint16_t
> uint32_t => uint8_t
> uint16_t => uint8_t
> 
> Take uint16_t to uint8_t as example:
> 
> uint8_t   test (uint16_t x)
> {
>    bool overflow = x > (uint16_t)(uint8_t)(-1);
>    return ((uint8_t)x) | (uint8_t)-overflow;
> }
> 
> Will be expand to:
> 
> uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
> {
>    uint8_t _6;
>    _6 = .SAT_TRUNC (x_4(D)); [tail call]
>    return _6;
> }
> 
> Then we will have HImode as src and the QImode as the dest when enter riscv_expand_ustrunc.
But if you look at what the hardware can actually support, it doesn't 
have HImode or QImode operations other than load/store and for rv64 
there are no SImode logicals.

That's what WORD_REGISTER_OPERATIONS is designed to support.  Regardless 
of what happens at the source level, the generic parts of gimple->RTL 
expansion arrange to widen the types appropriately.

I haven't looked at the expansion of the SAT_* builtins, but the way 
this is generally supposed to work is you just have to have your 
expander only accept the modes the processor actually supports and 
generic code will handle the widening for you.


Jeff
Li, Pan2 July 4, 2024, 2:07 a.m. UTC | #6
> But if you look at what the hardware can actually support, it doesn't 
> have HImode or QImode operations other than load/store and for rv64 
> there are no SImode logicals.

> That's what WORD_REGISTER_OPERATIONS is designed to support.  Regardless 
> of what happens at the source level, the generic parts of gimple->RTL 
> expansion arrange to widen the types appropriately.

> I haven't looked at the expansion of the SAT_* builtins, but the way 
> this is generally supposed to work is you just have to have your 
> expander only accept the modes the processor actually supports and 
> generic code will handle the widening for you.

Thanks Jeff.
Got it, you mean the widening ops will be covered automatically before expanding.
I am not sure which part takes care of the auto-widening, but the SAT_TRUNC expansion in the middle end
looks like below.

Could you please enlighten me as to whether something is missing here?

static void
match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
{
  tree ops[1];
  tree lhs = gimple_assign_lhs (stmt);
  tree type = TREE_TYPE (lhs);

  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
				       tree_pair (type, TREE_TYPE (ops[0])),
				       OPTIMIZE_FOR_BOTH))
    {
      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
      gimple_call_set_lhs (call, lhs);
      gsi_replace (gsi, call, /* update_eh_info */ true);
    }
}

Pan


-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Thursday, July 4, 2024 9:52 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v2] RISC-V: Implement the .SAT_TRUNC for scalar



On 7/3/24 6:48 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> I would expect that QI/HI shouldn't be happening in practice due to the
>> definition of WORD_REGISTER_OPERATIONS.
> 
> Sorry I don't get the point here, I suppose there may be 6 kinds of truncation for scalar.
> 
> uint64_t => uint32_t	
> uint64_t => uint16_t
> uint64_t => uint8_t
> uint32_t => uint16_t
> uint32_t => uint8_t
> uint16_t => uint8_t
> 
> Take uint16_t to uint8_t as example:
> 
> uint8_t   test (uint16_t x)
> {
>    bool overflow = x > (uint16_t)(uint8_t)(-1);
>    return ((uint8_t)x) | (uint8_t)-overflow;
> }
> 
> Will be expand to:
> 
> uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
> {
>    uint8_t _6;
>    _6 = .SAT_TRUNC (x_4(D)); [tail call]
>    return _6;
> }
> 
> Then we will have HImode as src and the QImode as the dest when enter riscv_expand_ustrunc.
But if you look at what the hardware can actually support, it doesn't 
have HImode or QImode operations other than load/store and for rv64 
there are no SImode logicals.

That's what WORD_REGISTER_OPERATIONS is designed to support.  Regardless 
of what happens at the source level, the generic parts of gimple->RTL 
expansion arrange to widen the types appropriately.

I haven't looked at the expansion of the SAT_* builtins, but the way 
this is generally supposed to work is you just have to have your 
expander only accept the modes the processor actually supports and 
generic code will handle the widening for you.


Jeff
Li, Pan2 July 5, 2024, 2:35 a.m. UTC | #7
Hi Jeff,

I tried allowing only SI/DI mode in the iterator of the ustrunc<m><n>2 pattern in the backend.
But then direct_internal_fn_supported_p returns false when the middle-end queries it for HImode, and
as a result the .SAT_TRUNC is of course not detected.

Indeed most patterns in riscv.md only take GPR instead of ANYI, and I am not sure if we need to adjust
the middle-end for the fn_supported check (I failed to find a similar case in tree-ssa-math-opts.cc).

Additionally, we may need to improve the usadd/ussub for almost the same scenarios.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Thursday, July 4, 2024 10:07 AM
To: Jeff Law <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: RE: [PATCH v2] RISC-V: Implement the .SAT_TRUNC for scalar

> But if you look at what the hardware can actually support, it doesn't 
> have HImode or QImode operations other than load/store and for rv64 
> there are no SImode logicals.

> That's what WORD_REGISTER_OPERATIONS is designed to support.  Regardless 
> of what happens at the source level, the generic parts of gimple->RTL 
> expansion arrange to widen the types appropriately.

> I haven't looked at the expansion of the SAT_* builtins, but the way 
> this is generally supposed to work is you just have to have your 
> expander only accept the modes the processor actually supports and 
> generic code will handle the widening for you.

Thanks Jeff.
Got it, you mean the widening ops will be covered automatically before expanding.
I am not sure which part take care auto-widening, but the SAT_TRUNC expands from middle end may look
like below.

Could you please help to enlighten me is there something missing here ?

static void
match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
{
  tree ops[1];
  tree lhs = gimple_assign_lhs (stmt);
  tree type = TREE_TYPE (lhs);

  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
    && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
				       tree_pair (type, TREE_TYPE (ops[0])),
				       OPTIMIZE_FOR_BOTH))
    {
      gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
      gimple_call_set_lhs (call, lhs);
      gsi_replace (gsi, call, /* update_eh_info */ true);
    }
}

Pan


-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Thursday, July 4, 2024 9:52 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v2] RISC-V: Implement the .SAT_TRUNC for scalar



On 7/3/24 6:48 PM, Li, Pan2 wrote:
> Thanks Jeff for comments.
> 
>> I would expect that QI/HI shouldn't be happening in practice due to the
>> definition of WORD_REGISTER_OPERATIONS.
> 
> Sorry I don't get the point here, I suppose there may be 6 kinds of truncation for scalar.
> 
> uint64_t => uint32_t	
> uint64_t => uint16_t
> uint64_t => uint8_t
> uint32_t => uint16_t
> uint32_t => uint8_t
> uint16_t => uint8_t
> 
> Take uint16_t to uint8_t as example:
> 
> uint8_t   test (uint16_t x)
> {
>    bool overflow = x > (uint16_t)(uint8_t)(-1);
>    return ((uint8_t)x) | (uint8_t)-overflow;
> }
> 
> Will be expand to:
> 
> uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x)
> {
>    uint8_t _6;
>    _6 = .SAT_TRUNC (x_4(D)); [tail call]
>    return _6;
> }
> 
> Then we will have HImode as src and the QImode as the dest when enter riscv_expand_ustrunc.
But if you look at what the hardware can actually support, it doesn't 
have HImode or QImode operations other than load/store and for rv64 
there are no SImode logicals.

That's what WORD_REGISTER_OPERATIONS is designed to support.  Regardless 
of what happens at the source level, the generic parts of gimple->RTL 
expansion arrange to widen the types appropriately.

I haven't looked at the expansion of the SAT_* builtins, but the way 
this is generally supposed to work is you just have to have your 
expander only accept the modes the processor actually supports and 
generic code will handle the widening for you.


Jeff
Jeff Law July 8, 2024, 8:44 p.m. UTC | #8
On 7/3/24 8:07 PM, Li, Pan2 wrote:
>> But if you look at what the hardware can actually support, it doesn't
>> have HImode or QImode operations other than load/store and for rv64
>> there are no SImode logicals.
> 
>> That's what WORD_REGISTER_OPERATIONS is designed to support.  Regardless
>> of what happens at the source level, the generic parts of gimple->RTL
>> expansion arrange to widen the types appropriately.
> 
>> I haven't looked at the expansion of the SAT_* builtins, but the way
>> this is generally supposed to work is you just have to have your
>> expander only accept the modes the processor actually supports and
>> generic code will handle the widening for you.
> 
> Thanks Jeff.
> Got it, you mean the widening ops will be covered automatically before expanding.
That's what happens for most basic operations.  The caveat here is 
you're not going to be going through the standard expander paths.  It's 
one of the unfortunate downsides of builtins/intrinsics, so we may need 
to do something a bit special as a result.



> I am not sure which part take care auto-widening, but the SAT_TRUNC expands from middle end may look
> like below.
> 
> Could you please help to enlighten me is there something missing here ?
> 
> static void
> match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
> {
>    tree ops[1];
>    tree lhs = gimple_assign_lhs (stmt);
>    tree type = TREE_TYPE (lhs);
> 
>    if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
>      && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
> 				       tree_pair (type, TREE_TYPE (ops[0])),
> 				       OPTIMIZE_FOR_BOTH))
>      {
>        gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
>        gimple_call_set_lhs (call, lhs);
>        gsi_replace (gsi, call, /* update_eh_info */ true);
>      }
> }
Right.  That's what I was worried about.  It essentially just asks the 
backend "do you have this operation with this set of modes" without any 
of the widening bits like you'd find in expand_binop.

We ran into this with the CRC work as well.

What this means is that you'll have to do the widening operations in 
the RISC-V expander.  Essentially for an argument smaller than word_mode 
you'd want to widen to word_mode while retaining the right semantics.

jeff
Li, Pan2 July 9, 2024, 1:24 a.m. UTC | #9
> What this means is is that you'll have to do the widening operations in 
> the RISC-V expander.  Essentially for an argument smaller than word_mode 
> you'd want to widen to word_mode while retaining the right semantics.

Got it, I think this is the biggest difference between the scalar and vector expanders.
Back to this patch, it looks like for now we can only leverage gen_lowpart to perform
the widening for the right stmt.

If so, and my understanding is correct, I will rebase this patch in v3.

Pan


-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Tuesday, July 9, 2024 4:44 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v2] RISC-V: Implement the .SAT_TRUNC for scalar



On 7/3/24 8:07 PM, Li, Pan2 wrote:
>> But if you look at what the hardware can actually support, it doesn't
>> have HImode or QImode operations other than load/store and for rv64
>> there are no SImode logicals.
> 
>> That's what WORD_REGISTER_OPERATIONS is designed to support.  Regardless
>> of what happens at the source level, the generic parts of gimple->RTL
>> expansion arrange to widen the types appropriately.
> 
>> I haven't looked at the expansion of the SAT_* builtins, but the way
>> this is generally supposed to work is you just have to have your
>> expander only accept the modes the processor actually supports and
>> generic code will handle the widening for you.
> 
> Thanks Jeff.
> Got it, you mean the widening ops will be covered automatically before expanding.
That's what happens for most basic operations.  The caveat here is 
you're not going to be going through the standard expander paths.  It's 
one of the unfortunate downsides of builtins/intrinsics, so we may need 
to do something a bit special as a result.



> I am not sure which part take care auto-widening, but the SAT_TRUNC expands from middle end may look
> like below.
> 
> Could you please help to enlighten me is there something missing here ?
> 
> static void
> match_unsigned_saturation_trunc (gimple_stmt_iterator *gsi, gassign *stmt)
> {
>    tree ops[1];
>    tree lhs = gimple_assign_lhs (stmt);
>    tree type = TREE_TYPE (lhs);
> 
>    if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
>      && direct_internal_fn_supported_p (IFN_SAT_TRUNC,
> 				       tree_pair (type, TREE_TYPE (ops[0])),
> 				       OPTIMIZE_FOR_BOTH))
>      {
>        gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
>        gimple_call_set_lhs (call, lhs);
>        gsi_replace (gsi, call, /* update_eh_info */ true);
>      }
> }
Right.  That's what I was worried about.  It essentially just asks the 
backend "do you have this operation with this set of modes" without any 
of the widening bits like you'd find in expand_binop.

We ran into this with the CRC work as well.

What this means is is that you'll have to do the widening operations in 
the RISC-V expander.  Essentially for an argument smaller than word_mode 
you'd want to widen to word_mode while retaining the right semantics.

jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 20745faa55e..5e2216fdafb 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -65,6 +65,16 @@  (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")])
 ;; Iterator for hardware-supported integer modes.
 (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
 
+(define_mode_iterator ANYI_NARROW [HI SI (DI "TARGET_64BIT")])
+
+(define_mode_attr ANYI_NARROWED [
+  (HI "QI") (SI "HI") (DI "SI")
+])
+
+(define_mode_attr anyi_narrowed [
+  (HI "qi") (SI "hi") (DI "si")
+])
+
 ;; Iterator for hardware-supported floating-point modes.
 (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
 			    (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a8b76173fa0..61a22a187df 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -135,6 +135,7 @@  riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
 extern void riscv_expand_usadd (rtx, rtx, rtx);
 extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_ustrunc (rtx, rtx);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9bba5da016e..12248d0add9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11649,6 +11649,46 @@  riscv_expand_ussub (rtx dest, rtx x, rtx y)
   emit_move_insn (dest, gen_lowpart (mode, pmode_dest));
 }
 
+/* Implement the unsigned saturation truncation for int mode.
+
+   b = SAT_TRUNC (a);
+   =>
+   1. max = half truncated max
+   2. lt = a < max
+   3. lt = lt - 1 (lt 0, ge -1)
+   4. d = a | lt
+   5. b = (trunc)d  */
+
+void
+riscv_expand_ustrunc (rtx dest, rtx src)
+{
+  machine_mode omode = GET_MODE (dest);
+  rtx pmode_max = gen_reg_rtx (Pmode);
+  unsigned precision = GET_MODE_PRECISION (omode).to_constant ();
+
+  gcc_assert (precision < 64);
+
+  uint64_t max = ((uint64_t)1u << precision) - 1u;
+  rtx pmode_src = gen_lowpart (Pmode, src);
+  rtx pmode_dest = gen_reg_rtx (Pmode);
+  rtx pmode_lt = gen_reg_rtx (Pmode);
+
+  /* Step-1: max = half truncated max  */
+  emit_move_insn (pmode_max, GEN_INT (max));
+
+  /* Step-2: lt = src < max  */
+  riscv_emit_binary (LTU, pmode_lt, pmode_src, pmode_max);
+
+  /* Step-3: lt = lt - 1  */
+  riscv_emit_binary (PLUS, pmode_lt, pmode_lt, CONSTM1_RTX (Pmode));
+
+  /* Step-4: pmode_dest = lt | src  */
+  riscv_emit_binary (IOR, pmode_dest, pmode_lt, pmode_src);
+
+  /* Step-5: dest = pmode_dest  */
+  emit_move_insn (dest, gen_lowpart (omode, pmode_dest));
+}
+
 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode for
    TI_LONG_DOUBLE_TYPE which is for long double type, go with the
    default one for the others.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index c0c960353eb..83fc54fb630 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4271,6 +4271,16 @@  (define_expand "ussub<mode>3"
   }
 )
 
+(define_expand "ustrunc<mode><anyi_narrowed>2"
+  [(match_operand:<ANYI_NARROWED> 0 "register_operand")
+   (match_operand:ANYI_NARROW     1 "register_operand")]
+  ""
+  {
+    riscv_expand_ustrunc (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; These are forms of (x << C1) + C2, potentially canonicalized from
 ;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
 ;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h
index 75442c94dc1..37e0a60f21b 100644
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@@ -214,4 +214,20 @@  sat_u_sub_##T##_fmt_12 (T x, T y)                      \
 #define RUN_SAT_U_SUB_FMT_11(T, x, y) sat_u_sub_##T##_fmt_11(x, y)
 #define RUN_SAT_U_SUB_FMT_12(T, x, y) sat_u_sub_##T##_fmt_12(x, y)
 
+/******************************************************************************/
+/* Saturation Truncate (unsigned and signed)                                  */
+/******************************************************************************/
+
+#define DEF_SAT_U_TRUC_FMT_1(NT, WT) /* WT wide -> NT narrow, form 1  */ \
+NT __attribute__((noinline))             \
+sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \
+{                                        \
+  bool overflow = x > (WT)(NT)(-1); /* x above max of unsigned NT  */ \
+  return ((NT)x) | (NT)-overflow; /* all ones when overflow is set  */ \
+}
+#define DEF_SAT_U_TRUC_FMT_1_WRAP(NT, WT) DEF_SAT_U_TRUC_FMT_1(NT, WT)
+/* The _WRAP forms let macro arguments (e.g. T1/T2) expand before ## pasting.  */
+#define RUN_SAT_U_TRUC_FMT_1(NT, WT, x) sat_u_truc_##WT##_to_##NT##_fmt_1 (x)
+#define RUN_SAT_U_TRUC_FMT_1_WRAP(NT, WT, x) RUN_SAT_U_TRUC_FMT_1(NT, WT, x)
+
 #endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith_data.h b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
new file mode 100644
index 00000000000..b991f8aa955
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith_data.h
@@ -0,0 +1,56 @@ 
+#ifndef HAVE_DEFINED_SAT_ARITH_DATA_H
+#define HAVE_DEFINED_SAT_ARITH_DATA_H
+
+#define TEST_UNARY_STRUCT_NAME(T1, T2) test_##T1##_##T2##_s
+#define TEST_UNARY_STRUCT_DECL(T1, T2) struct TEST_UNARY_STRUCT_NAME(T1, T2)
+#define TEST_UNARY_STRUCT(T1, T2) /* defines the row type of a table  */ \
+  struct TEST_UNARY_STRUCT_NAME(T1, T2) \
+    {                                   \
+      T1 to; /* expected result  */ \
+      T2 from; /* input value  */ \
+    };
+/* Name of the table for a (T1, T2) pair; _WRAP expands its arguments first.  */
+#define TEST_UNARY_DATA(T1, T2)      t_##T1##_##T2##_s
+#define TEST_UNARY_DATA_WRAP(T1, T2) TEST_UNARY_DATA(T1, T2)
+
+TEST_UNARY_STRUCT (uint8_t, uint16_t)
+TEST_UNARY_STRUCT (uint16_t, uint32_t)
+TEST_UNARY_STRUCT (uint32_t, uint64_t)
+/* Each row below is {expected result, input}.  uint16_t -> uint8_t:  */
+TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
+  TEST_UNARY_DATA(uint8_t, uint16_t)[] =
+{
+  {  0,     0},
+  {  2,     2},
+  {254,   254},
+  {255,   255}, /* largest input that still fits  */
+  {255,   256}, /* smallest input that saturates  */
+  {255, 65534},
+  {255, 65535}, /* largest uint16_t input  */
+};
+/* uint32_t -> uint16_t:  */
+TEST_UNARY_STRUCT_DECL(uint16_t, uint32_t) \
+  TEST_UNARY_DATA(uint16_t, uint32_t)[] =
+{
+  {    0,          0},
+  {    5,          5},
+  {65534,      65534},
+  {65535,      65535}, /* largest input that still fits  */
+  {65535,      65536}, /* smallest input that saturates  */
+  {65535, 4294967294},
+  {65535, 4294967295}, /* largest uint32_t input  */
+};
+/* uint64_t -> uint32_t:  */
+TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
+  TEST_UNARY_DATA(uint32_t, uint64_t)[] =
+{
+  {    0,                          0},
+  {    9,                          9},
+  {4294967294,            4294967294},
+  {4294967295,            4294967295}, /* largest input that still fits  */
+  {4294967295,            4294967296}, /* smallest input that saturates  */
+  {4294967295, 18446744073709551614u},
+  {4294967295, 18446744073709551615u}, /* largest uint64_t input  */
+};
+
+#endif
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
new file mode 100644
index 00000000000..354831005b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+/* Expect: mask = (x < 255) - 1, x | mask, then keep the low 8 bits.  */
+/*
+** sat_u_truc_uint16_t_to_uint8_t_fmt_1:
+** sltiu\s+[atx][0-9]+,\s*a0,\s*255
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint8_t, uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
new file mode 100644
index 00000000000..0001d8a9ed9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+/* Expect: max = 65535, mask = (x < max) - 1, x | mask, keep low 16 bits.  */
+/*
+** sat_u_truc_uint32_t_to_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint16_t, uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
new file mode 100644
index 00000000000..4359935a9d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+/* Expect: max = -1 >> 32, mask = (x < max) - 1, x | mask, then sext.w.  */
+/*
+** sat_u_truc_uint64_t_to_uint32_t_fmt_1:
+** li\s+[atx][0-9]+,\s*-1
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
+** sltu\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
new file mode 100644
index 00000000000..39a5ce2b675
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint8_t /* narrow result type  */
+#define T2 uint16_t /* wide input type  */
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+/* The shared driver defines main and uses DATA, T and RUN_UNARY.  */
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
new file mode 100644
index 00000000000..b98114a7dfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint16_t /* narrow result type  */
+#define T2 uint32_t /* wide input type  */
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+/* The shared driver defines main and uses DATA, T and RUN_UNARY.  */
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
new file mode 100644
index 00000000000..8a92a8c1f55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c
@@ -0,0 +1,16 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+#include "sat_arith_data.h"
+
+#define T1 uint32_t /* narrow result type  */
+#define T2 uint64_t /* wide input type  */
+
+DEF_SAT_U_TRUC_FMT_1_WRAP(T1, T2)
+
+#define DATA           TEST_UNARY_DATA_WRAP(T1, T2)
+#define T              TEST_UNARY_STRUCT_DECL(T1, T2)
+#define RUN_UNARY(x)   RUN_SAT_U_TRUC_FMT_1_WRAP(T1, T2, x)
+/* The shared driver defines main and uses DATA, T and RUN_UNARY.  */
+#include "scalar_sat_unary.h"
diff --git a/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h b/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h
new file mode 100644
index 00000000000..2ae058724b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h
@@ -0,0 +1,22 @@ 
+#ifndef HAVE_DEFINED_SCALAR_SAT_UNARY
+#define HAVE_DEFINED_SCALAR_SAT_UNARY
+
+int
+main ()
+{
+  const unsigned count = sizeof (DATA) / sizeof (DATA[0]);
+  unsigned idx = 0;
+
+  /* Each table row pairs an input (from) with its expected result (to).  */
+  while (idx < count)
+    {
+      T row = DATA[idx++];
+      if (!(RUN_UNARY (row.from) == row.to))
+	__builtin_abort ();
+    }
+
+  return 0;
+}
+
+#endif
+