diff mbox series

[v1] Match: Support form 1 for scalar signed integer .SAT_ADD

Message ID 20240805071406.4089749-1-pan2.li@intel.com
State New
Headers show
Series [v1] Match: Support form 1 for scalar signed integer .SAT_ADD | expand

Commit Message

Li, Pan2 Aug. 5, 2024, 7:14 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch would like to support the form 1 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T)             \
  T __attribute__((noinline))                \
  sat_s_add_##T##_fmt_1 (T x, T y)           \
  {                                          \
    T min = (T)1u << (sizeof (T) * 8 - 1);   \
    T max = min - 1;                         \
    return (x ^ y) < 0                       \
      ? (T)(x + y)                           \
      : ((T)(x + y) ^ x) >= 0                \
        ? (T)(x + y)                         \
        : x < 0 ? min : max;                 \
  }

DEF_SAT_S_ADD_FMT_1 (int64_t)

We can tell the difference before and after this patch if backend
implemented the ssadd<m>3 pattern similar as below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   long int _1;
   8   │   long int _2;
   9   │   long int _3;
  10   │   int64_t _4;
  11   │   long int _7;
  12   │   _Bool _9;
  13   │   long int _10;
  14   │   long int _11;
  15   │   long int _12;
  16   │   long int _13;
  17   │
  18   │ ;;   basic block 2, loop depth 0
  19   │ ;;    pred:       ENTRY
  20   │   _1 = x_5(D) ^ y_6(D);
  21   │   _13 = x_5(D) + y_6(D);
  22   │   _3 = x_5(D) ^ _13;
  23   │   _2 = ~_1;
  24   │   _7 = _2 & _3;
  25   │   if (_7 >= 0)
  26   │     goto <bb 4>; [59.00%]
  27   │   else
  28   │     goto <bb 3>; [41.00%]
  29   │ ;;    succ:       4
  30   │ ;;                3
  31   │
  32   │ ;;   basic block 3, loop depth 0
  33   │ ;;    pred:       2
  34   │   _9 = x_5(D) < 0;
  35   │   _10 = (long int) _9;
  36   │   _11 = -_10;
  37   │   _12 = _11 ^ 9223372036854775807;
  38   │ ;;    succ:       4
  39   │
  40   │ ;;   basic block 4, loop depth 0
  41   │ ;;    pred:       2
  42   │ ;;                3
  43   │   # _4 = PHI <_13(2), _12(3)>
  44   │   return _4;
  45   │ ;;    succ:       EXIT
  46   │
  47   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t _4;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;    pred:       ENTRY
  11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  12   │   return _4;
  13   │ ;;    succ:       EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

	* match.pd: Add the matching for signed .SAT_ADD.
	* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
	matching func decl.
	(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
	to ...
	(match_saturation_add): ... here.
	(math_opts_dom_walker::after_dom_children): Update the above renamed
	func from caller.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd              | 14 +++++++++++++
 gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 51 insertions(+), 5 deletions(-)

Comments

Richard Biener Aug. 5, 2024, 11:16 a.m. UTC | #1
On Mon, Aug 5, 2024 at 9:14 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD.  Aka below example:
>
> Form 1:
>   #define DEF_SAT_S_ADD_FMT_1(T)             \
>   T __attribute__((noinline))                \
>   sat_s_add_##T##_fmt_1 (T x, T y)           \
>   {                                          \
>     T min = (T)1u << (sizeof (T) * 8 - 1);   \
>     T max = min - 1;                         \
>     return (x ^ y) < 0                       \
>       ? (T)(x + y)                           \
>       : ((T)(x + y) ^ x) >= 0                \
>         ? (T)(x + y)                         \
>         : x < 0 ? min : max;                 \
>   }
>
> DEF_SAT_S_ADD_FMT_1 (int64_t)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   long int _1;
>    8   │   long int _2;
>    9   │   long int _3;
>   10   │   int64_t _4;
>   11   │   long int _7;
>   12   │   _Bool _9;
>   13   │   long int _10;
>   14   │   long int _11;
>   15   │   long int _12;
>   16   │   long int _13;
>   17   │
>   18   │ ;;   basic block 2, loop depth 0
>   19   │ ;;    pred:       ENTRY
>   20   │   _1 = x_5(D) ^ y_6(D);
>   21   │   _13 = x_5(D) + y_6(D);
>   22   │   _3 = x_5(D) ^ _13;
>   23   │   _2 = ~_1;
>   24   │   _7 = _2 & _3;
>   25   │   if (_7 >= 0)
>   26   │     goto <bb 4>; [59.00%]
>   27   │   else
>   28   │     goto <bb 3>; [41.00%]
>   29   │ ;;    succ:       4
>   30   │ ;;                3
>   31   │
>   32   │ ;;   basic block 3, loop depth 0
>   33   │ ;;    pred:       2
>   34   │   _9 = x_5(D) < 0;
>   35   │   _10 = (long int) _9;
>   36   │   _11 = -_10;
>   37   │   _12 = _11 ^ 9223372036854775807;
>   38   │ ;;    succ:       4
>   39   │
>   40   │ ;;   basic block 4, loop depth 0
>   41   │ ;;    pred:       2
>   42   │ ;;                3
>   43   │   # _4 = PHI <_13(2), _12(3)>
>   44   │   return _4;
>   45   │ ;;    succ:       EXIT
>   46   │
>   47   │ }
>
> After this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t _4;
>    8   │
>    9   │ ;;   basic block 2, loop depth 0
>   10   │ ;;    pred:       ENTRY
>   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   12   │   return _4;
>   13   │ ;;    succ:       EXIT
>   14   │
>   15   │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
>         * match.pd: Add the matching for signed .SAT_ADD.
>         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
>         matching func decl.
>         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
>         to ...
>         (match_saturation_add): ... here.
>         (math_opts_dom_walker::after_dom_children): Update the above renamed
>         func from caller.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd              | 14 +++++++++++++
>  gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 51 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c9c8478d286..0a2ffc733d3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    }
>    (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
>
> +/* Signed saturation add, case 1:
> +   T min = (T)1u << (sizeof (T) * 8 - 1);
> +   T max = min - 1;
> +   SAT_S_ADD = (X ^ Y) < 0
> +     ? (X + Y)
> +     : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
> +(match (signed_integer_sat_add @0 @1)
> +  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))

This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
The GIMPLE IL in the comment below suggests Z == X + Y?

The convert looks odd to me given @0 is involved in both & operands.
The comment above has the same logic error.

I believe the bit_xor lack :c

> +   integer_zerop)

Please indent this to line up with the first operand of the 'ge' to make it
better readable.

> +   (convert? (plus@2 (convert1? @0) (convert1? @1)))

Same with the converts.  The plus needs :c I think.  Is this about
common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?

Note all the :c and conditional converts makes this a quite heavy pattern
(all combinations of swaps and converts gets code).

> +   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..d5c9b475f72 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
>                                     tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>  }
>
>  /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
>   *   <bb 2> :
>   *   _1 = x_3(D) + y_4(D);
>   *   if (_1 >= x_3(D))
> @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>   *   # _2 = PHI <255(2), _1(3)>
>   *   =>
>   *   <bb 4> [local count: 1073741824]:
> - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + *   _1 = x_5(D) ^ y_6(D);
> + *   _13 = x_5(D) + y_6(D);
> + *   _3 = x_5(D) ^ _13;
> + *   _2 = ~_1;
> + *   _7 = _2 & _3;
> + *   if (_7 >= 0)
> + *     goto <bb 4>; [59.00%]
> + *   else
> + *     goto <bb 3>; [41.00%]
> + *   ;;    succ:       4
> + *   ;;                3
> + *   ;;   basic block 3, loop depth 0
> + *   ;;    pred:       2
> + *   _9 = x_5(D) < 0;
> + *   _10 = (long int) _9;
> + *   _11 = -_10;
> + *   _12 = _11 ^ 9223372036854775807;
> + *   ;;    succ:       4
> + *   ;;   basic block 4, loop depth 0
> + *   ;;    pred:       2
> + *   ;;                3
> + *   # _4 = PHI <_13(2), _12(3)>
> + *   =>
> + *   ;;   basic block 2, loop depth 0
> + *   ;;    pred:       ENTRY
> + *   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
>
>  static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>  {
>    if (gimple_phi_num_args (phi) != 2)
>      return;
> @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>    tree ops[2];
>    tree phi_result = gimple_phi_result (phi);
>
> -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
>      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
>                                         ops[0], ops[1]);
>  }
> @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>      gsi_next (&psi))
>      {
>        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> -      match_unsigned_saturation_add (&gsi, psi.phi ());
> +      match_saturation_add (&gsi, psi.phi ());
>        match_unsigned_saturation_sub (&gsi, psi.phi ());
>      }
>
> --
> 2.43.0
>
Li, Pan2 Aug. 5, 2024, 1:52 p.m. UTC | #2
Thanks Richard for comments.

> The convert looks odd to me given @0 is involved in both & operands.

The convert is introduced as the GIMPLE IL is somehow different for int8_t when compares to int32_t or int64_t.
There are some additional ops convert to unsigned for plus, see below line 8-9 and line 22-23.
But we cannot see similar GIMPLE IL for int32_t and int64_t. To reconcile the types from int8_t to int64_t, add the
convert here.

Or may be I have some mistake in the example, let me revisit it and send v2 if no surprise.

   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_1 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t sum;
   8   │   unsigned char x.1_1;
   9   │   unsigned char y.2_2;
  10   │   unsigned char _3;
  11   │   signed char _4;
  12   │   signed char _5;
  13   │   int8_t _6;
  14   │   _Bool _11;
  15   │   signed char _12;
  16   │   signed char _13;
  17   │   signed char _14;
  18   │   signed char _22;
  19   │   signed char _23;
  20   │
  21   │   <bb 2> [local count: 1073741822]:
  22   │   x.1_1 = (unsigned char) x_7(D);
  23   │   y.2_2 = (unsigned char) y_8(D);
  24   │   _3 = x.1_1 + y.2_2;
  25   │   sum_9 = (int8_t) _3;
  26   │   _4 = x_7(D) ^ y_8(D);
  27   │   _5 = x_7(D) ^ sum_9;
  28   │   _23 = ~_4;
  29   │   _22 = _5 & _23;
  30   │   if (_22 < 0)
  31   │     goto <bb 3>; [41.00%]
  32   │   else
  33   │     goto <bb 4>; [59.00%]
  34   │
  35   │   <bb 3> [local count: 259738146]:
  36   │   _11 = x_7(D) < 0;
  37   │   _12 = (signed char) _11;
  38   │   _13 = -_12;
  39   │   _14 = _13 ^ 127;
  40   │
  41   │   <bb 4> [local count: 1073741824]:
  42   │   # _6 = PHI <_14(3), sum_9(2)>
  43   │   return _6;
  44   │
  45   │ }

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Monday, August 5, 2024 7:16 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

On Mon, Aug 5, 2024 at 9:14 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD.  Aka below example:
>
> Form 1:
>   #define DEF_SAT_S_ADD_FMT_1(T)             \
>   T __attribute__((noinline))                \
>   sat_s_add_##T##_fmt_1 (T x, T y)           \
>   {                                          \
>     T min = (T)1u << (sizeof (T) * 8 - 1);   \
>     T max = min - 1;                         \
>     return (x ^ y) < 0                       \
>       ? (T)(x + y)                           \
>       : ((T)(x + y) ^ x) >= 0                \
>         ? (T)(x + y)                         \
>         : x < 0 ? min : max;                 \
>   }
>
> DEF_SAT_S_ADD_FMT_1 (int64_t)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   long int _1;
>    8   │   long int _2;
>    9   │   long int _3;
>   10   │   int64_t _4;
>   11   │   long int _7;
>   12   │   _Bool _9;
>   13   │   long int _10;
>   14   │   long int _11;
>   15   │   long int _12;
>   16   │   long int _13;
>   17   │
>   18   │ ;;   basic block 2, loop depth 0
>   19   │ ;;    pred:       ENTRY
>   20   │   _1 = x_5(D) ^ y_6(D);
>   21   │   _13 = x_5(D) + y_6(D);
>   22   │   _3 = x_5(D) ^ _13;
>   23   │   _2 = ~_1;
>   24   │   _7 = _2 & _3;
>   25   │   if (_7 >= 0)
>   26   │     goto <bb 4>; [59.00%]
>   27   │   else
>   28   │     goto <bb 3>; [41.00%]
>   29   │ ;;    succ:       4
>   30   │ ;;                3
>   31   │
>   32   │ ;;   basic block 3, loop depth 0
>   33   │ ;;    pred:       2
>   34   │   _9 = x_5(D) < 0;
>   35   │   _10 = (long int) _9;
>   36   │   _11 = -_10;
>   37   │   _12 = _11 ^ 9223372036854775807;
>   38   │ ;;    succ:       4
>   39   │
>   40   │ ;;   basic block 4, loop depth 0
>   41   │ ;;    pred:       2
>   42   │ ;;                3
>   43   │   # _4 = PHI <_13(2), _12(3)>
>   44   │   return _4;
>   45   │ ;;    succ:       EXIT
>   46   │
>   47   │ }
>
> After this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t _4;
>    8   │
>    9   │ ;;   basic block 2, loop depth 0
>   10   │ ;;    pred:       ENTRY
>   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   12   │   return _4;
>   13   │ ;;    succ:       EXIT
>   14   │
>   15   │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
>         * match.pd: Add the matching for signed .SAT_ADD.
>         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
>         matching func decl.
>         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
>         to ...
>         (match_saturation_add): ... here.
>         (math_opts_dom_walker::after_dom_children): Update the above renamed
>         func from caller.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd              | 14 +++++++++++++
>  gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 51 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c9c8478d286..0a2ffc733d3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    }
>    (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
>
> +/* Signed saturation add, case 1:
> +   T min = (T)1u << (sizeof (T) * 8 - 1);
> +   T max = min - 1;
> +   SAT_S_ADD = (X ^ Y) < 0
> +     ? (X + Y)
> +     : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
> +(match (signed_integer_sat_add @0 @1)
> +  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))

This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
The GIMPLE IL in the comment below suggests Z == X + Y?

The convert looks odd to me given @0 is involved in both & operands.
The comment above has the same logic error.

I believe the bit_xor lack :c

> +   integer_zerop)

Please indent this to line up with the first operand of the 'ge' to make it
better readable.

> +   (convert? (plus@2 (convert1? @0) (convert1? @1)))

Same with the converts.  The plus needs :c I think.  Is this about
common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?

Note all the :c and conditional converts makes this a quite heavy pattern
(all combinations of swaps and converts gets code).

> +   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..d5c9b475f72 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
>                                     tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>  }
>
>  /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
>   *   <bb 2> :
>   *   _1 = x_3(D) + y_4(D);
>   *   if (_1 >= x_3(D))
> @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>   *   # _2 = PHI <255(2), _1(3)>
>   *   =>
>   *   <bb 4> [local count: 1073741824]:
> - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + *   _1 = x_5(D) ^ y_6(D);
> + *   _13 = x_5(D) + y_6(D);
> + *   _3 = x_5(D) ^ _13;
> + *   _2 = ~_1;
> + *   _7 = _2 & _3;
> + *   if (_7 >= 0)
> + *     goto <bb 4>; [59.00%]
> + *   else
> + *     goto <bb 3>; [41.00%]
> + *   ;;    succ:       4
> + *   ;;                3
> + *   ;;   basic block 3, loop depth 0
> + *   ;;    pred:       2
> + *   _9 = x_5(D) < 0;
> + *   _10 = (long int) _9;
> + *   _11 = -_10;
> + *   _12 = _11 ^ 9223372036854775807;
> + *   ;;    succ:       4
> + *   ;;   basic block 4, loop depth 0
> + *   ;;    pred:       2
> + *   ;;                3
> + *   # _4 = PHI <_13(2), _12(3)>
> + *   =>
> + *   ;;   basic block 2, loop depth 0
> + *   ;;    pred:       ENTRY
> + *   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
>
>  static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>  {
>    if (gimple_phi_num_args (phi) != 2)
>      return;
> @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>    tree ops[2];
>    tree phi_result = gimple_phi_result (phi);
>
> -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
>      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
>                                         ops[0], ops[1]);
>  }
> @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>      gsi_next (&psi))
>      {
>        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> -      match_unsigned_saturation_add (&gsi, psi.phi ());
> +      match_saturation_add (&gsi, psi.phi ());
>        match_unsigned_saturation_sub (&gsi, psi.phi ());
>      }
>
> --
> 2.43.0
>
Li, Pan2 Aug. 6, 2024, 1:21 a.m. UTC | #3
Hi Richard,

It looks like the plus will have additional convert to unsigned in int8 and int16, see below example in test.c.006t.gimple.
And we need these convert ops in one matching pattern to cover all int scalar types.

I am not sure if there is a better way here, given convert in matching pattern is not very elegant up to a point.

int16_t
add_i16 (int16_t a, int16_t b)
{
  int16_t sum = a + b;
  return sum;
}

int32_t
add_i32 (int32_t a, int32_t b)
{
  int32_t sum = a + b;
  return sum;
}

------- 006t.gimple -------
int16_t add_i16 (int16_t a, int16_t b)
{
  int16_t D.2815;
  int16_t sum;

  a.0_1 = (unsigned short) a;
  b.1_2 = (unsigned short) b;
  _3 = a.0_1 + b.1_2;
  sum = (int16_t) _3;
  D.2815 = sum;
  return D.2815;
}

int32_t add_i32 (int32_t a, int32_t b)
{
  int32_t D.2817;
  int32_t sum;

  sum = a + b;
  D.2817 = sum;
  return D.2817;
}

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Monday, August 5, 2024 9:52 PM
To: Richard Biener <richard.guenther@gmail.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: RE: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

Thanks Richard for comments.

> The convert looks odd to me given @0 is involved in both & operands.

The convert is introduced as the GIMPLE IL is somehow different for int8_t when compares to int32_t or int64_t.
There are some additional ops convert to unsigned for plus, see below line 8-9 and line 22-23.
But we cannot see similar GIMPLE IL for int32_t and int64_t. To reconcile the types from int8_t to int64_t, add the
convert here.

Or may be I have some mistake in the example, let me revisit it and send v2 if no surprise.

   4   │ __attribute__((noinline))
   5   │ int8_t sat_s_add_int8_t_fmt_1 (int8_t x, int8_t y)
   6   │ {
   7   │   int8_t sum;
   8   │   unsigned char x.1_1;
   9   │   unsigned char y.2_2;
  10   │   unsigned char _3;
  11   │   signed char _4;
  12   │   signed char _5;
  13   │   int8_t _6;
  14   │   _Bool _11;
  15   │   signed char _12;
  16   │   signed char _13;
  17   │   signed char _14;
  18   │   signed char _22;
  19   │   signed char _23;
  20   │
  21   │   <bb 2> [local count: 1073741822]:
  22   │   x.1_1 = (unsigned char) x_7(D);
  23   │   y.2_2 = (unsigned char) y_8(D);
  24   │   _3 = x.1_1 + y.2_2;
  25   │   sum_9 = (int8_t) _3;
  26   │   _4 = x_7(D) ^ y_8(D);
  27   │   _5 = x_7(D) ^ sum_9;
  28   │   _23 = ~_4;
  29   │   _22 = _5 & _23;
  30   │   if (_22 < 0)
  31   │     goto <bb 3>; [41.00%]
  32   │   else
  33   │     goto <bb 4>; [59.00%]
  34   │
  35   │   <bb 3> [local count: 259738146]:
  36   │   _11 = x_7(D) < 0;
  37   │   _12 = (signed char) _11;
  38   │   _13 = -_12;
  39   │   _14 = _13 ^ 127;
  40   │
  41   │   <bb 4> [local count: 1073741824]:
  42   │   # _6 = PHI <_14(3), sum_9(2)>
  43   │   return _6;
  44   │
  45   │ }

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Monday, August 5, 2024 7:16 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

On Mon, Aug 5, 2024 at 9:14 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD.  Aka below example:
>
> Form 1:
>   #define DEF_SAT_S_ADD_FMT_1(T)             \
>   T __attribute__((noinline))                \
>   sat_s_add_##T##_fmt_1 (T x, T y)           \
>   {                                          \
>     T min = (T)1u << (sizeof (T) * 8 - 1);   \
>     T max = min - 1;                         \
>     return (x ^ y) < 0                       \
>       ? (T)(x + y)                           \
>       : ((T)(x + y) ^ x) >= 0                \
>         ? (T)(x + y)                         \
>         : x < 0 ? min : max;                 \
>   }
>
> DEF_SAT_S_ADD_FMT_1 (int64_t)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   long int _1;
>    8   │   long int _2;
>    9   │   long int _3;
>   10   │   int64_t _4;
>   11   │   long int _7;
>   12   │   _Bool _9;
>   13   │   long int _10;
>   14   │   long int _11;
>   15   │   long int _12;
>   16   │   long int _13;
>   17   │
>   18   │ ;;   basic block 2, loop depth 0
>   19   │ ;;    pred:       ENTRY
>   20   │   _1 = x_5(D) ^ y_6(D);
>   21   │   _13 = x_5(D) + y_6(D);
>   22   │   _3 = x_5(D) ^ _13;
>   23   │   _2 = ~_1;
>   24   │   _7 = _2 & _3;
>   25   │   if (_7 >= 0)
>   26   │     goto <bb 4>; [59.00%]
>   27   │   else
>   28   │     goto <bb 3>; [41.00%]
>   29   │ ;;    succ:       4
>   30   │ ;;                3
>   31   │
>   32   │ ;;   basic block 3, loop depth 0
>   33   │ ;;    pred:       2
>   34   │   _9 = x_5(D) < 0;
>   35   │   _10 = (long int) _9;
>   36   │   _11 = -_10;
>   37   │   _12 = _11 ^ 9223372036854775807;
>   38   │ ;;    succ:       4
>   39   │
>   40   │ ;;   basic block 4, loop depth 0
>   41   │ ;;    pred:       2
>   42   │ ;;                3
>   43   │   # _4 = PHI <_13(2), _12(3)>
>   44   │   return _4;
>   45   │ ;;    succ:       EXIT
>   46   │
>   47   │ }
>
> After this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t _4;
>    8   │
>    9   │ ;;   basic block 2, loop depth 0
>   10   │ ;;    pred:       ENTRY
>   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   12   │   return _4;
>   13   │ ;;    succ:       EXIT
>   14   │
>   15   │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
>         * match.pd: Add the matching for signed .SAT_ADD.
>         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
>         matching func decl.
>         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
>         to ...
>         (match_saturation_add): ... here.
>         (math_opts_dom_walker::after_dom_children): Update the above renamed
>         func from caller.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd              | 14 +++++++++++++
>  gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 51 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index c9c8478d286..0a2ffc733d3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    }
>    (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
>
> +/* Signed saturation add, case 1:
> +   T min = (T)1u << (sizeof (T) * 8 - 1);
> +   T max = min - 1;
> +   SAT_S_ADD = (X ^ Y) < 0
> +     ? (X + Y)
> +     : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
> +(match (signed_integer_sat_add @0 @1)
> +  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))

This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
The GIMPLE IL in the comment below suggests Z == X + Y?

The convert looks odd to me given @0 is involved in both & operands.
The comment above has the same logic error.

I believe the bit_xor lack :c

> +   integer_zerop)

Please indent this to line up with the first operand of the 'ge' to make it
better readable.

> +   (convert? (plus@2 (convert1? @0) (convert1? @1)))

Same with the converts.  The plus needs :c I think.  Is this about
common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?

Note all the :c and conditional converts makes this a quite heavy pattern
(all combinations of swaps and converts gets code).

> +   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..d5c9b475f72 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
>                                     tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>  }
>
>  /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
>   *   <bb 2> :
>   *   _1 = x_3(D) + y_4(D);
>   *   if (_1 >= x_3(D))
> @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>   *   # _2 = PHI <255(2), _1(3)>
>   *   =>
>   *   <bb 4> [local count: 1073741824]:
> - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + *   _1 = x_5(D) ^ y_6(D);
> + *   _13 = x_5(D) + y_6(D);
> + *   _3 = x_5(D) ^ _13;
> + *   _2 = ~_1;
> + *   _7 = _2 & _3;
> + *   if (_7 >= 0)
> + *     goto <bb 4>; [59.00%]
> + *   else
> + *     goto <bb 3>; [41.00%]
> + *   ;;    succ:       4
> + *   ;;                3
> + *   ;;   basic block 3, loop depth 0
> + *   ;;    pred:       2
> + *   _9 = x_5(D) < 0;
> + *   _10 = (long int) _9;
> + *   _11 = -_10;
> + *   _12 = _11 ^ 9223372036854775807;
> + *   ;;    succ:       4
> + *   ;;   basic block 4, loop depth 0
> + *   ;;    pred:       2
> + *   ;;                3
> + *   # _4 = PHI <_13(2), _12(3)>
> + *   =>
> + *   ;;   basic block 2, loop depth 0
> + *   ;;    pred:       ENTRY
> + *   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
>
>  static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>  {
>    if (gimple_phi_num_args (phi) != 2)
>      return;
> @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>    tree ops[2];
>    tree phi_result = gimple_phi_result (phi);
>
> -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
>      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
>                                         ops[0], ops[1]);
>  }
> @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>      gsi_next (&psi))
>      {
>        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> -      match_unsigned_saturation_add (&gsi, psi.phi ());
> +      match_saturation_add (&gsi, psi.phi ());
>        match_unsigned_saturation_sub (&gsi, psi.phi ());
>      }
>
> --
> 2.43.0
>
Richard Biener Aug. 6, 2024, 11:50 a.m. UTC | #4
On Tue, Aug 6, 2024 at 3:21 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> It looks like the plus will have additional convert to unsigned in int8 and int16, see below example in test.c.006t.gimple.
> And we need these convert ops in one matching pattern to cover all int scalar types.

Ah, yeah - that's the usual (premature) frontend optimization to
shorten operations after the standard
mandated standard conversion (to 'int' in this case).

> I am not sure if there is a better way here, given convert in matching pattern is not very elegant up to a point.
>
> int16_t
> add_i16 (int16_t a, int16_t b)
> {
>   int16_t sum = a + b;
>   return sum;
> }
>
> int32_t
> add_i32 (int32_t a, int32_t b)
> {
>   int32_t sum = a + b;
>   return sum;
> }
>
> ------- 006t.gimple -------
> int16_t add_i16 (int16_t a, int16_t b)
> {
>   int16_t D.2815;
>   int16_t sum;
>
>   a.0_1 = (unsigned short) a;
>   b.1_2 = (unsigned short) b;
>   _3 = a.0_1 + b.1_2;
>   sum = (int16_t) _3;
>   D.2815 = sum;
>   return D.2815;
> }
>
> int32_t add_i32 (int32_t a, int32_t b)
> {
>   int32_t D.2817;
>   int32_t sum;
>
>   sum = a + b;
>   D.2817 = sum;
>   return D.2817;
> }
>
> Pan
>
> -----Original Message-----
> From: Li, Pan2
> Sent: Monday, August 5, 2024 9:52 PM
> To: Richard Biener <richard.guenther@gmail.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: RE: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD
>
> Thanks Richard for comments.
>
> > The convert looks odd to me given @0 is involved in both & operands.
>
> The convert is introduced as the GIMPLE IL is somehow different for int8_t when compares to int32_t or int64_t.
> There are some additional ops convert to unsigned for plus, see below line 8-9 and line 22-23.
> But we cannot see similar GIMPLE IL for int32_t and int64_t. To reconcile the types from int8_t to int64_t, add the
> convert here.
>
> Or may be I have some mistake in the example, let me revisit it and send v2 if no surprise.
>
>    4   │ __attribute__((noinline))
>    5   │ int8_t sat_s_add_int8_t_fmt_1 (int8_t x, int8_t y)
>    6   │ {
>    7   │   int8_t sum;
>    8   │   unsigned char x.1_1;
>    9   │   unsigned char y.2_2;
>   10   │   unsigned char _3;
>   11   │   signed char _4;
>   12   │   signed char _5;
>   13   │   int8_t _6;
>   14   │   _Bool _11;
>   15   │   signed char _12;
>   16   │   signed char _13;
>   17   │   signed char _14;
>   18   │   signed char _22;
>   19   │   signed char _23;
>   20   │
>   21   │   <bb 2> [local count: 1073741822]:
>   22   │   x.1_1 = (unsigned char) x_7(D);
>   23   │   y.2_2 = (unsigned char) y_8(D);
>   24   │   _3 = x.1_1 + y.2_2;
>   25   │   sum_9 = (int8_t) _3;
>   26   │   _4 = x_7(D) ^ y_8(D);
>   27   │   _5 = x_7(D) ^ sum_9;
>   28   │   _23 = ~_4;
>   29   │   _22 = _5 & _23;
>   30   │   if (_22 < 0)
>   31   │     goto <bb 3>; [41.00%]
>   32   │   else
>   33   │     goto <bb 4>; [59.00%]
>   34   │
>   35   │   <bb 3> [local count: 259738146]:
>   36   │   _11 = x_7(D) < 0;
>   37   │   _12 = (signed char) _11;
>   38   │   _13 = -_12;
>   39   │   _14 = _13 ^ 127;
>   40   │
>   41   │   <bb 4> [local count: 1073741824]:
>   42   │   # _6 = PHI <_14(3), sum_9(2)>
>   43   │   return _6;
>   44   │
>   45   │ }
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Monday, August 5, 2024 7:16 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD
>
> On Mon, Aug 5, 2024 at 9:14 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to support the form 1 of the scalar signed
> > integer .SAT_ADD.  Aka below example:
> >
> > Form 1:
> >   #define DEF_SAT_S_ADD_FMT_1(T)             \
> >   T __attribute__((noinline))                \
> >   sat_s_add_##T##_fmt_1 (T x, T y)           \
> >   {                                          \
> >     T min = (T)1u << (sizeof (T) * 8 - 1);   \
> >     T max = min - 1;                         \
> >     return (x ^ y) < 0                       \
> >       ? (T)(x + y)                           \
> >       : ((T)(x + y) ^ x) >= 0                \
> >         ? (T)(x + y)                         \
> >         : x < 0 ? min : max;                 \
> >   }
> >
> > DEF_SAT_S_ADD_FMT_1 (int64_t)
> >
> > We can tell the difference before and after this patch if backend
> > implemented the ssadd<m>3 pattern similar as below.
> >
> > Before this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   long int _1;
> >    8   │   long int _2;
> >    9   │   long int _3;
> >   10   │   int64_t _4;
> >   11   │   long int _7;
> >   12   │   _Bool _9;
> >   13   │   long int _10;
> >   14   │   long int _11;
> >   15   │   long int _12;
> >   16   │   long int _13;
> >   17   │
> >   18   │ ;;   basic block 2, loop depth 0
> >   19   │ ;;    pred:       ENTRY
> >   20   │   _1 = x_5(D) ^ y_6(D);
> >   21   │   _13 = x_5(D) + y_6(D);
> >   22   │   _3 = x_5(D) ^ _13;
> >   23   │   _2 = ~_1;
> >   24   │   _7 = _2 & _3;
> >   25   │   if (_7 >= 0)
> >   26   │     goto <bb 4>; [59.00%]
> >   27   │   else
> >   28   │     goto <bb 3>; [41.00%]
> >   29   │ ;;    succ:       4
> >   30   │ ;;                3
> >   31   │
> >   32   │ ;;   basic block 3, loop depth 0
> >   33   │ ;;    pred:       2
> >   34   │   _9 = x_5(D) < 0;
> >   35   │   _10 = (long int) _9;
> >   36   │   _11 = -_10;
> >   37   │   _12 = _11 ^ 9223372036854775807;
> >   38   │ ;;    succ:       4
> >   39   │
> >   40   │ ;;   basic block 4, loop depth 0
> >   41   │ ;;    pred:       2
> >   42   │ ;;                3
> >   43   │   # _4 = PHI <_13(2), _12(3)>
> >   44   │   return _4;
> >   45   │ ;;    succ:       EXIT
> >   46   │
> >   47   │ }
> >
> > After this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   int64_t _4;
> >    8   │
> >    9   │ ;;   basic block 2, loop depth 0
> >   10   │ ;;    pred:       ENTRY
> >   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> >   12   │   return _4;
> >   13   │ ;;    succ:       EXIT
> >   14   │
> >   15   │ }
> >
> > The below test suites are passed for this patch.
> > * The rv64gcv fully regression test.
> > * The x86 bootstrap test.
> > * The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> >         * match.pd: Add the matching for signed .SAT_ADD.
> >         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> >         matching func decl.
> >         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> >         to ...
> >         (match_saturation_add): ... here.
> >         (math_opts_dom_walker::after_dom_children): Update the above renamed
> >         func from caller.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/match.pd              | 14 +++++++++++++
> >  gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
> >  2 files changed, 51 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index c9c8478d286..0a2ffc733d3 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    }
> >    (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
> >
> > +/* Signed saturation add, case 1:
> > +   T min = (T)1u << (sizeof (T) * 8 - 1);
> > +   T max = min - 1;
> > +   SAT_S_ADD = (X ^ Y) < 0
> > +     ? (X + Y)
> > +     : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
> > +(match (signed_integer_sat_add @0 @1)
> > +  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))
>
> This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
> The GIMPLE IL in the comment below suggests Z == X + Y?
>
> The convert looks odd to me given @0 is involved in both & operands.
> The comment above has the same logic error.
>
> I believe the bit_xor lack :c
>
> > +   integer_zerop)
>
> Please indent this to line up with the first operand of the 'ge' to make it
> better readable.
>
> > +   (convert? (plus@2 (convert1? @0) (convert1? @1)))
>
> Same with the converts.  The plus needs :c I think.  Is this about
> common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?
>
> Note all the :c and conditional converts makes this a quite heavy pattern
> (all combinations of swaps and converts gets code).
>
> > +   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> > + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 8d96a4c964b..d5c9b475f72 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
> >
> > +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> > +
> >  static void
> >  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> >                                     tree lhs, tree op_0, tree op_1)
> > @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >  }
> >
> >  /*
> > - * Try to match saturation unsigned add with PHI.
> > + * Try to match saturation add with PHI.
> > + * For unsigned integer:
> >   *   <bb 2> :
> >   *   _1 = x_3(D) + y_4(D);
> >   *   if (_1 >= x_3(D))
> > @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >   *   # _2 = PHI <255(2), _1(3)>
> >   *   =>
> >   *   <bb 4> [local count: 1073741824]:
> > - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> > + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> > + *
> > + * For signed integer:
> > + *   _1 = x_5(D) ^ y_6(D);
> > + *   _13 = x_5(D) + y_6(D);
> > + *   _3 = x_5(D) ^ _13;
> > + *   _2 = ~_1;
> > + *   _7 = _2 & _3;
> > + *   if (_7 >= 0)
> > + *     goto <bb 4>; [59.00%]
> > + *   else
> > + *     goto <bb 3>; [41.00%]
> > + *   ;;    succ:       4
> > + *   ;;                3
> > + *   ;;   basic block 3, loop depth 0
> > + *   ;;    pred:       2
> > + *   _9 = x_5(D) < 0;
> > + *   _10 = (long int) _9;
> > + *   _11 = -_10;
> > + *   _12 = _11 ^ 9223372036854775807;
> > + *   ;;    succ:       4
> > + *   ;;   basic block 4, loop depth 0
> > + *   ;;    pred:       2
> > + *   ;;                3
> > + *   # _4 = PHI <_13(2), _12(3)>
> > + *   =>
> > + *   ;;   basic block 2, loop depth 0
> > + *   ;;    pred:       ENTRY
> > + *   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
> >
> >  static void
> > -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> > +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >  {
> >    if (gimple_phi_num_args (phi) != 2)
> >      return;
> > @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >    tree ops[2];
> >    tree phi_result = gimple_phi_result (phi);
> >
> > -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> > +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> > +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> >      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> >                                         ops[0], ops[1]);
> >  }
> > @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >      gsi_next (&psi))
> >      {
> >        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> > -      match_unsigned_saturation_add (&gsi, psi.phi ());
> > +      match_saturation_add (&gsi, psi.phi ());
> >        match_unsigned_saturation_sub (&gsi, psi.phi ());
> >      }
> >
> > --
> > 2.43.0
> >
Li, Pan2 Aug. 6, 2024, 1:01 p.m. UTC | #5
> Ah, yeah - that's the usual (premature) frontend optimization to
> shorten operations after the standard
> mandated standard conversion (to 'int' in this case).

Thanks Richard for confirmation, let me refine the matching in v2.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Tuesday, August 6, 2024 7:50 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD

On Tue, Aug 6, 2024 at 3:21 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> It looks like the plus will have additional convert to unsigned in int8 and int16, see below example in test.c.006t.gimple.
> And we need these convert ops in one matching pattern to cover all int scalar types.

Ah, yeah - that's the usual (premature) frontend optimization to
shorten operations after the standard
mandated standard conversion (to 'int' in this case).

> I am not sure if there is a better way here, given convert in matching pattern is not very elegant up to a point.
>
> int16_t
> add_i16 (int16_t a, int16_t b)
> {
>   int16_t sum = a + b;
>   return sum;
> }
>
> int32_t
> add_i32 (int32_t a, int32_t b)
> {
>   int32_t sum = a + b;
>   return sum;
> }
>
> ------- 006t.gimple -------
> int16_t add_i16 (int16_t a, int16_t b)
> {
>   int16_t D.2815;
>   int16_t sum;
>
>   a.0_1 = (unsigned short) a;
>   b.1_2 = (unsigned short) b;
>   _3 = a.0_1 + b.1_2;
>   sum = (int16_t) _3;
>   D.2815 = sum;
>   return D.2815;
> }
>
> int32_t add_i32 (int32_t a, int32_t b)
> {
>   int32_t D.2817;
>   int32_t sum;
>
>   sum = a + b;
>   D.2817 = sum;
>   return D.2817;
> }
>
> Pan
>
> -----Original Message-----
> From: Li, Pan2
> Sent: Monday, August 5, 2024 9:52 PM
> To: Richard Biener <richard.guenther@gmail.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: RE: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD
>
> Thanks Richard for comments.
>
> > The convert looks odd to me given @0 is involved in both & operands.
>
> The convert is introduced as the GIMPLE IL is somehow different for int8_t when compares to int32_t or int64_t.
> There are some additional ops convert to unsigned for plus, see below line 8-9 and line 22-23.
> But we cannot see similar GIMPLE IL for int32_t and int64_t. To reconcile the types from int8_t to int64_t, add the
> convert here.
>
> Or may be I have some mistake in the example, let me revisit it and send v2 if no surprise.
>
>    4   │ __attribute__((noinline))
>    5   │ int8_t sat_s_add_int8_t_fmt_1 (int8_t x, int8_t y)
>    6   │ {
>    7   │   int8_t sum;
>    8   │   unsigned char x.1_1;
>    9   │   unsigned char y.2_2;
>   10   │   unsigned char _3;
>   11   │   signed char _4;
>   12   │   signed char _5;
>   13   │   int8_t _6;
>   14   │   _Bool _11;
>   15   │   signed char _12;
>   16   │   signed char _13;
>   17   │   signed char _14;
>   18   │   signed char _22;
>   19   │   signed char _23;
>   20   │
>   21   │   <bb 2> [local count: 1073741822]:
>   22   │   x.1_1 = (unsigned char) x_7(D);
>   23   │   y.2_2 = (unsigned char) y_8(D);
>   24   │   _3 = x.1_1 + y.2_2;
>   25   │   sum_9 = (int8_t) _3;
>   26   │   _4 = x_7(D) ^ y_8(D);
>   27   │   _5 = x_7(D) ^ sum_9;
>   28   │   _23 = ~_4;
>   29   │   _22 = _5 & _23;
>   30   │   if (_22 < 0)
>   31   │     goto <bb 3>; [41.00%]
>   32   │   else
>   33   │     goto <bb 4>; [59.00%]
>   34   │
>   35   │   <bb 3> [local count: 259738146]:
>   36   │   _11 = x_7(D) < 0;
>   37   │   _12 = (signed char) _11;
>   38   │   _13 = -_12;
>   39   │   _14 = _13 ^ 127;
>   40   │
>   41   │   <bb 4> [local count: 1073741824]:
>   42   │   # _6 = PHI <_14(3), sum_9(2)>
>   43   │   return _6;
>   44   │
>   45   │ }
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Monday, August 5, 2024 7:16 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Match: Support form 1 for scalar signed integer .SAT_ADD
>
> On Mon, Aug 5, 2024 at 9:14 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to support the form 1 of the scalar signed
> > integer .SAT_ADD.  Aka below example:
> >
> > Form 1:
> >   #define DEF_SAT_S_ADD_FMT_1(T)             \
> >   T __attribute__((noinline))                \
> >   sat_s_add_##T##_fmt_1 (T x, T y)           \
> >   {                                          \
> >     T min = (T)1u << (sizeof (T) * 8 - 1);   \
> >     T max = min - 1;                         \
> >     return (x ^ y) < 0                       \
> >       ? (T)(x + y)                           \
> >       : ((T)(x + y) ^ x) >= 0                \
> >         ? (T)(x + y)                         \
> >         : x < 0 ? min : max;                 \
> >   }
> >
> > DEF_SAT_S_ADD_FMT_1 (int64_t)
> >
> > We can tell the difference before and after this patch if backend
> > implemented the ssadd<m>3 pattern similar as below.
> >
> > Before this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   long int _1;
> >    8   │   long int _2;
> >    9   │   long int _3;
> >   10   │   int64_t _4;
> >   11   │   long int _7;
> >   12   │   _Bool _9;
> >   13   │   long int _10;
> >   14   │   long int _11;
> >   15   │   long int _12;
> >   16   │   long int _13;
> >   17   │
> >   18   │ ;;   basic block 2, loop depth 0
> >   19   │ ;;    pred:       ENTRY
> >   20   │   _1 = x_5(D) ^ y_6(D);
> >   21   │   _13 = x_5(D) + y_6(D);
> >   22   │   _3 = x_5(D) ^ _13;
> >   23   │   _2 = ~_1;
> >   24   │   _7 = _2 & _3;
> >   25   │   if (_7 >= 0)
> >   26   │     goto <bb 4>; [59.00%]
> >   27   │   else
> >   28   │     goto <bb 3>; [41.00%]
> >   29   │ ;;    succ:       4
> >   30   │ ;;                3
> >   31   │
> >   32   │ ;;   basic block 3, loop depth 0
> >   33   │ ;;    pred:       2
> >   34   │   _9 = x_5(D) < 0;
> >   35   │   _10 = (long int) _9;
> >   36   │   _11 = -_10;
> >   37   │   _12 = _11 ^ 9223372036854775807;
> >   38   │ ;;    succ:       4
> >   39   │
> >   40   │ ;;   basic block 4, loop depth 0
> >   41   │ ;;    pred:       2
> >   42   │ ;;                3
> >   43   │   # _4 = PHI <_13(2), _12(3)>
> >   44   │   return _4;
> >   45   │ ;;    succ:       EXIT
> >   46   │
> >   47   │ }
> >
> > After this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   int64_t _4;
> >    8   │
> >    9   │ ;;   basic block 2, loop depth 0
> >   10   │ ;;    pred:       ENTRY
> >   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> >   12   │   return _4;
> >   13   │ ;;    succ:       EXIT
> >   14   │
> >   15   │ }
> >
> > The below test suites are passed for this patch.
> > * The rv64gcv fully regression test.
> > * The x86 bootstrap test.
> > * The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> >         * match.pd: Add the matching for signed .SAT_ADD.
> >         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> >         matching func decl.
> >         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> >         to ...
> >         (match_saturation_add): ... here.
> >         (math_opts_dom_walker::after_dom_children): Update the above renamed
> >         func from caller.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/match.pd              | 14 +++++++++++++
> >  gcc/tree-ssa-math-opts.cc | 42 ++++++++++++++++++++++++++++++++++-----
> >  2 files changed, 51 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index c9c8478d286..0a2ffc733d3 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3311,6 +3311,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    }
> >    (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
> >
> > +/* Signed saturation add, case 1:
> > +   T min = (T)1u << (sizeof (T) * 8 - 1);
> > +   T max = min - 1;
> > +   SAT_S_ADD = (X ^ Y) < 0
> > +     ? (X + Y)
> > +     : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
> > +(match (signed_integer_sat_add @0 @1)
> > +  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))
>
> This matches arbitrary Z in (X ^ (T)Z) & ~(X ^ Y) which cannot be intended.
> The GIMPLE IL in the comment below suggests Z == X + Y?
>
> The convert looks odd to me given @0 is involved in both & operands.
> The comment above has the same logic error.
>
> I believe the bit_xor lack :c
>
> > +   integer_zerop)
>
> Please indent this to line up with the first operand of the 'ge' to make it
> better readable.
>
> > +   (convert? (plus@2 (convert1? @0) (convert1? @1)))
>
> Same with the converts.  The plus needs :c I think.  Is this about
> common sign-conversions being hoisted from (int)x + (int)y -> (int)(x+y)?
>
> Note all the :c and conditional converts makes this a quite heavy pattern
> (all combinations of swaps and converts gets code).
>
> > +   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
> > + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 8d96a4c964b..d5c9b475f72 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
> >
> > +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> > +
> >  static void
> >  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> >                                     tree lhs, tree op_0, tree op_1)
> > @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >  }
> >
> >  /*
> > - * Try to match saturation unsigned add with PHI.
> > + * Try to match saturation add with PHI.
> > + * For unsigned integer:
> >   *   <bb 2> :
> >   *   _1 = x_3(D) + y_4(D);
> >   *   if (_1 >= x_3(D))
> > @@ -4086,10 +4089,38 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >   *   # _2 = PHI <255(2), _1(3)>
> >   *   =>
> >   *   <bb 4> [local count: 1073741824]:
> > - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> > + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> > + *
> > + * For signed integer:
> > + *   _1 = x_5(D) ^ y_6(D);
> > + *   _13 = x_5(D) + y_6(D);
> > + *   _3 = x_5(D) ^ _13;
> > + *   _2 = ~_1;
> > + *   _7 = _2 & _3;
> > + *   if (_7 >= 0)
> > + *     goto <bb 4>; [59.00%]
> > + *   else
> > + *     goto <bb 3>; [41.00%]
> > + *   ;;    succ:       4
> > + *   ;;                3
> > + *   ;;   basic block 3, loop depth 0
> > + *   ;;    pred:       2
> > + *   _9 = x_5(D) < 0;
> > + *   _10 = (long int) _9;
> > + *   _11 = -_10;
> > + *   _12 = _11 ^ 9223372036854775807;
> > + *   ;;    succ:       4
> > + *   ;;   basic block 4, loop depth 0
> > + *   ;;    pred:       2
> > + *   ;;                3
> > + *   # _4 = PHI <_13(2), _12(3)>
> > + *   =>
> > + *   ;;   basic block 2, loop depth 0
> > + *   ;;    pred:       ENTRY
> > + *   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
> >
> >  static void
> > -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> > +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >  {
> >    if (gimple_phi_num_args (phi) != 2)
> >      return;
> > @@ -4097,7 +4128,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >    tree ops[2];
> >    tree phi_result = gimple_phi_result (phi);
> >
> > -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> > +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> > +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> >      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> >                                         ops[0], ops[1]);
> >  }
> > @@ -6097,7 +6129,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >      gsi_next (&psi))
> >      {
> >        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> > -      match_unsigned_saturation_add (&gsi, psi.phi ());
> > +      match_saturation_add (&gsi, psi.phi ());
> >        match_unsigned_saturation_sub (&gsi, psi.phi ());
> >      }
> >
> > --
> > 2.43.0
> >
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index c9c8478d286..0a2ffc733d3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3311,6 +3311,20 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (otype_precision < itype_precision && wi::eq_p (trunc_max, int_cst))))))
 
+/* Signed saturation add, case 1:
+   T min = (T)1u << (sizeof (T) * 8 - 1);
+   T max = min - 1;
+   SAT_S_ADD = (X ^ Y) < 0
+     ? (X + Y)
+     : ((T)(X + Y) ^ X) >= 0 ? (X + Y) : X < 0 ? min : max.  */
+(match (signed_integer_sat_add @0 @1)
+  (cond^ (ge (bit_and:c (bit_xor @0 (convert? @2)) (bit_not (bit_xor @0 @1)))
+   integer_zerop)
+   (convert? (plus@2 (convert1? @0) (convert1? @1)))
+   (bit_xor (negate (convert (lt @0 integer_zerop))) max_value))
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8d96a4c964b..d5c9b475f72 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4023,6 +4023,8 @@  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
 				    tree lhs, tree op_0, tree op_1)
@@ -4072,7 +4074,8 @@  match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
 }
 
 /*
- * Try to match saturation unsigned add with PHI.
+ * Try to match saturation add with PHI.
+ * For unsigned integer:
  *   <bb 2> :
  *   _1 = x_3(D) + y_4(D);
  *   if (_1 >= x_3(D))
@@ -4086,10 +4089,38 @@  match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
  *   # _2 = PHI <255(2), _1(3)>
  *   =>
  *   <bb 4> [local count: 1073741824]:
- *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
+ *   _2 = .SAT_ADD (x_4(D), y_5(D));
+ *
+ * For signed integer:
+ *   _1 = x_5(D) ^ y_6(D);
+ *   _13 = x_5(D) + y_6(D);
+ *   _3 = x_5(D) ^ _13;
+ *   _2 = ~_1;
+ *   _7 = _2 & _3;
+ *   if (_7 >= 0)
+ *     goto <bb 4>; [59.00%]
+ *   else
+ *     goto <bb 3>; [41.00%]
+ *   ;;    succ:       4
+ *   ;;                3
+ *   ;;   basic block 3, loop depth 0
+ *   ;;    pred:       2
+ *   _9 = x_5(D) < 0;
+ *   _10 = (long int) _9;
+ *   _11 = -_10;
+ *   _12 = _11 ^ 9223372036854775807;
+ *   ;;    succ:       4
+ *   ;;   basic block 4, loop depth 0
+ *   ;;    pred:       2
+ *   ;;                3
+ *   # _4 = PHI <_13(2), _12(3)>
+ *   =>
+ *   ;;   basic block 2, loop depth 0
+ *   ;;    pred:       ENTRY
+ *   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
 
 static void
-match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
+match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
 {
   if (gimple_phi_num_args (phi) != 2)
     return;
@@ -4097,7 +4128,8 @@  match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
   tree ops[2];
   tree phi_result = gimple_phi_result (phi);
 
-  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
+  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
     build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
 					ops[0], ops[1]);
 }
@@ -6097,7 +6129,7 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
     gsi_next (&psi))
     {
       gimple_stmt_iterator gsi = gsi_after_labels (bb);
-      match_unsigned_saturation_add (&gsi, psi.phi ());
+      match_saturation_add (&gsi, psi.phi ());
       match_unsigned_saturation_sub (&gsi, psi.phi ());
     }