diff mbox series

[v3] Match: Support form 1 for scalar signed integer .SAT_ADD

Message ID 20240826021941.2635027-1-pan2.li@intel.com
State New
Headers show
Series [v3] Match: Support form 1 for scalar signed integer .SAT_ADD | expand

Commit Message

Li, Pan2 Aug. 26, 2024, 2:19 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch would like to support the form 1 of the scalar signed
integer .SAT_ADD.  Aka below example:

Form 1:
  #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
  T __attribute__((noinline))                  \
  sat_s_add_##T##_fmt_1 (T x, T y)             \
  {                                            \
    T sum = (UT)x + (UT)y;                     \
    return (x ^ y) < 0                         \
      ? sum                                    \
      : (sum ^ x) >= 0                         \
        ? sum                                  \
        : x < 0 ? MIN : MAX;                   \
  }

DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)

We can tell the difference before and after this patch if backend
implemented the ssadd<m>3 pattern similar as below.

Before this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t sum;
   8   │   long unsigned int x.0_1;
   9   │   long unsigned int y.1_2;
  10   │   long unsigned int _3;
  11   │   long int _4;
  12   │   long int _5;
  13   │   int64_t _6;
  14   │   _Bool _11;
  15   │   long int _12;
  16   │   long int _13;
  17   │   long int _14;
  18   │   long int _16;
  19   │   long int _17;
  20   │
  21   │ ;;   basic block 2, loop depth 0
  22   │ ;;    pred:       ENTRY
  23   │   x.0_1 = (long unsigned int) x_7(D);
  24   │   y.1_2 = (long unsigned int) y_8(D);
  25   │   _3 = x.0_1 + y.1_2;
  26   │   sum_9 = (int64_t) _3;
  27   │   _4 = x_7(D) ^ y_8(D);
  28   │   _5 = x_7(D) ^ sum_9;
  29   │   _17 = ~_4;
  30   │   _16 = _5 & _17;
  31   │   if (_16 < 0)
  32   │     goto <bb 3>; [41.00%]
  33   │   else
  34   │     goto <bb 4>; [59.00%]
  35   │ ;;    succ:       3
  36   │ ;;                4
  37   │
  38   │ ;;   basic block 3, loop depth 0
  39   │ ;;    pred:       2
  40   │   _11 = x_7(D) < 0;
  41   │   _12 = (long int) _11;
  42   │   _13 = -_12;
  43   │   _14 = _13 ^ 9223372036854775807;
  44   │ ;;    succ:       4
  45   │
  46   │ ;;   basic block 4, loop depth 0
  47   │ ;;    pred:       2
  48   │ ;;                3
  49   │   # _6 = PHI <sum_9(2), _14(3)>
  50   │   return _6;
  51   │ ;;    succ:       EXIT
  52   │
  53   │ }

After this patch:
   4   │ __attribute__((noinline))
   5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
   6   │ {
   7   │   int64_t _4;
   8   │
   9   │ ;;   basic block 2, loop depth 0
  10   │ ;;    pred:       ENTRY
  11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  12   │   return _4;
  13   │ ;;    succ:       EXIT
  14   │
  15   │ }

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

	* match.pd: Add the matching for signed .SAT_ADD.
	* tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
	matching func decl.
	(match_unsigned_saturation_add): Try signed .SAT_ADD and rename
	to ...
	(match_saturation_add): ... here.
	(math_opts_dom_walker::after_dom_children): Update the above renamed
	func from caller.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd              | 18 ++++++++++++++++++
 gcc/tree-ssa-math-opts.cc | 35 ++++++++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 5 deletions(-)

Comments

Richard Biener Aug. 26, 2024, 1:39 p.m. UTC | #1
On Mon, Aug 26, 2024 at 4:20 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD.  Aka below example:
>
> Form 1:
>   #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
>   T __attribute__((noinline))                  \
>   sat_s_add_##T##_fmt_1 (T x, T y)             \
>   {                                            \
>     T sum = (UT)x + (UT)y;                     \
>     return (x ^ y) < 0                         \
>       ? sum                                    \
>       : (sum ^ x) >= 0                         \
>         ? sum                                  \
>         : x < 0 ? MIN : MAX;                   \
>   }
>
> DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t sum;
>    8   │   long unsigned int x.0_1;
>    9   │   long unsigned int y.1_2;
>   10   │   long unsigned int _3;
>   11   │   long int _4;
>   12   │   long int _5;
>   13   │   int64_t _6;
>   14   │   _Bool _11;
>   15   │   long int _12;
>   16   │   long int _13;
>   17   │   long int _14;
>   18   │   long int _16;
>   19   │   long int _17;
>   20   │
>   21   │ ;;   basic block 2, loop depth 0
>   22   │ ;;    pred:       ENTRY
>   23   │   x.0_1 = (long unsigned int) x_7(D);
>   24   │   y.1_2 = (long unsigned int) y_8(D);
>   25   │   _3 = x.0_1 + y.1_2;
>   26   │   sum_9 = (int64_t) _3;
>   27   │   _4 = x_7(D) ^ y_8(D);
>   28   │   _5 = x_7(D) ^ sum_9;
>   29   │   _17 = ~_4;
>   30   │   _16 = _5 & _17;
>   31   │   if (_16 < 0)
>   32   │     goto <bb 3>; [41.00%]
>   33   │   else
>   34   │     goto <bb 4>; [59.00%]
>   35   │ ;;    succ:       3
>   36   │ ;;                4
>   37   │
>   38   │ ;;   basic block 3, loop depth 0
>   39   │ ;;    pred:       2
>   40   │   _11 = x_7(D) < 0;
>   41   │   _12 = (long int) _11;
>   42   │   _13 = -_12;
>   43   │   _14 = _13 ^ 9223372036854775807;
>   44   │ ;;    succ:       4
>   45   │
>   46   │ ;;   basic block 4, loop depth 0
>   47   │ ;;    pred:       2
>   48   │ ;;                3
>   49   │   # _6 = PHI <sum_9(2), _14(3)>
>   50   │   return _6;
>   51   │ ;;    succ:       EXIT
>   52   │
>   53   │ }
>
> After this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t _4;
>    8   │
>    9   │ ;;   basic block 2, loop depth 0
>   10   │ ;;    pred:       ENTRY
>   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   12   │   return _4;
>   13   │ ;;    succ:       EXIT
>   14   │
>   15   │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
>         * match.pd: Add the matching for signed .SAT_ADD.
>         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
>         matching func decl.
>         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
>         to ...
>         (match_saturation_add): ... here.
>         (math_opts_dom_walker::after_dom_children): Update the above renamed
>         func from caller.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd              | 18 ++++++++++++++++++
>  gcc/tree-ssa-math-opts.cc | 35 ++++++++++++++++++++++++++++++-----
>  2 files changed, 48 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 78f1957e8c7..b059e313415 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3192,6 +3192,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0))))
>
> +/* Signed saturation add, case 1:
> +   T sum = (UT)X + (UT)Y;
> +   SAT_S_ADD = (X ^ Y) < 0
> +     ? sum
> +     : (sum ^ x) >= 0
> +       ? sum
> +       : x < 0 ? MIN : MAX;
> +   T and UT are type pair like T=int8_t, UT=uint8_t.  */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert@2 (plus:c (convert @0)
> +                                                       (convert @1))))

I think you want to use nop_convert here, for sure a truncation or
extension wouldn't be valid?

> +                      (bit_not (bit_xor:c @0 @1)))

I think you don't need :c on both the inner plus and the bit_xor here?

> +           integer_zerop)
> +       (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)

The comment above quotes 'MIN' but that's not present here - that is,
the comment quotes a source form while we match what we see on
GIMPLE?  I do expect the matching will be quite fragile when not
being isolated.

How do you select the cases you want to support?

> +       @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* Unsigned saturation sub, case 1 (branch with gt):
>     SAT_U_SUB = X > Y ? X - Y : 0  */
>  (match (unsigned_integer_sat_sub @0 @1)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..3c93fca5b53 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
>                                     tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>  }
>
>  /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
>   *   <bb 2> :
>   *   _1 = x_3(D) + y_4(D);
>   *   if (_1 >= x_3(D))
> @@ -4086,10 +4089,31 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>   *   # _2 = PHI <255(2), _1(3)>
>   *   =>
>   *   <bb 4> [local count: 1073741824]:
> - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + *   x.0_1 = (long unsigned int) x_7(D);
> + *   y.1_2 = (long unsigned int) y_8(D);
> + *   _3 = x.0_1 + y.1_2;
> + *   sum_9 = (int64_t) _3;
> + *   _4 = x_7(D) ^ y_8(D);
> + *   _5 = x_7(D) ^ sum_9;
> + *   _15 = ~_4;
> + *   _16 = _5 & _15;
> + *   if (_16 < 0)
> + *     goto <bb 3>; [41.00%]
> + *   else
> + *     goto <bb 4>; [59.00%]
> + *   _11 = x_7(D) < 0;
> + *   _12 = (long int) _11;
> + *   _13 = -_12;
> + *   _14 = _13 ^ 9223372036854775807;
> + *   # _6 = PHI <_14(3), sum_9(2)>
> + *   =>
> + *   _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
>
>  static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>  {
>    if (gimple_phi_num_args (phi) != 2)
>      return;
> @@ -4097,7 +4121,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>    tree ops[2];
>    tree phi_result = gimple_phi_result (phi);
>
> -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
>      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
>                                         ops[0], ops[1]);
>  }
> @@ -6097,7 +6122,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>      gsi_next (&psi))
>      {
>        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> -      match_unsigned_saturation_add (&gsi, psi.phi ());
> +      match_saturation_add (&gsi, psi.phi ());
>        match_unsigned_saturation_sub (&gsi, psi.phi ());
>      }
>
> --
> 2.43.0
>
Li, Pan2 Aug. 27, 2024, 1:06 a.m. UTC | #2
Thanks Richard for comments.

> I think you want to use nop_convert here, for sure a truncation or
> extension wouldn't be valid?

Oh, yes, should be nop_convert.

> I think you don't need :c on both the inner plus and the bit_xor here?

Sure, could you please help to explain more about when should I need to add :c?
Liker inner plus/and/or ... etc, sometimes got confused for similar scenarios.

> +           integer_zerop)
> +       (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)

> The comment above quotes 'MIN' but that's not present here - that is,
> the comment quotes a source form while we match what we see on
> GIMPLE?  I do expect the matching will be quite fragile when not
> being isolated.

Got it, will update the comments to gimple.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Monday, August 26, 2024 9:40 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v3] Match: Support form 1 for scalar signed integer .SAT_ADD

On Mon, Aug 26, 2024 at 4:20 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to support the form 1 of the scalar signed
> integer .SAT_ADD.  Aka below example:
>
> Form 1:
>   #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
>   T __attribute__((noinline))                  \
>   sat_s_add_##T##_fmt_1 (T x, T y)             \
>   {                                            \
>     T sum = (UT)x + (UT)y;                     \
>     return (x ^ y) < 0                         \
>       ? sum                                    \
>       : (sum ^ x) >= 0                         \
>         ? sum                                  \
>         : x < 0 ? MIN : MAX;                   \
>   }
>
> DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
>
> We can tell the difference before and after this patch if backend
> implemented the ssadd<m>3 pattern similar as below.
>
> Before this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t sum;
>    8   │   long unsigned int x.0_1;
>    9   │   long unsigned int y.1_2;
>   10   │   long unsigned int _3;
>   11   │   long int _4;
>   12   │   long int _5;
>   13   │   int64_t _6;
>   14   │   _Bool _11;
>   15   │   long int _12;
>   16   │   long int _13;
>   17   │   long int _14;
>   18   │   long int _16;
>   19   │   long int _17;
>   20   │
>   21   │ ;;   basic block 2, loop depth 0
>   22   │ ;;    pred:       ENTRY
>   23   │   x.0_1 = (long unsigned int) x_7(D);
>   24   │   y.1_2 = (long unsigned int) y_8(D);
>   25   │   _3 = x.0_1 + y.1_2;
>   26   │   sum_9 = (int64_t) _3;
>   27   │   _4 = x_7(D) ^ y_8(D);
>   28   │   _5 = x_7(D) ^ sum_9;
>   29   │   _17 = ~_4;
>   30   │   _16 = _5 & _17;
>   31   │   if (_16 < 0)
>   32   │     goto <bb 3>; [41.00%]
>   33   │   else
>   34   │     goto <bb 4>; [59.00%]
>   35   │ ;;    succ:       3
>   36   │ ;;                4
>   37   │
>   38   │ ;;   basic block 3, loop depth 0
>   39   │ ;;    pred:       2
>   40   │   _11 = x_7(D) < 0;
>   41   │   _12 = (long int) _11;
>   42   │   _13 = -_12;
>   43   │   _14 = _13 ^ 9223372036854775807;
>   44   │ ;;    succ:       4
>   45   │
>   46   │ ;;   basic block 4, loop depth 0
>   47   │ ;;    pred:       2
>   48   │ ;;                3
>   49   │   # _6 = PHI <sum_9(2), _14(3)>
>   50   │   return _6;
>   51   │ ;;    succ:       EXIT
>   52   │
>   53   │ }
>
> After this patch:
>    4   │ __attribute__((noinline))
>    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
>    6   │ {
>    7   │   int64_t _4;
>    8   │
>    9   │ ;;   basic block 2, loop depth 0
>   10   │ ;;    pred:       ENTRY
>   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   12   │   return _4;
>   13   │ ;;    succ:       EXIT
>   14   │
>   15   │ }
>
> The below test suites are passed for this patch.
> * The rv64gcv fully regression test.
> * The x86 bootstrap test.
> * The x86 fully regression test.
>
> gcc/ChangeLog:
>
>         * match.pd: Add the matching for signed .SAT_ADD.
>         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
>         matching func decl.
>         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
>         to ...
>         (match_saturation_add): ... here.
>         (math_opts_dom_walker::after_dom_children): Update the above renamed
>         func from caller.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd              | 18 ++++++++++++++++++
>  gcc/tree-ssa-math-opts.cc | 35 ++++++++++++++++++++++++++++++-----
>  2 files changed, 48 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 78f1957e8c7..b059e313415 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3192,6 +3192,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0))))
>
> +/* Signed saturation add, case 1:
> +   T sum = (UT)X + (UT)Y;
> +   SAT_S_ADD = (X ^ Y) < 0
> +     ? sum
> +     : (sum ^ x) >= 0
> +       ? sum
> +       : x < 0 ? MIN : MAX;
> +   T and UT are type pair like T=int8_t, UT=uint8_t.  */
> +(match (signed_integer_sat_add @0 @1)
> + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert@2 (plus:c (convert @0)
> +                                                       (convert @1))))

I think you want to use nop_convert here, for sure a truncation or
extension wouldn't be valid?

> +                      (bit_not (bit_xor:c @0 @1)))

I think you don't need :c on both the inner plus and the bit_xor here?

> +           integer_zerop)
> +       (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)

The comment above quotes 'MIN' but that's not present here - that is,
the comment quotes a source form while we match what we see on
GIMPLE?  I do expect the matching will be quite fragile when not
being isolated.

How do you select the cases you want to support?

> +       @2)
> + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* Unsigned saturation sub, case 1 (branch with gt):
>     SAT_U_SUB = X > Y ? X - Y : 0  */
>  (match (unsigned_integer_sat_sub @0 @1)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 8d96a4c964b..3c93fca5b53 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
>  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
>
> +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> +
>  static void
>  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
>                                     tree lhs, tree op_0, tree op_1)
> @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>  }
>
>  /*
> - * Try to match saturation unsigned add with PHI.
> + * Try to match saturation add with PHI.
> + * For unsigned integer:
>   *   <bb 2> :
>   *   _1 = x_3(D) + y_4(D);
>   *   if (_1 >= x_3(D))
> @@ -4086,10 +4089,31 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>   *   # _2 = PHI <255(2), _1(3)>
>   *   =>
>   *   <bb 4> [local count: 1073741824]:
> - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> + *
> + * For signed integer:
> + *   x.0_1 = (long unsigned int) x_7(D);
> + *   y.1_2 = (long unsigned int) y_8(D);
> + *   _3 = x.0_1 + y.1_2;
> + *   sum_9 = (int64_t) _3;
> + *   _4 = x_7(D) ^ y_8(D);
> + *   _5 = x_7(D) ^ sum_9;
> + *   _15 = ~_4;
> + *   _16 = _5 & _15;
> + *   if (_16 < 0)
> + *     goto <bb 3>; [41.00%]
> + *   else
> + *     goto <bb 4>; [59.00%]
> + *   _11 = x_7(D) < 0;
> + *   _12 = (long int) _11;
> + *   _13 = -_12;
> + *   _14 = _13 ^ 9223372036854775807;
> + *   # _6 = PHI <_14(3), sum_9(2)>
> + *   =>
> + *   _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
>
>  static void
> -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>  {
>    if (gimple_phi_num_args (phi) != 2)
>      return;
> @@ -4097,7 +4121,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
>    tree ops[2];
>    tree phi_result = gimple_phi_result (phi);
>
> -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
>      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
>                                         ops[0], ops[1]);
>  }
> @@ -6097,7 +6122,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>      gsi_next (&psi))
>      {
>        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> -      match_unsigned_saturation_add (&gsi, psi.phi ());
> +      match_saturation_add (&gsi, psi.phi ());
>        match_unsigned_saturation_sub (&gsi, psi.phi ());
>      }
>
> --
> 2.43.0
>
Richard Biener Aug. 27, 2024, 8:41 a.m. UTC | #3
On Tue, Aug 27, 2024 at 3:06 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard for comments.
>
> > I think you want to use nop_convert here, for sure a truncation or
> > extension wouldn't be valid?
>
> Oh, yes, should be nop_convert.
>
> > I think you don't need :c on both the inner plus and the bit_xor here?
>
> Sure, could you please help to explain more about when should I need to add :c?
> Liker inner plus/and/or ... etc, sometimes got confused for similar scenarios.

:c is required when you want to match up @0s and they appear in a commutative
operation and there's no canonicalization rule putting it into one or the other
position.  In your case you have two commutative operations you want to match
up, so it should be only necessary to try swapping one of it to get the match,
it's not required to swap both.  This reduces the number of generated patterns.

> > +           integer_zerop)
> > +       (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
>
> > The comment above quotes 'MIN' but that's not present here - that is,
> > the comment quotes a source form while we match what we see on
> > GIMPLE?  I do expect the matching will be quite fragile when not
> > being isolated.
>
> Got it, will update the comments to gimple.
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Monday, August 26, 2024 9:40 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v3] Match: Support form 1 for scalar signed integer .SAT_ADD
>
> On Mon, Aug 26, 2024 at 4:20 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to support the form 1 of the scalar signed
> > integer .SAT_ADD.  Aka below example:
> >
> > Form 1:
> >   #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
> >   T __attribute__((noinline))                  \
> >   sat_s_add_##T##_fmt_1 (T x, T y)             \
> >   {                                            \
> >     T sum = (UT)x + (UT)y;                     \
> >     return (x ^ y) < 0                         \
> >       ? sum                                    \
> >       : (sum ^ x) >= 0                         \
> >         ? sum                                  \
> >         : x < 0 ? MIN : MAX;                   \
> >   }
> >
> > DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
> >
> > We can tell the difference before and after this patch if backend
> > implemented the ssadd<m>3 pattern similar as below.
> >
> > Before this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   int64_t sum;
> >    8   │   long unsigned int x.0_1;
> >    9   │   long unsigned int y.1_2;
> >   10   │   long unsigned int _3;
> >   11   │   long int _4;
> >   12   │   long int _5;
> >   13   │   int64_t _6;
> >   14   │   _Bool _11;
> >   15   │   long int _12;
> >   16   │   long int _13;
> >   17   │   long int _14;
> >   18   │   long int _16;
> >   19   │   long int _17;
> >   20   │
> >   21   │ ;;   basic block 2, loop depth 0
> >   22   │ ;;    pred:       ENTRY
> >   23   │   x.0_1 = (long unsigned int) x_7(D);
> >   24   │   y.1_2 = (long unsigned int) y_8(D);
> >   25   │   _3 = x.0_1 + y.1_2;
> >   26   │   sum_9 = (int64_t) _3;
> >   27   │   _4 = x_7(D) ^ y_8(D);
> >   28   │   _5 = x_7(D) ^ sum_9;
> >   29   │   _17 = ~_4;
> >   30   │   _16 = _5 & _17;
> >   31   │   if (_16 < 0)
> >   32   │     goto <bb 3>; [41.00%]
> >   33   │   else
> >   34   │     goto <bb 4>; [59.00%]
> >   35   │ ;;    succ:       3
> >   36   │ ;;                4
> >   37   │
> >   38   │ ;;   basic block 3, loop depth 0
> >   39   │ ;;    pred:       2
> >   40   │   _11 = x_7(D) < 0;
> >   41   │   _12 = (long int) _11;
> >   42   │   _13 = -_12;
> >   43   │   _14 = _13 ^ 9223372036854775807;
> >   44   │ ;;    succ:       4
> >   45   │
> >   46   │ ;;   basic block 4, loop depth 0
> >   47   │ ;;    pred:       2
> >   48   │ ;;                3
> >   49   │   # _6 = PHI <sum_9(2), _14(3)>
> >   50   │   return _6;
> >   51   │ ;;    succ:       EXIT
> >   52   │
> >   53   │ }
> >
> > After this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   int64_t _4;
> >    8   │
> >    9   │ ;;   basic block 2, loop depth 0
> >   10   │ ;;    pred:       ENTRY
> >   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> >   12   │   return _4;
> >   13   │ ;;    succ:       EXIT
> >   14   │
> >   15   │ }
> >
> > The below test suites are passed for this patch.
> > * The rv64gcv fully regression test.
> > * The x86 bootstrap test.
> > * The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> >         * match.pd: Add the matching for signed .SAT_ADD.
> >         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> >         matching func decl.
> >         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> >         to ...
> >         (match_saturation_add): ... here.
> >         (math_opts_dom_walker::after_dom_children): Update the above renamed
> >         func from caller.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/match.pd              | 18 ++++++++++++++++++
> >  gcc/tree-ssa-math-opts.cc | 35 ++++++++++++++++++++++++++++++-----
> >  2 files changed, 48 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 78f1957e8c7..b059e313415 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3192,6 +3192,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0))))
> >
> > +/* Signed saturation add, case 1:
> > +   T sum = (UT)X + (UT)Y;
> > +   SAT_S_ADD = (X ^ Y) < 0
> > +     ? sum
> > +     : (sum ^ x) >= 0
> > +       ? sum
> > +       : x < 0 ? MIN : MAX;
> > +   T and UT are type pair like T=int8_t, UT=uint8_t.  */
> > +(match (signed_integer_sat_add @0 @1)
> > + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert@2 (plus:c (convert @0)
> > +                                                       (convert @1))))
>
> I think you want to use nop_convert here, for sure a truncation or
> extension wouldn't be valid?
>
> > +                      (bit_not (bit_xor:c @0 @1)))
>
> I think you don't need :c on both the inner plus and the bit_xor here?
>
> > +           integer_zerop)
> > +       (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
>
> The comment above quotes 'MIN' but that's not present here - that is,
> the comment quotes a source form while we match what we see on
> GIMPLE?  I do expect the matching will be quite fragile when not
> being isolated.
>
> How do you select the cases you want to support?
>
> > +       @2)
> > + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* Unsigned saturation sub, case 1 (branch with gt):
> >     SAT_U_SUB = X > Y ? X - Y : 0  */
> >  (match (unsigned_integer_sat_sub @0 @1)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 8d96a4c964b..3c93fca5b53 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
> >
> > +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> > +
> >  static void
> >  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> >                                     tree lhs, tree op_0, tree op_1)
> > @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >  }
> >
> >  /*
> > - * Try to match saturation unsigned add with PHI.
> > + * Try to match saturation add with PHI.
> > + * For unsigned integer:
> >   *   <bb 2> :
> >   *   _1 = x_3(D) + y_4(D);
> >   *   if (_1 >= x_3(D))
> > @@ -4086,10 +4089,31 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >   *   # _2 = PHI <255(2), _1(3)>
> >   *   =>
> >   *   <bb 4> [local count: 1073741824]:
> > - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> > + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> > + *
> > + * For signed integer:
> > + *   x.0_1 = (long unsigned int) x_7(D);
> > + *   y.1_2 = (long unsigned int) y_8(D);
> > + *   _3 = x.0_1 + y.1_2;
> > + *   sum_9 = (int64_t) _3;
> > + *   _4 = x_7(D) ^ y_8(D);
> > + *   _5 = x_7(D) ^ sum_9;
> > + *   _15 = ~_4;
> > + *   _16 = _5 & _15;
> > + *   if (_16 < 0)
> > + *     goto <bb 3>; [41.00%]
> > + *   else
> > + *     goto <bb 4>; [59.00%]
> > + *   _11 = x_7(D) < 0;
> > + *   _12 = (long int) _11;
> > + *   _13 = -_12;
> > + *   _14 = _13 ^ 9223372036854775807;
> > + *   # _6 = PHI <_14(3), sum_9(2)>
> > + *   =>
> > + *   _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
> >
> >  static void
> > -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> > +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >  {
> >    if (gimple_phi_num_args (phi) != 2)
> >      return;
> > @@ -4097,7 +4121,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >    tree ops[2];
> >    tree phi_result = gimple_phi_result (phi);
> >
> > -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> > +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> > +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> >      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> >                                         ops[0], ops[1]);
> >  }
> > @@ -6097,7 +6122,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >      gsi_next (&psi))
> >      {
> >        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> > -      match_unsigned_saturation_add (&gsi, psi.phi ());
> > +      match_saturation_add (&gsi, psi.phi ());
> >        match_unsigned_saturation_sub (&gsi, psi.phi ());
> >      }
> >
> > --
> > 2.43.0
> >
Li, Pan2 Aug. 27, 2024, 11:25 a.m. UTC | #4
> :c is required when you want to match up @0s and they appear in a commutative
> operation and there's no canonicalization rule putting it into one or the other
> position.  In your case you have two commutative operations you want to match
> up, so it should be only necessary to try swapping one of it to get the match,
> it's not required to swap both.  This reduces the number of generated patterns.

Thanks Richard for the explanation. Got the point that the swap on captures for a op will
also effect on other op(s), will update in v4.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Tuesday, August 27, 2024 4:41 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v3] Match: Support form 1 for scalar signed integer .SAT_ADD

On Tue, Aug 27, 2024 at 3:06 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard for comments.
>
> > I think you want to use nop_convert here, for sure a truncation or
> > extension wouldn't be valid?
>
> Oh, yes, should be nop_convert.
>
> > I think you don't need :c on both the inner plus and the bit_xor here?
>
> Sure, could you please help to explain more about when should I need to add :c?
> Liker inner plus/and/or ... etc, sometimes got confused for similar scenarios.

:c is required when you want to match up @0s and they appear in a commutative
operation and there's no canonicalization rule putting it into one or the other
position.  In your case you have two commutative operations you want to match
up, so it should be only necessary to try swapping one of it to get the match,
it's not required to swap both.  This reduces the number of generated patterns.

> > +           integer_zerop)
> > +       (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
>
> > The comment above quotes 'MIN' but that's not present here - that is,
> > the comment quotes a source form while we match what we see on
> > GIMPLE?  I do expect the matching will be quite fragile when not
> > being isolated.
>
> Got it, will update the comments to gimple.
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Monday, August 26, 2024 9:40 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v3] Match: Support form 1 for scalar signed integer .SAT_ADD
>
> On Mon, Aug 26, 2024 at 4:20 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to support the form 1 of the scalar signed
> > integer .SAT_ADD.  Aka below example:
> >
> > Form 1:
> >   #define DEF_SAT_S_ADD_FMT_1(T, UT, MIN, MAX) \
> >   T __attribute__((noinline))                  \
> >   sat_s_add_##T##_fmt_1 (T x, T y)             \
> >   {                                            \
> >     T sum = (UT)x + (UT)y;                     \
> >     return (x ^ y) < 0                         \
> >       ? sum                                    \
> >       : (sum ^ x) >= 0                         \
> >         ? sum                                  \
> >         : x < 0 ? MIN : MAX;                   \
> >   }
> >
> > DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX)
> >
> > We can tell the difference before and after this patch if backend
> > implemented the ssadd<m>3 pattern similar as below.
> >
> > Before this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   int64_t sum;
> >    8   │   long unsigned int x.0_1;
> >    9   │   long unsigned int y.1_2;
> >   10   │   long unsigned int _3;
> >   11   │   long int _4;
> >   12   │   long int _5;
> >   13   │   int64_t _6;
> >   14   │   _Bool _11;
> >   15   │   long int _12;
> >   16   │   long int _13;
> >   17   │   long int _14;
> >   18   │   long int _16;
> >   19   │   long int _17;
> >   20   │
> >   21   │ ;;   basic block 2, loop depth 0
> >   22   │ ;;    pred:       ENTRY
> >   23   │   x.0_1 = (long unsigned int) x_7(D);
> >   24   │   y.1_2 = (long unsigned int) y_8(D);
> >   25   │   _3 = x.0_1 + y.1_2;
> >   26   │   sum_9 = (int64_t) _3;
> >   27   │   _4 = x_7(D) ^ y_8(D);
> >   28   │   _5 = x_7(D) ^ sum_9;
> >   29   │   _17 = ~_4;
> >   30   │   _16 = _5 & _17;
> >   31   │   if (_16 < 0)
> >   32   │     goto <bb 3>; [41.00%]
> >   33   │   else
> >   34   │     goto <bb 4>; [59.00%]
> >   35   │ ;;    succ:       3
> >   36   │ ;;                4
> >   37   │
> >   38   │ ;;   basic block 3, loop depth 0
> >   39   │ ;;    pred:       2
> >   40   │   _11 = x_7(D) < 0;
> >   41   │   _12 = (long int) _11;
> >   42   │   _13 = -_12;
> >   43   │   _14 = _13 ^ 9223372036854775807;
> >   44   │ ;;    succ:       4
> >   45   │
> >   46   │ ;;   basic block 4, loop depth 0
> >   47   │ ;;    pred:       2
> >   48   │ ;;                3
> >   49   │   # _6 = PHI <sum_9(2), _14(3)>
> >   50   │   return _6;
> >   51   │ ;;    succ:       EXIT
> >   52   │
> >   53   │ }
> >
> > After this patch:
> >    4   │ __attribute__((noinline))
> >    5   │ int64_t sat_s_add_int64_t_fmt_1 (int64_t x, int64_t y)
> >    6   │ {
> >    7   │   int64_t _4;
> >    8   │
> >    9   │ ;;   basic block 2, loop depth 0
> >   10   │ ;;    pred:       ENTRY
> >   11   │   _4 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
> >   12   │   return _4;
> >   13   │ ;;    succ:       EXIT
> >   14   │
> >   15   │ }
> >
> > The below test suites are passed for this patch.
> > * The rv64gcv fully regression test.
> > * The x86 bootstrap test.
> > * The x86 fully regression test.
> >
> > gcc/ChangeLog:
> >
> >         * match.pd: Add the matching for signed .SAT_ADD.
> >         * tree-ssa-math-opts.cc (gimple_signed_integer_sat_add): Add new
> >         matching func decl.
> >         (match_unsigned_saturation_add): Try signed .SAT_ADD and rename
> >         to ...
> >         (match_saturation_add): ... here.
> >         (math_opts_dom_walker::after_dom_children): Update the above renamed
> >         func from caller.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/match.pd              | 18 ++++++++++++++++++
> >  gcc/tree-ssa-math-opts.cc | 35 ++++++++++++++++++++++++++++++-----
> >  2 files changed, 48 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 78f1957e8c7..b059e313415 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3192,6 +3192,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >    (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0))))
> >
> > +/* Signed saturation add, case 1:
> > +   T sum = (UT)X + (UT)Y;
> > +   SAT_S_ADD = (X ^ Y) < 0
> > +     ? sum
> > +     : (sum ^ x) >= 0
> > +       ? sum
> > +       : x < 0 ? MIN : MAX;
> > +   T and UT are type pair like T=int8_t, UT=uint8_t.  */
> > +(match (signed_integer_sat_add @0 @1)
> > + (cond^ (lt (bit_and:c (bit_xor:c @0 (convert@2 (plus:c (convert @0)
> > +                                                       (convert @1))))
>
> I think you want to use nop_convert here, for sure a truncation or
> extension wouldn't be valid?
>
> > +                      (bit_not (bit_xor:c @0 @1)))
>
> I think you don't need :c on both the inner plus and the bit_xor here?
>
> > +           integer_zerop)
> > +       (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
>
> The comment above quotes 'MIN' but that's not present here - that is,
> the comment quotes a source form while we match what we see on
> GIMPLE?  I do expect the matching will be quite fragile when not
> being isolated.
>
> How do you select the cases you want to support?
>
> > +       @2)
> > + (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* Unsigned saturation sub, case 1 (branch with gt):
> >     SAT_U_SUB = X > Y ? X - Y : 0  */
> >  (match (unsigned_integer_sat_sub @0 @1)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 8d96a4c964b..3c93fca5b53 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4023,6 +4023,8 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> >  extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
> >
> > +extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
> > +
> >  static void
> >  build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> >                                     tree lhs, tree op_0, tree op_1)
> > @@ -4072,7 +4074,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >  }
> >
> >  /*
> > - * Try to match saturation unsigned add with PHI.
> > + * Try to match saturation add with PHI.
> > + * For unsigned integer:
> >   *   <bb 2> :
> >   *   _1 = x_3(D) + y_4(D);
> >   *   if (_1 >= x_3(D))
> > @@ -4086,10 +4089,31 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >   *   # _2 = PHI <255(2), _1(3)>
> >   *   =>
> >   *   <bb 4> [local count: 1073741824]:
> > - *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> > + *   _2 = .SAT_ADD (x_4(D), y_5(D));
> > + *
> > + * For signed integer:
> > + *   x.0_1 = (long unsigned int) x_7(D);
> > + *   y.1_2 = (long unsigned int) y_8(D);
> > + *   _3 = x.0_1 + y.1_2;
> > + *   sum_9 = (int64_t) _3;
> > + *   _4 = x_7(D) ^ y_8(D);
> > + *   _5 = x_7(D) ^ sum_9;
> > + *   _15 = ~_4;
> > + *   _16 = _5 & _15;
> > + *   if (_16 < 0)
> > + *     goto <bb 3>; [41.00%]
> > + *   else
> > + *     goto <bb 4>; [59.00%]
> > + *   _11 = x_7(D) < 0;
> > + *   _12 = (long int) _11;
> > + *   _13 = -_12;
> > + *   _14 = _13 ^ 9223372036854775807;
> > + *   # _6 = PHI <_14(3), sum_9(2)>
> > + *   =>
> > + *   _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
> >
> >  static void
> > -match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> > +match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >  {
> >    if (gimple_phi_num_args (phi) != 2)
> >      return;
> > @@ -4097,7 +4121,8 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
> >    tree ops[2];
> >    tree phi_result = gimple_phi_result (phi);
> >
> > -  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
> > +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> > +      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
> >      build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
> >                                         ops[0], ops[1]);
> >  }
> > @@ -6097,7 +6122,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >      gsi_next (&psi))
> >      {
> >        gimple_stmt_iterator gsi = gsi_after_labels (bb);
> > -      match_unsigned_saturation_add (&gsi, psi.phi ());
> > +      match_saturation_add (&gsi, psi.phi ());
> >        match_unsigned_saturation_sub (&gsi, psi.phi ());
> >      }
> >
> > --
> > 2.43.0
> >
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 78f1957e8c7..b059e313415 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3192,6 +3192,24 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0))))
 
+/* Signed saturation add, case 1:
+   T sum = (UT)X + (UT)Y;
+   SAT_S_ADD = (X ^ Y) < 0
+     ? sum
+     : (sum ^ x) >= 0
+       ? sum
+       : x < 0 ? MIN : MAX;
+   T and UT are type pair like T=int8_t, UT=uint8_t.  */
+(match (signed_integer_sat_add @0 @1)
+ (cond^ (lt (bit_and:c (bit_xor:c @0 (convert@2 (plus:c (convert @0)
+							(convert @1))))
+		       (bit_not (bit_xor:c @0 @1)))
+	    integer_zerop)
+	(bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+	@2)
+ (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* Unsigned saturation sub, case 1 (branch with gt):
    SAT_U_SUB = X > Y ? X - Y : 0  */
 (match (unsigned_integer_sat_sub @0 @1)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 8d96a4c964b..3c93fca5b53 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4023,6 +4023,8 @@  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
 extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
+extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
+
 static void
 build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
 				    tree lhs, tree op_0, tree op_1)
@@ -4072,7 +4074,8 @@  match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
 }
 
 /*
- * Try to match saturation unsigned add with PHI.
+ * Try to match saturation add with PHI.
+ * For unsigned integer:
  *   <bb 2> :
  *   _1 = x_3(D) + y_4(D);
  *   if (_1 >= x_3(D))
@@ -4086,10 +4089,31 @@  match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
  *   # _2 = PHI <255(2), _1(3)>
  *   =>
  *   <bb 4> [local count: 1073741824]:
- *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
+ *   _2 = .SAT_ADD (x_4(D), y_5(D));
+ *
+ * For signed integer:
+ *   x.0_1 = (long unsigned int) x_7(D);
+ *   y.1_2 = (long unsigned int) y_8(D);
+ *   _3 = x.0_1 + y.1_2;
+ *   sum_9 = (int64_t) _3;
+ *   _4 = x_7(D) ^ y_8(D);
+ *   _5 = x_7(D) ^ sum_9;
+ *   _15 = ~_4;
+ *   _16 = _5 & _15;
+ *   if (_16 < 0)
+ *     goto <bb 3>; [41.00%]
+ *   else
+ *     goto <bb 4>; [59.00%]
+ *   _11 = x_7(D) < 0;
+ *   _12 = (long int) _11;
+ *   _13 = -_12;
+ *   _14 = _13 ^ 9223372036854775807;
+ *   # _6 = PHI <_14(3), sum_9(2)>
+ *   =>
+ *   _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call]  */
 
 static void
-match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
+match_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
 {
   if (gimple_phi_num_args (phi) != 2)
     return;
@@ -4097,7 +4121,8 @@  match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi)
   tree ops[2];
   tree phi_result = gimple_phi_result (phi);
 
-  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL))
+  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+      || gimple_signed_integer_sat_add (phi_result, ops, NULL))
     build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result,
 					ops[0], ops[1]);
 }
@@ -6097,7 +6122,7 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
     gsi_next (&psi))
     {
       gimple_stmt_iterator gsi = gsi_after_labels (bb);
-      match_unsigned_saturation_add (&gsi, psi.phi ());
+      match_saturation_add (&gsi, psi.phi ());
       match_unsigned_saturation_sub (&gsi, psi.phi ());
     }