diff mbox series

[v4] Match: Support more form for scalar unsigned SAT_ADD

Message ID 20240530133703.1304281-1-pan2.li@intel.com
State New
Headers show
Series [v4] Match: Support more form for scalar unsigned SAT_ADD | expand

Commit Message

Li, Pan2 May 30, 2024, 1:37 p.m. UTC
From: Pan Li <pan2.li@intel.com>

After we support one gassign form of the unsigned .SAT_ADD,  we
would like to support more forms including both the branch and
branchless.  There are 5 other forms of .SAT_ADD,  list as below:

Form 1:
  #define SAT_ADD_U_1(T) \
  T sat_add_u_1_##T(T x, T y) \
  { \
    return (T)(x + y) >= x ? (x + y) : -1; \
  }

Form 2:
  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_add_overflow (x, y, &ret); \
    return (T)(-overflow) | ret; \
  }

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
    T ret; \
    return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
    T ret; \
    return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
    return (T)(x + y) < x ? -1 : (x + y); \
  }

Take the forms 3 of above as example:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
    goto <bb 4>; [35.00%]
  else
    goto <bb 3>; [65.00%]
;;    succ:       4
;;                3

;;   basic block 3, loop depth 0
;;    pred:       2
  _1 = REALPART_EXPR <_6>;
;;    succ:       4

;;   basic block 4, loop depth 0
;;    pred:       3
;;                2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;    succ:       EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;    succ:       EXIT
}

The flag '^' acts on cond_expr will generate matching code similar as below:

else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
  {
    basic_block _b1 = gimple_bb (_a1);
    if (gimple_phi_num_args (_a1) == 2)
      {
        basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
        basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
        basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : ...
        basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : ...
        gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
        if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
          && EDGE_COUNT (_other_db_1->succs) == 1
          && EDGE_PRED (_other_db_1, 0)->src == _db_1)
          {
            tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
            tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
            tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, ...);
            bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & EDGE_TRUE_VALUE;
            tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
            tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
            switch (TREE_CODE (_p0))
              ...

The below test suites are still running, will update it later.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

	* doc/match-and-simplify.texi: Add doc for the matching flag '^'.
	* genmatch.cc (enum expr_flag): Add new enum for expr flag.
	(dt_node::gen_kids_1): Add cond_expr and flag handling.
	(dt_operand::gen_phi_on_cond): Add new func to gen phi matching
	on cond_expr.
	(parser::parse_expr): Add handling for the expr flag '^'.
	* match.pd: Add more form for unsigned .SAT_ADD.
	* tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
	(match_assign_saturation_arith): Rename to.
	(match_phi_saturation_arith): Add new func impl to match the
	.SAT_ADD when phi.
	(math_opts_dom_walker::after_dom_children): Add phi matching
	try for all gimple phi stmt.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/doc/match-and-simplify.texi |  14 ++++
 gcc/genmatch.cc                 | 126 +++++++++++++++++++++++++++++++-
 gcc/match.pd                    |  43 ++++++++++-
 gcc/tree-ssa-math-opts.cc       |  51 ++++++++++++-
 4 files changed, 229 insertions(+), 5 deletions(-)

Comments

Richard Biener June 5, 2024, 8:50 a.m. UTC | #1
On Thu, May 30, 2024 at 3:37 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support one gassign form of the unsigned .SAT_ADD,  we
> would like to support more forms including both the branch and
> branchless.  There are 5 other forms of .SAT_ADD,  list as below:
>
> Form 1:
>   #define SAT_ADD_U_1(T) \
>   T sat_add_u_1_##T(T x, T y) \
>   { \
>     return (T)(x + y) >= x ? (x + y) : -1; \
>   }
>
> Form 2:
>   #define SAT_ADD_U_2(T) \
>   T sat_add_u_2_##T(T x, T y) \
>   { \
>     T ret; \
>     T overflow = __builtin_add_overflow (x, y, &ret); \
>     return (T)(-overflow) | ret; \
>   }
>
> Form 3:
>   #define SAT_ADD_U_3(T) \
>   T sat_add_u_3_##T (T x, T y) \
>   { \
>     T ret; \
>     return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
>   }
>
> Form 4:
>   #define SAT_ADD_U_4(T) \
>   T sat_add_u_4_##T (T x, T y) \
>   { \
>     T ret; \
>     return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
>   }
>
> Form 5:
>   #define SAT_ADD_U_5(T) \
>   T sat_add_u_5_##T(T x, T y) \
>   { \
>     return (T)(x + y) < x ? -1 : (x + y); \
>   }
>
> Take the forms 3 of above as example:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
>   uint64_t ret;
>   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Before this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   long unsigned int _2;
>   uint64_t _3;
>   __complex__ long unsigned int _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
>   _2 = IMAGPART_EXPR <_6>;
>   if (_2 != 0)
>     goto <bb 4>; [35.00%]
>   else
>     goto <bb 3>; [65.00%]
> ;;    succ:       4
> ;;                3
>
> ;;   basic block 3, loop depth 0
> ;;    pred:       2
>   _1 = REALPART_EXPR <_6>;
> ;;    succ:       4
>
> ;;   basic block 4, loop depth 0
> ;;    pred:       3
> ;;                2
>   # _3 = PHI <_1(3), 18446744073709551615(2)>
>   return _3;
> ;;    succ:       EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _12;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
>   return _12;
> ;;    succ:       EXIT
> }
>
> The flag '^' acts on cond_expr will generate matching code similar as below:
>
> else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
>   {
>     basic_block _b1 = gimple_bb (_a1);
>     if (gimple_phi_num_args (_a1) == 2)
>       {
>         basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
>         basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
>         basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : ...
>         basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : ...
>         gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
>         if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
>           && EDGE_COUNT (_other_db_1->succs) == 1
>           && EDGE_PRED (_other_db_1, 0)->src == _db_1)
>           {
>             tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
>             tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
>             tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, ...);
>             bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & EDGE_TRUE_VALUE;
>             tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
>             tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
>             switch (TREE_CODE (_p0))
>               ...
>
> The below test suites are still running, will update it later.
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
>         * doc/match-and-simplify.texi: Add doc for the matching flag '^'.
>         * genmatch.cc (enum expr_flag): Add new enum for expr flag.
>         (dt_node::gen_kids_1): Add cond_expr and flag handling.
>         (dt_operand::gen_phi_on_cond): Add new func to gen phi matching
>         on cond_expr.
>         (parser::parse_expr): Add handling for the expr flag '^'.
>         * match.pd: Add more form for unsigned .SAT_ADD.
>         * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
>         (match_assign_saturation_arith): Rename to.
>         (match_phi_saturation_arith): Add new func impl to match the
>         .SAT_ADD when phi.
>         (math_opts_dom_walker::after_dom_children): Add phi matching
>         try for all gimple phi stmt.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/doc/match-and-simplify.texi |  14 ++++
>  gcc/genmatch.cc                 | 126 +++++++++++++++++++++++++++++++-
>  gcc/match.pd                    |  43 ++++++++++-
>  gcc/tree-ssa-math-opts.cc       |  51 ++++++++++++-
>  4 files changed, 229 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
> index 01f19e2f62c..fc0cf6d7552 100644
> --- a/gcc/doc/match-and-simplify.texi
> +++ b/gcc/doc/match-and-simplify.texi
> @@ -361,6 +361,20 @@ Usually the types of the generated result expressions are
>  determined from the context, but sometimes like in the above case
>  it is required that you specify them explicitly.
>
> +Another modifier for generated expressions is @code{^} which
> +tells the machinery to try more matches for some special cases.
> +Normally the @code{cond} only allows the gimple assign for matching.
> +It will also try to match the gimple phi when appending the
> +@code{^} to the @code{cond}.  Aka @code{cond^}.
> +
> +@smallexample
> +(match (unsigned_sat_add @0 @1)
> + (cond^ (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep))
> +@end smallexample
> +
> +The above matching will generate @code{gimple_unsigned_sat_add} predicate
> +function that accepts both the gimple assign and gimple phi.
> +
>  Another modifier for generated expressions is @code{!} which
>  tells the machinery to only consider the simplification in case
>  the marked expression simplified to a simple operand.  Consider
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index f1e0e7abe0c..7355c2098cf 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -838,6 +838,11 @@ public:
>    predicate_id *p;
>  };
>
> +enum expr_flag {
> +  EFLAG_NONE,
> +  EFLAG_PHI_MATCH,
> +};
> +
>  /* An operand that constitutes an expression.  Expressions include
>     function calls and user-defined predicate invocations.  */
>
> @@ -848,12 +853,12 @@ public:
>      : operand (OP_EXPR, loc), operation (operation_),
>        ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
>        is_generic (false), force_single_use (false), force_leaf (false),
> -      opt_grp (0) {}
> +      opt_grp (0), eflag (EFLAG_NONE) {}
>    expr (expr *e)
>      : operand (OP_EXPR, e->location), operation (e->operation),
>        ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
>        is_generic (e->is_generic), force_single_use (e->force_single_use),
> -      force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
> +      force_leaf (e->force_leaf), opt_grp (e->opt_grp), eflag (e->eflag) {}
>    void append_op (operand *op) { ops.safe_push (op); }
>    /* The operator and its operands.  */
>    id_base *operation;
> @@ -873,6 +878,9 @@ public:
>    bool force_leaf;
>    /* If non-zero, the group for optional handling.  */
>    unsigned char opt_grp;
> +  /* Indicate the flag of the expr,  default is none.  */
> +  enum expr_flag eflag;

please simply add to the set of 'bool' flags above, like

      bool match_phi;

> +
>    void gen_transform (FILE *f, int, const char *, bool, int,
>                       const char *, capture_info *,
>                       dt_operand ** = 0, int = 0) override;
> @@ -1819,6 +1827,7 @@ public:
>
>    char *get_name (char *);
>    void gen_opname (char *, unsigned);
> +  void gen_phi_on_cond (FILE *, int, int);
>  };
>
>  /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
> @@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>                           depth);
>           indent += 4;
>           fprintf_indent (f, indent, "{\n");
> +         bool cond_expr_p = false, expr_flag_p = false;
>           for (unsigned i = 0; i < exprs_len; ++i)
>             {
>               expr *e = as_a <expr *> (gimple_exprs[i]->op);
> @@ -3262,6 +3272,8 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>               else
>                 {
>                   id_base *op = e->operation;
> +                 cond_expr_p |= *op == COND_EXPR;
> +                 expr_flag_p |= e->eflag != EFLAG_NONE;

So this would be

                     cond_expr_p |= (*op == COND_EXPR && e->match_phi);

>                   if (*op == CONVERT_EXPR || *op == NOP_EXPR)
>                     fprintf_indent (f, indent, "CASE_CONVERT:\n");
>                   else
> @@ -3275,6 +3287,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>           fprintf_indent (f, indent, "default:;\n");
>           fprintf_indent (f, indent, "}\n");
>           indent -= 4;
> +
> +         if (cond_expr_p && expr_flag_p)
> +           {
> +             fprintf_indent (f, indent,
> +               "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
> +               depth, depth);
> +             indent += 2;
> +             fprintf_indent (f, indent, "{\n");
> +             indent += 2;
> +
> +             for (unsigned i = 0; i < exprs_len; i++)
> +               {
> +                 expr *e = as_a <expr *> (gimple_exprs[i]->op);
> +                 if (*e->operation == COND_EXPR && e->eflag == EFLAG_PHI_MATCH)

Here you have that correct.

> +                   gimple_exprs[i]->gen_phi_on_cond (f, indent, depth);
> +               }
> +
> +             indent -= 2;
> +             fprintf_indent (f, indent, "}\n");
> +             indent -= 2;
> +           }
>         }
>
>        if (fns_len)
> @@ -3483,6 +3516,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
>      }
>  }
>
> +/* Generate matching code for the phi when meet COND_EXPR.  */
> +
> +void
> +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
> +{
> +  fprintf_indent (f, indent,
> +    "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
> +
> +  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
> +
> +  indent += 2;
> +  fprintf_indent (f, indent, "{\n");
> +  indent += 2;
> +
> +  fprintf_indent (f, indent,
> +    "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
> +    "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
> +    "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
> +    depth, depth, depth, depth);
> +
> +  fprintf_indent (f, indent,
> +    "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
> +    depth, depth);
> +  fprintf_indent (f, indent, "if (_ct_%d"
> +    " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
> +  fprintf_indent (f, indent,
> +    "  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
> +
> +  indent += 2;
> +  fprintf_indent (f, indent, "{\n");
> +  indent += 2;
> +
> +  fprintf_indent (f, indent,
> +    "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
> +
> +  char opname_0[20];
> +  char opname_1[20];
> +  char opname_2[20];
> +  gen_opname (opname_0, 0);
> +
> +  fprintf_indent (f, indent,
> +    "tree %s = build2 (gimple_cond_code (_ct_%d), "
> +    "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
> +    opname_0, depth, depth, depth);
> +
> +  fprintf_indent (f, indent,
> +    "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
> +    " & EDGE_TRUE_VALUE;\n", depth, depth);
> +
> +  gen_opname (opname_1, 1);
> +  fprintf_indent (f, indent,
> +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
> +    opname_1, depth, depth);
> +
> +  gen_opname (opname_2, 2);
> +  fprintf_indent (f, indent,
> +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
> +    opname_2, depth, depth);
> +
> +  gen_kids (f, indent, true, depth);
> +
> +  indent -= 2;
> +  fprintf_indent (f, indent, "}\n");
> +  indent -= 2;
> +
> +  indent -= 2;
> +  fprintf_indent (f, indent, "}\n");
> +  indent -= 2;
> +}
> +
>  /* Emit a logging call to the debug file to the file F, with the INDENT from
>     either the RESULT location or the S's match location if RESULT is null. */
>  static void
> @@ -4600,6 +4713,15 @@ parser::parse_expr ()
>        e->force_leaf = true;
>      }
>
> +  if (parsing_match_operand && token->type == CPP_XOR
> +    && !(token->flags & PREV_WHITE))
> +    {
> +      eat_token (CPP_XOR);

Can you please diagnose ^ on non-match or non-COND_EXPRs?

You need to amend the cmp_operand function to match up the ->match_phi
flag similar to the is_generic handling.

Otherwise the patch look good, I didn't double-check the generated
PHI matching code again though.

Thanks,
Richard.

> +
> +      if (*e->operation == COND_EXPR)
> +       e->eflag = EFLAG_PHI_MATCH;
> +    }
> +
>    if (token->type == CPP_COLON
>        && !(token->flags & PREV_WHITE))
>      {
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 480e36bbbaf..e3b5b8d3810 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
>
>  /* Unsigned Saturation Add */
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -((X + Y) < X)  */
>  (match (usadd_left_part_1 @0 @1)
>   (plus:c @0 @1)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (usadd_left_part_2 @0 @1)
>   (realpart (IFN_ADD_OVERFLOW:c @0 @1))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -((type)(X + Y) < X)  */
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (lt (plus:c @0 @1) @0)))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -(X > (X + Y))  */
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (gt @0 (plus:c @0 @1))))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (usadd_right_part_2 @0 @1)
>   (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
> +(match (usadd_right_part_2 @0 @1)
> + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
>     because the sub part of left_part_2 cannot work with right_part_1.
>     For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
>
> -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +/* Unsigned saturation add, case 3 (branch with ge):
> +   SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
> +
> +/* Unsigned saturation add, case 4 (branch with lt):
> +   SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> +
> +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> +  (usadd_left_part_2 @0 @1) integer_minus_onep))
> +
> +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
> +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> +  integer_minus_onep (usadd_left_part_2 @0 @1)))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 62da1c5ee08..8624d414347 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>   *      =>
>   *      _12 = .SAT_ADD (_4, _6);  */
>  static void
> -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
>  {
>    gcall *call = NULL;
>
> @@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
>      }
>  }
>
> +/*
> + * Try to match saturation arith pattern(s).
> + * From:
> + *   <bb 2> :
> + *   _1 = x_3(D) + y_4(D);
> + *   if (_1 >= x_3(D))
> + *     goto <bb 3>; [INV]
> + *   else
> + *     goto <bb 4>; [INV]
> + *
> + *   <bb 3> :
> + *
> + *   <bb 4> :
> + *   # _2 = PHI <255(2), _1(3)>
> + *
> + * To:
> + *   <bb 4> [local count: 1073741824]:
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> +
> +static void
> +match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> +  if (gimple_phi_num_args (phi) != 2)
> +    return;
> +
> +  tree ops[2];
> +  tree phi_result = gimple_phi_result (phi);
> +
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
> +                                        OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> +      gimple_call_set_lhs (call, phi_result);
> +      gsi_insert_before (gsi, call, GSI_SAME_STMT);
> +
> +      gimple_stmt_iterator psi = gsi_for_stmt (phi);
> +      remove_phi_node (&psi, false);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
>    fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
>
> +  for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> +    {
> +      gimple_stmt_iterator gsi = gsi_last_bb (bb);
> +      match_phi_saturation_arith (&gsi, psi.phi ());
> +    }
> +
>    for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
>      {
>        gimple *stmt = gsi_stmt (gsi);
> @@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               break;
>
>             case BIT_IOR_EXPR:
> -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +             match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> --
> 2.34.1
>
Li, Pan2 June 5, 2024, 1:44 p.m. UTC | #2
Thanks Richard for comments, will address the comments in v7, and looks like I also need to resolve conflict up to a point.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Wednesday, June 5, 2024 4:50 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD

On Thu, May 30, 2024 at 3:37 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support one gassign form of the unsigned .SAT_ADD,  we
> would like to support more forms including both the branch and
> branchless.  There are 5 other forms of .SAT_ADD,  list as below:
>
> Form 1:
>   #define SAT_ADD_U_1(T) \
>   T sat_add_u_1_##T(T x, T y) \
>   { \
>     return (T)(x + y) >= x ? (x + y) : -1; \
>   }
>
> Form 2:
>   #define SAT_ADD_U_2(T) \
>   T sat_add_u_2_##T(T x, T y) \
>   { \
>     T ret; \
>     T overflow = __builtin_add_overflow (x, y, &ret); \
>     return (T)(-overflow) | ret; \
>   }
>
> Form 3:
>   #define SAT_ADD_U_3(T) \
>   T sat_add_u_3_##T (T x, T y) \
>   { \
>     T ret; \
>     return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
>   }
>
> Form 4:
>   #define SAT_ADD_U_4(T) \
>   T sat_add_u_4_##T (T x, T y) \
>   { \
>     T ret; \
>     return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
>   }
>
> Form 5:
>   #define SAT_ADD_U_5(T) \
>   T sat_add_u_5_##T(T x, T y) \
>   { \
>     return (T)(x + y) < x ? -1 : (x + y); \
>   }
>
> Take the forms 3 of above as example:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
>   uint64_t ret;
>   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Before this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   long unsigned int _2;
>   uint64_t _3;
>   __complex__ long unsigned int _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
>   _2 = IMAGPART_EXPR <_6>;
>   if (_2 != 0)
>     goto <bb 4>; [35.00%]
>   else
>     goto <bb 3>; [65.00%]
> ;;    succ:       4
> ;;                3
>
> ;;   basic block 3, loop depth 0
> ;;    pred:       2
>   _1 = REALPART_EXPR <_6>;
> ;;    succ:       4
>
> ;;   basic block 4, loop depth 0
> ;;    pred:       3
> ;;                2
>   # _3 = PHI <_1(3), 18446744073709551615(2)>
>   return _3;
> ;;    succ:       EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _12;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
>   return _12;
> ;;    succ:       EXIT
> }
>
> The flag '^' acts on cond_expr will generate matching code similar as below:
>
> else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
>   {
>     basic_block _b1 = gimple_bb (_a1);
>     if (gimple_phi_num_args (_a1) == 2)
>       {
>         basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
>         basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
>         basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : ...
>         basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : ...
>         gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
>         if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
>           && EDGE_COUNT (_other_db_1->succs) == 1
>           && EDGE_PRED (_other_db_1, 0)->src == _db_1)
>           {
>             tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
>             tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
>             tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, ...);
>             bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & EDGE_TRUE_VALUE;
>             tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
>             tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
>             switch (TREE_CODE (_p0))
>               ...
>
> The below test suites are still running, will update it later.
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
>         * doc/match-and-simplify.texi: Add doc for the matching flag '^'.
>         * genmatch.cc (enum expr_flag): Add new enum for expr flag.
>         (dt_node::gen_kids_1): Add cond_expr and flag handling.
>         (dt_operand::gen_phi_on_cond): Add new func to gen phi matching
>         on cond_expr.
>         (parser::parse_expr): Add handling for the expr flag '^'.
>         * match.pd: Add more form for unsigned .SAT_ADD.
>         * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
>         (match_assign_saturation_arith): Rename to.
>         (match_phi_saturation_arith): Add new func impl to match the
>         .SAT_ADD when phi.
>         (math_opts_dom_walker::after_dom_children): Add phi matching
>         try for all gimple phi stmt.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/doc/match-and-simplify.texi |  14 ++++
>  gcc/genmatch.cc                 | 126 +++++++++++++++++++++++++++++++-
>  gcc/match.pd                    |  43 ++++++++++-
>  gcc/tree-ssa-math-opts.cc       |  51 ++++++++++++-
>  4 files changed, 229 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
> index 01f19e2f62c..fc0cf6d7552 100644
> --- a/gcc/doc/match-and-simplify.texi
> +++ b/gcc/doc/match-and-simplify.texi
> @@ -361,6 +361,20 @@ Usually the types of the generated result expressions are
>  determined from the context, but sometimes like in the above case
>  it is required that you specify them explicitly.
>
> +Another modifier for generated expressions is @code{^} which
> +tells the machinery to try more matches for some special cases.
> +Normally the @code{cond} only allows the gimple assign for matching.
> +It will also try to match the gimple phi when appending the
> +@code{^} to the @code{cond}.  Aka @code{cond^}.
> +
> +@smallexample
> +(match (unsigned_sat_add @0 @1)
> + (cond^ (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep))
> +@end smallexample
> +
> +The above matching will generate @code{gimple_unsigned_sat_add} predicate
> +function that accepts both the gimple assign and gimple phi.
> +
>  Another modifier for generated expressions is @code{!} which
>  tells the machinery to only consider the simplification in case
>  the marked expression simplified to a simple operand.  Consider
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index f1e0e7abe0c..7355c2098cf 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -838,6 +838,11 @@ public:
>    predicate_id *p;
>  };
>
> +enum expr_flag {
> +  EFLAG_NONE,
> +  EFLAG_PHI_MATCH,
> +};
> +
>  /* An operand that constitutes an expression.  Expressions include
>     function calls and user-defined predicate invocations.  */
>
> @@ -848,12 +853,12 @@ public:
>      : operand (OP_EXPR, loc), operation (operation_),
>        ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
>        is_generic (false), force_single_use (false), force_leaf (false),
> -      opt_grp (0) {}
> +      opt_grp (0), eflag (EFLAG_NONE) {}
>    expr (expr *e)
>      : operand (OP_EXPR, e->location), operation (e->operation),
>        ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
>        is_generic (e->is_generic), force_single_use (e->force_single_use),
> -      force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
> +      force_leaf (e->force_leaf), opt_grp (e->opt_grp), eflag (e->eflag) {}
>    void append_op (operand *op) { ops.safe_push (op); }
>    /* The operator and its operands.  */
>    id_base *operation;
> @@ -873,6 +878,9 @@ public:
>    bool force_leaf;
>    /* If non-zero, the group for optional handling.  */
>    unsigned char opt_grp;
> +  /* Indicate the flag of the expr,  default is none.  */
> +  enum expr_flag eflag;

please simply add to the set of 'bool' flags above, like

      bool match_phi;

> +
>    void gen_transform (FILE *f, int, const char *, bool, int,
>                       const char *, capture_info *,
>                       dt_operand ** = 0, int = 0) override;
> @@ -1819,6 +1827,7 @@ public:
>
>    char *get_name (char *);
>    void gen_opname (char *, unsigned);
> +  void gen_phi_on_cond (FILE *, int, int);
>  };
>
>  /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
> @@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>                           depth);
>           indent += 4;
>           fprintf_indent (f, indent, "{\n");
> +         bool cond_expr_p = false, expr_flag_p = false;
>           for (unsigned i = 0; i < exprs_len; ++i)
>             {
>               expr *e = as_a <expr *> (gimple_exprs[i]->op);
> @@ -3262,6 +3272,8 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>               else
>                 {
>                   id_base *op = e->operation;
> +                 cond_expr_p |= *op == COND_EXPR;
> +                 expr_flag_p |= e->eflag != EFLAG_NONE;

So this would be

                     cond_expr_p |= (*op == COND_EXPR && e->match_phi);

>                   if (*op == CONVERT_EXPR || *op == NOP_EXPR)
>                     fprintf_indent (f, indent, "CASE_CONVERT:\n");
>                   else
> @@ -3275,6 +3287,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>           fprintf_indent (f, indent, "default:;\n");
>           fprintf_indent (f, indent, "}\n");
>           indent -= 4;
> +
> +         if (cond_expr_p && expr_flag_p)
> +           {
> +             fprintf_indent (f, indent,
> +               "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
> +               depth, depth);
> +             indent += 2;
> +             fprintf_indent (f, indent, "{\n");
> +             indent += 2;
> +
> +             for (unsigned i = 0; i < exprs_len; i++)
> +               {
> +                 expr *e = as_a <expr *> (gimple_exprs[i]->op);
> +                 if (*e->operation == COND_EXPR && e->eflag == EFLAG_PHI_MATCH)

Here you have that correct.

> +                   gimple_exprs[i]->gen_phi_on_cond (f, indent, depth);
> +               }
> +
> +             indent -= 2;
> +             fprintf_indent (f, indent, "}\n");
> +             indent -= 2;
> +           }
>         }
>
>        if (fns_len)
> @@ -3483,6 +3516,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
>      }
>  }
>
> +/* Generate matching code for the phi when meet COND_EXPR.  */
> +
> +void
> +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
> +{
> +  fprintf_indent (f, indent,
> +    "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
> +
> +  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
> +
> +  indent += 2;
> +  fprintf_indent (f, indent, "{\n");
> +  indent += 2;
> +
> +  fprintf_indent (f, indent,
> +    "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
> +    "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
> +    "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
> +    depth, depth, depth, depth);
> +
> +  fprintf_indent (f, indent,
> +    "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
> +    depth, depth);
> +  fprintf_indent (f, indent, "if (_ct_%d"
> +    " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
> +  fprintf_indent (f, indent,
> +    "  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
> +
> +  indent += 2;
> +  fprintf_indent (f, indent, "{\n");
> +  indent += 2;
> +
> +  fprintf_indent (f, indent,
> +    "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
> +
> +  char opname_0[20];
> +  char opname_1[20];
> +  char opname_2[20];
> +  gen_opname (opname_0, 0);
> +
> +  fprintf_indent (f, indent,
> +    "tree %s = build2 (gimple_cond_code (_ct_%d), "
> +    "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
> +    opname_0, depth, depth, depth);
> +
> +  fprintf_indent (f, indent,
> +    "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
> +    " & EDGE_TRUE_VALUE;\n", depth, depth);
> +
> +  gen_opname (opname_1, 1);
> +  fprintf_indent (f, indent,
> +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
> +    opname_1, depth, depth);
> +
> +  gen_opname (opname_2, 2);
> +  fprintf_indent (f, indent,
> +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
> +    opname_2, depth, depth);
> +
> +  gen_kids (f, indent, true, depth);
> +
> +  indent -= 2;
> +  fprintf_indent (f, indent, "}\n");
> +  indent -= 2;
> +
> +  indent -= 2;
> +  fprintf_indent (f, indent, "}\n");
> +  indent -= 2;
> +}
> +
>  /* Emit a logging call to the debug file to the file F, with the INDENT from
>     either the RESULT location or the S's match location if RESULT is null. */
>  static void
> @@ -4600,6 +4713,15 @@ parser::parse_expr ()
>        e->force_leaf = true;
>      }
>
> +  if (parsing_match_operand && token->type == CPP_XOR
> +    && !(token->flags & PREV_WHITE))
> +    {
> +      eat_token (CPP_XOR);

Can you please diagnose ^ on non-match or non-COND_EXPRs?

You need to amend the cmp_operand function to match up the ->match_phi
flag similar to the is_generic handling.

Otherwise the patch look good, I didn't double-check the generated
PHI matching code again though.

Thanks,
Richard.

> +
> +      if (*e->operation == COND_EXPR)
> +       e->eflag = EFLAG_PHI_MATCH;
> +    }
> +
>    if (token->type == CPP_COLON
>        && !(token->flags & PREV_WHITE))
>      {
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 480e36bbbaf..e3b5b8d3810 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
>
>  /* Unsigned Saturation Add */
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -((X + Y) < X)  */
>  (match (usadd_left_part_1 @0 @1)
>   (plus:c @0 @1)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (usadd_left_part_2 @0 @1)
>   (realpart (IFN_ADD_OVERFLOW:c @0 @1))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -((type)(X + Y) < X)  */
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (lt (plus:c @0 @1) @0)))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -(X > (X + Y))  */
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (gt @0 (plus:c @0 @1))))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (usadd_right_part_2 @0 @1)
>   (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
> +(match (usadd_right_part_2 @0 @1)
> + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
>     because the sub part of left_part_2 cannot work with right_part_1.
>     For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
>
> -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +/* Unsigned saturation add, case 3 (branch with ge):
> +   SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
> +
> +/* Unsigned saturation add, case 4 (branch with lt):
> +   SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> +
> +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> +  (usadd_left_part_2 @0 @1) integer_minus_onep))
> +
> +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
> +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> +  integer_minus_onep (usadd_left_part_2 @0 @1)))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 62da1c5ee08..8624d414347 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>   *      =>
>   *      _12 = .SAT_ADD (_4, _6);  */
>  static void
> -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
>  {
>    gcall *call = NULL;
>
> @@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
>      }
>  }
>
> +/*
> + * Try to match saturation arith pattern(s).
> + * From:
> + *   <bb 2> :
> + *   _1 = x_3(D) + y_4(D);
> + *   if (_1 >= x_3(D))
> + *     goto <bb 3>; [INV]
> + *   else
> + *     goto <bb 4>; [INV]
> + *
> + *   <bb 3> :
> + *
> + *   <bb 4> :
> + *   # _2 = PHI <255(2), _1(3)>
> + *
> + * To:
> + *   <bb 4> [local count: 1073741824]:
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> +
> +static void
> +match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> +  if (gimple_phi_num_args (phi) != 2)
> +    return;
> +
> +  tree ops[2];
> +  tree phi_result = gimple_phi_result (phi);
> +
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
> +                                        OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> +      gimple_call_set_lhs (call, phi_result);
> +      gsi_insert_before (gsi, call, GSI_SAME_STMT);
> +
> +      gimple_stmt_iterator psi = gsi_for_stmt (phi);
> +      remove_phi_node (&psi, false);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
>    fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
>
> +  for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> +    {
> +      gimple_stmt_iterator gsi = gsi_last_bb (bb);
> +      match_phi_saturation_arith (&gsi, psi.phi ());
> +    }
> +
>    for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
>      {
>        gimple *stmt = gsi_stmt (gsi);
> @@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               break;
>
>             case BIT_IOR_EXPR:
> -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +             match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> --
> 2.34.1
>
Li, Pan2 June 6, 2024, 1:19 a.m. UTC | #3
Hi Richard,

After revisited all the comments of the mail thread, I would like to confirm if my understanding is correct according to the generated match code.
For now the generated code looks like below:

else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
  {
    basic_block _b1 = gimple_bb (_a1);
    if (gimple_phi_num_args (_a1) == 2)
      {
        basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
        basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
        basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1;
        basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1;
        gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
        if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
          && EDGE_COUNT (_other_db_1->succs) == 1
          && EDGE_PRED (_other_db_1, 0)->src == _db_1)
          {
            tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
            tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
            tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1);
            bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE;
            tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
            tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
            ....

The flow may look like below, or can only handling flow like below.

+------+
| cond |-------+
+------+       v
   |        +-------+
   |        | other |
   |        +-------+
   v           |
+-----+        |
| PHI | <------+ 
+-----+

Thus, I think it cannot handle the below 2 PHI flows (or even more complicated shapes)

+------+
| cond |-------+
+------+       |
   |           |
   v           |
+------+       |
| mid  |       v
+------+    +-------+
   |        | other |
   |        +-------+
   v           |
+-----+        |
| PHI | <------+ 
+-----+

+------+
| cond |-----------+
+------+           |
   |               v
   |            +-------+
   |            | mid-0 |--------+
   |            +-------+        |
   |               |             v
   |               |           +-------+
   |               |           | mid-1 |
   |               v           +-------+
   |            +-------+        |
   |            | other |<-------+
   |            +-------+        
   v               |
+-----+            |
| PHI | <----------+ 
+-----+

So I am not very sure if we need (or reasonable) to take care of all the PHI gimple flows (may impossible ?) Or keep the simplest one for now and add more case by case.
Thanks a lot.

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Wednesday, June 5, 2024 9:44 PM
To: Richard Biener <richard.guenther@gmail.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: RE: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD

Thanks Richard for comments, will address the comments in v7, and looks like I also need to resolve conflict up to a point.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Wednesday, June 5, 2024 4:50 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD

On Thu, May 30, 2024 at 3:37 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> After we support one gassign form of the unsigned .SAT_ADD,  we
> would like to support more forms including both the branch and
> branchless.  There are 5 other forms of .SAT_ADD,  list as below:
>
> Form 1:
>   #define SAT_ADD_U_1(T) \
>   T sat_add_u_1_##T(T x, T y) \
>   { \
>     return (T)(x + y) >= x ? (x + y) : -1; \
>   }
>
> Form 2:
>   #define SAT_ADD_U_2(T) \
>   T sat_add_u_2_##T(T x, T y) \
>   { \
>     T ret; \
>     T overflow = __builtin_add_overflow (x, y, &ret); \
>     return (T)(-overflow) | ret; \
>   }
>
> Form 3:
>   #define SAT_ADD_U_3(T) \
>   T sat_add_u_3_##T (T x, T y) \
>   { \
>     T ret; \
>     return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
>   }
>
> Form 4:
>   #define SAT_ADD_U_4(T) \
>   T sat_add_u_4_##T (T x, T y) \
>   { \
>     T ret; \
>     return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
>   }
>
> Form 5:
>   #define SAT_ADD_U_5(T) \
>   T sat_add_u_5_##T(T x, T y) \
>   { \
>     return (T)(x + y) < x ? -1 : (x + y); \
>   }
>
> Take the forms 3 of above as example:
>
> uint64_t
> sat_add (uint64_t x, uint64_t y)
> {
>   uint64_t ret;
>   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> }
>
> Before this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   long unsigned int _2;
>   uint64_t _3;
>   __complex__ long unsigned int _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
>   _2 = IMAGPART_EXPR <_6>;
>   if (_2 != 0)
>     goto <bb 4>; [35.00%]
>   else
>     goto <bb 3>; [65.00%]
> ;;    succ:       4
> ;;                3
>
> ;;   basic block 3, loop depth 0
> ;;    pred:       2
>   _1 = REALPART_EXPR <_6>;
> ;;    succ:       4
>
> ;;   basic block 4, loop depth 0
> ;;    pred:       3
> ;;                2
>   # _3 = PHI <_1(3), 18446744073709551615(2)>
>   return _3;
> ;;    succ:       EXIT
> }
>
> After this patch:
> uint64_t sat_add (uint64_t x, uint64_t y)
> {
>   long unsigned int _12;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
>   return _12;
> ;;    succ:       EXIT
> }
>
> The flag '^' acts on cond_expr will generate matching code similar as below:
>
> else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
>   {
>     basic_block _b1 = gimple_bb (_a1);
>     if (gimple_phi_num_args (_a1) == 2)
>       {
>         basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
>         basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
>         basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : ...
>         basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : ...
>         gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
>         if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
>           && EDGE_COUNT (_other_db_1->succs) == 1
>           && EDGE_PRED (_other_db_1, 0)->src == _db_1)
>           {
>             tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
>             tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
>             tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, ...);
>             bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & EDGE_TRUE_VALUE;
>             tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
>             tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
>             switch (TREE_CODE (_p0))
>               ...
>
> The below test suites are still running, will update it later.
> * The x86 bootstrap test.
> * The x86 fully regression test.
> * The riscv fully regression test.
>
> gcc/ChangeLog:
>
>         * doc/match-and-simplify.texi: Add doc for the matching flag '^'.
>         * genmatch.cc (enum expr_flag): Add new enum for expr flag.
>         (dt_node::gen_kids_1): Add cond_expr and flag handling.
>         (dt_operand::gen_phi_on_cond): Add new func to gen phi matching
>         on cond_expr.
>         (parser::parse_expr): Add handling for the expr flag '^'.
>         * match.pd: Add more form for unsigned .SAT_ADD.
>         * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
>         (match_assign_saturation_arith): Rename to.
>         (match_phi_saturation_arith): Add new func impl to match the
>         .SAT_ADD when phi.
>         (math_opts_dom_walker::after_dom_children): Add phi matching
>         try for all gimple phi stmt.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/doc/match-and-simplify.texi |  14 ++++
>  gcc/genmatch.cc                 | 126 +++++++++++++++++++++++++++++++-
>  gcc/match.pd                    |  43 ++++++++++-
>  gcc/tree-ssa-math-opts.cc       |  51 ++++++++++++-
>  4 files changed, 229 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
> index 01f19e2f62c..fc0cf6d7552 100644
> --- a/gcc/doc/match-and-simplify.texi
> +++ b/gcc/doc/match-and-simplify.texi
> @@ -361,6 +361,20 @@ Usually the types of the generated result expressions are
>  determined from the context, but sometimes like in the above case
>  it is required that you specify them explicitly.
>
> +Another modifier for generated expressions is @code{^} which
> +tells the machinery to try more matches for some special cases.
> +Normally the @code{cond} only allows the gimple assign for matching.
> +It will also try to match the gimple phi when appending the
> +@code{^} to the @code{cond}.  Aka @code{cond^}.
> +
> +@smallexample
> +(match (unsigned_sat_add @0 @1)
> + (cond^ (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep))
> +@end smallexample
> +
> +The above matching will generate @code{gimple_unsigned_sat_add} predicate
> +function that accepts both the gimple assign and gimple phi.
> +
>  Another modifier for generated expressions is @code{!} which
>  tells the machinery to only consider the simplification in case
>  the marked expression simplified to a simple operand.  Consider
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index f1e0e7abe0c..7355c2098cf 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -838,6 +838,11 @@ public:
>    predicate_id *p;
>  };
>
> +enum expr_flag {
> +  EFLAG_NONE,
> +  EFLAG_PHI_MATCH,
> +};
> +
>  /* An operand that constitutes an expression.  Expressions include
>     function calls and user-defined predicate invocations.  */
>
> @@ -848,12 +853,12 @@ public:
>      : operand (OP_EXPR, loc), operation (operation_),
>        ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
>        is_generic (false), force_single_use (false), force_leaf (false),
> -      opt_grp (0) {}
> +      opt_grp (0), eflag (EFLAG_NONE) {}
>    expr (expr *e)
>      : operand (OP_EXPR, e->location), operation (e->operation),
>        ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
>        is_generic (e->is_generic), force_single_use (e->force_single_use),
> -      force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
> +      force_leaf (e->force_leaf), opt_grp (e->opt_grp), eflag (e->eflag) {}
>    void append_op (operand *op) { ops.safe_push (op); }
>    /* The operator and its operands.  */
>    id_base *operation;
> @@ -873,6 +878,9 @@ public:
>    bool force_leaf;
>    /* If non-zero, the group for optional handling.  */
>    unsigned char opt_grp;
> +  /* Indicate the flag of the expr,  default is none.  */
> +  enum expr_flag eflag;

please simply add to the set of 'bool' flags above, like

      bool match_phi;

> +
>    void gen_transform (FILE *f, int, const char *, bool, int,
>                       const char *, capture_info *,
>                       dt_operand ** = 0, int = 0) override;
> @@ -1819,6 +1827,7 @@ public:
>
>    char *get_name (char *);
>    void gen_opname (char *, unsigned);
> +  void gen_phi_on_cond (FILE *, int, int);
>  };
>
>  /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
> @@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>                           depth);
>           indent += 4;
>           fprintf_indent (f, indent, "{\n");
> +         bool cond_expr_p = false, expr_flag_p = false;
>           for (unsigned i = 0; i < exprs_len; ++i)
>             {
>               expr *e = as_a <expr *> (gimple_exprs[i]->op);
> @@ -3262,6 +3272,8 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>               else
>                 {
>                   id_base *op = e->operation;
> +                 cond_expr_p |= *op == COND_EXPR;
> +                 expr_flag_p |= e->eflag != EFLAG_NONE;

So this would be

                     cond_expr_p |= (*op == COND_EXPR && e->match_phi);

>                   if (*op == CONVERT_EXPR || *op == NOP_EXPR)
>                     fprintf_indent (f, indent, "CASE_CONVERT:\n");
>                   else
> @@ -3275,6 +3287,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
>           fprintf_indent (f, indent, "default:;\n");
>           fprintf_indent (f, indent, "}\n");
>           indent -= 4;
> +
> +         if (cond_expr_p && expr_flag_p)
> +           {
> +             fprintf_indent (f, indent,
> +               "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
> +               depth, depth);
> +             indent += 2;
> +             fprintf_indent (f, indent, "{\n");
> +             indent += 2;
> +
> +             for (unsigned i = 0; i < exprs_len; i++)
> +               {
> +                 expr *e = as_a <expr *> (gimple_exprs[i]->op);
> +                 if (*e->operation == COND_EXPR && e->eflag == EFLAG_PHI_MATCH)

Here you have that correct.

> +                   gimple_exprs[i]->gen_phi_on_cond (f, indent, depth);
> +               }
> +
> +             indent -= 2;
> +             fprintf_indent (f, indent, "}\n");
> +             indent -= 2;
> +           }
>         }
>
>        if (fns_len)
> @@ -3483,6 +3516,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
>      }
>  }
>
> +/* Generate matching code for the phi when meet COND_EXPR.  */
> +
> +void
> +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
> +{
> +  fprintf_indent (f, indent,
> +    "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
> +
> +  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
> +
> +  indent += 2;
> +  fprintf_indent (f, indent, "{\n");
> +  indent += 2;
> +
> +  fprintf_indent (f, indent,
> +    "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
> +    "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
> +  fprintf_indent (f, indent,
> +    "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
> +    "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
> +    depth, depth, depth, depth);
> +
> +  fprintf_indent (f, indent,
> +    "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
> +    depth, depth);
> +  fprintf_indent (f, indent, "if (_ct_%d"
> +    " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
> +  fprintf_indent (f, indent,
> +    "  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
> +
> +  indent += 2;
> +  fprintf_indent (f, indent, "{\n");
> +  indent += 2;
> +
> +  fprintf_indent (f, indent,
> +    "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
> +  fprintf_indent (f, indent,
> +    "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
> +
> +  char opname_0[20];
> +  char opname_1[20];
> +  char opname_2[20];
> +  gen_opname (opname_0, 0);
> +
> +  fprintf_indent (f, indent,
> +    "tree %s = build2 (gimple_cond_code (_ct_%d), "
> +    "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
> +    opname_0, depth, depth, depth);
> +
> +  fprintf_indent (f, indent,
> +    "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
> +    " & EDGE_TRUE_VALUE;\n", depth, depth);
> +
> +  gen_opname (opname_1, 1);
> +  fprintf_indent (f, indent,
> +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
> +    opname_1, depth, depth);
> +
> +  gen_opname (opname_2, 2);
> +  fprintf_indent (f, indent,
> +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
> +    opname_2, depth, depth);
> +
> +  gen_kids (f, indent, true, depth);
> +
> +  indent -= 2;
> +  fprintf_indent (f, indent, "}\n");
> +  indent -= 2;
> +
> +  indent -= 2;
> +  fprintf_indent (f, indent, "}\n");
> +  indent -= 2;
> +}
> +
>  /* Emit a logging call to the debug file to the file F, with the INDENT from
>     either the RESULT location or the S's match location if RESULT is null. */
>  static void
> @@ -4600,6 +4713,15 @@ parser::parse_expr ()
>        e->force_leaf = true;
>      }
>
> +  if (parsing_match_operand && token->type == CPP_XOR
> +    && !(token->flags & PREV_WHITE))
> +    {
> +      eat_token (CPP_XOR);

Can you please diagnose ^ on non-match or non-COND_EXPRs?

You need to amend the cmp_operand function to match up the ->match_phi
flag similar to the is_generic handling.

Otherwise the patch look good, I didn't double-check the generated
PHI matching code again though.

Thanks,
Richard.

> +
> +      if (*e->operation == COND_EXPR)
> +       e->eflag = EFLAG_PHI_MATCH;
> +    }
> +
>    if (token->type == CPP_COLON
>        && !(token->flags & PREV_WHITE))
>      {
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 480e36bbbaf..e3b5b8d3810 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
>
>  /* Unsigned Saturation Add */
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -((X + Y) < X)  */
>  (match (usadd_left_part_1 @0 @1)
>   (plus:c @0 @1)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (usadd_left_part_2 @0 @1)
>   (realpart (IFN_ADD_OVERFLOW:c @0 @1))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -((type)(X + Y) < X)  */
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (lt (plus:c @0 @1) @0)))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> +   SAT_ADD = (X + Y) | -(X > (X + Y))  */
>  (match (usadd_right_part_1 @0 @1)
>   (negate (convert (gt @0 (plus:c @0 @1))))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (usadd_right_part_2 @0 @1)
>   (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
> +(match (usadd_right_part_2 @0 @1)
> + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
>     because the sub part of left_part_2 cannot work with right_part_1.
>     For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
>
> -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
> +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +/* Unsigned saturation add, case 3 (branch with ge):
> +   SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
> +
> +/* Unsigned saturation add, case 4 (branch with lt):
> +   SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> +
> +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> +  (usadd_left_part_2 @0 @1) integer_minus_onep))
> +
> +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
> +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW.  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> +  integer_minus_onep (usadd_left_part_2 @0 @1)))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 62da1c5ee08..8624d414347 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
>   *      =>
>   *      _12 = .SAT_ADD (_4, _6);  */
>  static void
> -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
>  {
>    gcall *call = NULL;
>
> @@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
>      }
>  }
>
> +/*
> + * Try to match saturation arith pattern(s).
> + * From:
> + *   <bb 2> :
> + *   _1 = x_3(D) + y_4(D);
> + *   if (_1 >= x_3(D))
> + *     goto <bb 3>; [INV]
> + *   else
> + *     goto <bb 4>; [INV]
> + *
> + *   <bb 3> :
> + *
> + *   <bb 4> :
> + *   # _2 = PHI <255(2), _1(3)>
> + *
> + * To:
> + *   <bb 4> [local count: 1073741824]:
> + *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> +
> +static void
> +match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
> +{
> +  if (gimple_phi_num_args (phi) != 2)
> +    return;
> +
> +  tree ops[2];
> +  tree phi_result = gimple_phi_result (phi);
> +
> +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
> +                                        OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> +      gimple_call_set_lhs (call, phi_result);
> +      gsi_insert_before (gsi, call, GSI_SAME_STMT);
> +
> +      gimple_stmt_iterator psi = gsi_for_stmt (phi);
> +      remove_phi_node (&psi, false);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>
>    fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
>
> +  for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> +    {
> +      gimple_stmt_iterator gsi = gsi_last_bb (bb);
> +      match_phi_saturation_arith (&gsi, psi.phi ());
> +    }
> +
>    for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
>      {
>        gimple *stmt = gsi_stmt (gsi);
> @@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               break;
>
>             case BIT_IOR_EXPR:
> -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +             match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> --
> 2.34.1
>
Richard Biener June 6, 2024, 10:46 a.m. UTC | #4
On Thu, Jun 6, 2024 at 3:19 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> After revisited all the comments of the mail thread, I would like to confirm if my understanding is correct according to the generated match code.
> For now the generated code looks like below:
>
> else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
>   {
>     basic_block _b1 = gimple_bb (_a1);
>     if (gimple_phi_num_args (_a1) == 2)
>       {
>         basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
>         basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
>         basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1;
>         basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1;
>         gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
>         if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
>           && EDGE_COUNT (_other_db_1->succs) == 1
>           && EDGE_PRED (_other_db_1, 0)->src == _db_1)
>           {
>             tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
>             tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
>             tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1);
>             bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE;
>             tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
>             tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
>             ....
>
> The flow may look like below, or can only handling flow like below.
>
> +------+
> | cond |-------+
> +------+       v
>    |        +-------+
>    |        | other |
>    |        +-------+
>    v           |
> +-----+        |
> | PHI | <------+
> +-----+
>
> Thus, I think it cannot handle the below 2 PHI flows (or even more complicated shapes)
>
> +------+
> | cond |-------+
> +------+       |
>    |           |
>    v           |
> +------+       |
> | mid  |       v
> +------+    +-------+
>    |        | other |
>    |        +-------+
>    v           |
> +-----+        |
> | PHI | <------+
> +-----+
>
> +------+
> | cond |-----------+
> +------+           |
>    |               v
>    |            +-------+
>    |            | mid-0 |--------+
>    |            +-------+        |
>    |               |             v
>    |               |           +-------+
>    |               |           | mid-1 |
>    |               v           +-------+
>    |            +-------+        |
>    |            | other |<-------+
>    |            +-------+
>    v               |
> +-----+            |
> | PHI | <----------+
> +-----+

Correct.

> So I am not very sure if we need (or reasonable) to take care of all the PHI gimple flows (may impossible ?) Or keep the simplest one for now and add more case by case.
> Thanks a lot.

I'd only keep the simplest one for now.  More complex cases can be
handled easily
with using dominators but those might not always be available or up-to-date when
doing match queries.  So let's revisit when we run into a case where
the simple form
isn't enough.

Richard.

>
> Pan
>
> -----Original Message-----
> From: Li, Pan2
> Sent: Wednesday, June 5, 2024 9:44 PM
> To: Richard Biener <richard.guenther@gmail.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
> Subject: RE: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD
>
> Thanks Richard for comments, will address the comments in v7, and looks like I also need to resolve conflict up to a point.
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Wednesday, June 5, 2024 4:50 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
> Subject: Re: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD
>
> On Thu, May 30, 2024 at 3:37 PM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > After we support one gassign form of the unsigned .SAT_ADD,  we
> > would like to support more forms including both the branch and
> > branchless.  There are 5 other forms of .SAT_ADD,  list as below:
> >
> > Form 1:
> >   #define SAT_ADD_U_1(T) \
> >   T sat_add_u_1_##T(T x, T y) \
> >   { \
> >     return (T)(x + y) >= x ? (x + y) : -1; \
> >   }
> >
> > Form 2:
> >   #define SAT_ADD_U_2(T) \
> >   T sat_add_u_2_##T(T x, T y) \
> >   { \
> >     T ret; \
> >     T overflow = __builtin_add_overflow (x, y, &ret); \
> >     return (T)(-overflow) | ret; \
> >   }
> >
> > Form 3:
> >   #define SAT_ADD_U_3(T) \
> >   T sat_add_u_3_##T (T x, T y) \
> >   { \
> >     T ret; \
> >     return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
> >   }
> >
> > Form 4:
> >   #define SAT_ADD_U_4(T) \
> >   T sat_add_u_4_##T (T x, T y) \
> >   { \
> >     T ret; \
> >     return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
> >   }
> >
> > Form 5:
> >   #define SAT_ADD_U_5(T) \
> >   T sat_add_u_5_##T(T x, T y) \
> >   { \
> >     return (T)(x + y) < x ? -1 : (x + y); \
> >   }
> >
> > Take the forms 3 of above as example:
> >
> > uint64_t
> > sat_add (uint64_t x, uint64_t y)
> > {
> >   uint64_t ret;
> >   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> > }
> >
> > Before this patch:
> > uint64_t sat_add (uint64_t x, uint64_t y)
> > {
> >   long unsigned int _1;
> >   long unsigned int _2;
> >   uint64_t _3;
> >   __complex__ long unsigned int _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
> >   _2 = IMAGPART_EXPR <_6>;
> >   if (_2 != 0)
> >     goto <bb 4>; [35.00%]
> >   else
> >     goto <bb 3>; [65.00%]
> > ;;    succ:       4
> > ;;                3
> >
> > ;;   basic block 3, loop depth 0
> > ;;    pred:       2
> >   _1 = REALPART_EXPR <_6>;
> > ;;    succ:       4
> >
> > ;;   basic block 4, loop depth 0
> > ;;    pred:       3
> > ;;                2
> >   # _3 = PHI <_1(3), 18446744073709551615(2)>
> >   return _3;
> > ;;    succ:       EXIT
> > }
> >
> > After this patch:
> > uint64_t sat_add (uint64_t x, uint64_t y)
> > {
> >   long unsigned int _12;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
> >   return _12;
> > ;;    succ:       EXIT
> > }
> >
> > The flag '^' acts on cond_expr will generate matching code similar as below:
> >
> > else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
> >   {
> >     basic_block _b1 = gimple_bb (_a1);
> >     if (gimple_phi_num_args (_a1) == 2)
> >       {
> >         basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
> >         basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
> >         basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : ...
> >         basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : ...
> >         gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
> >         if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
> >           && EDGE_COUNT (_other_db_1->succs) == 1
> >           && EDGE_PRED (_other_db_1, 0)->src == _db_1)
> >           {
> >             tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
> >             tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
> >             tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, ...);
> >             bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & EDGE_TRUE_VALUE;
> >             tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
> >             tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
> >             switch (TREE_CODE (_p0))
> >               ...
> >
> > The below test suites are still running, will update it later.
> > * The x86 bootstrap test.
> > * The x86 fully regression test.
> > * The riscv fully regression test.
> >
> > gcc/ChangeLog:
> >
> >         * doc/match-and-simplify.texi: Add doc for the matching flag '^'.
> >         * genmatch.cc (enum expr_flag): Add new enum for expr flag.
> >         (dt_node::gen_kids_1): Add cond_expr and flag handling.
> >         (dt_operand::gen_phi_on_cond): Add new func to gen phi matching
> >         on cond_expr.
> >         (parser::parse_expr): Add handling for the expr flag '^'.
> >         * match.pd: Add more form for unsigned .SAT_ADD.
> >         * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
> >         (match_assign_saturation_arith): Rename to.
> >         (match_phi_saturation_arith): Add new func impl to match the
> >         .SAT_ADD when phi.
> >         (math_opts_dom_walker::after_dom_children): Add phi matching
> >         try for all gimple phi stmt.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/doc/match-and-simplify.texi |  14 ++++
> >  gcc/genmatch.cc                 | 126 +++++++++++++++++++++++++++++++-
> >  gcc/match.pd                    |  43 ++++++++++-
> >  gcc/tree-ssa-math-opts.cc       |  51 ++++++++++++-
> >  4 files changed, 229 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
> > index 01f19e2f62c..fc0cf6d7552 100644
> > --- a/gcc/doc/match-and-simplify.texi
> > +++ b/gcc/doc/match-and-simplify.texi
> > @@ -361,6 +361,20 @@ Usually the types of the generated result expressions are
> >  determined from the context, but sometimes like in the above case
> >  it is required that you specify them explicitly.
> >
> > +Another modifier for generated expressions is @code{^} which
> > +tells the machinery to try more matches for some special cases.
> > +Normally the @code{cond} only allows the gimple assign for matching.
> > +It will also try to match the gimple phi when appending the
> > +@code{^} to the @code{cond}.  Aka @code{cond^}.
> > +
> > +@smallexample
> > +(match (unsigned_sat_add @0 @1)
> > + (cond^ (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep))
> > +@end smallexample
> > +
> > +The above matching will generate @code{gimple_unsigned_sat_add} predicate
> > +function that accepts both the gimple assign and gimple phi.
> > +
> >  Another modifier for generated expressions is @code{!} which
> >  tells the machinery to only consider the simplification in case
> >  the marked expression simplified to a simple operand.  Consider
> > diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> > index f1e0e7abe0c..7355c2098cf 100644
> > --- a/gcc/genmatch.cc
> > +++ b/gcc/genmatch.cc
> > @@ -838,6 +838,11 @@ public:
> >    predicate_id *p;
> >  };
> >
> > +enum expr_flag {
> > +  EFLAG_NONE,
> > +  EFLAG_PHI_MATCH,
> > +};
> > +
> >  /* An operand that constitutes an expression.  Expressions include
> >     function calls and user-defined predicate invocations.  */
> >
> > @@ -848,12 +853,12 @@ public:
> >      : operand (OP_EXPR, loc), operation (operation_),
> >        ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
> >        is_generic (false), force_single_use (false), force_leaf (false),
> > -      opt_grp (0) {}
> > +      opt_grp (0), eflag (EFLAG_NONE) {}
> >    expr (expr *e)
> >      : operand (OP_EXPR, e->location), operation (e->operation),
> >        ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
> >        is_generic (e->is_generic), force_single_use (e->force_single_use),
> > -      force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
> > +      force_leaf (e->force_leaf), opt_grp (e->opt_grp), eflag (e->eflag) {}
> >    void append_op (operand *op) { ops.safe_push (op); }
> >    /* The operator and its operands.  */
> >    id_base *operation;
> > @@ -873,6 +878,9 @@ public:
> >    bool force_leaf;
> >    /* If non-zero, the group for optional handling.  */
> >    unsigned char opt_grp;
> > +  /* Indicate the flag of the expr,  default is none.  */
> > +  enum expr_flag eflag;
>
> please simply add to the set of 'bool' flags above, like
>
>       bool match_phi;
>
> > +
> >    void gen_transform (FILE *f, int, const char *, bool, int,
> >                       const char *, capture_info *,
> >                       dt_operand ** = 0, int = 0) override;
> > @@ -1819,6 +1827,7 @@ public:
> >
> >    char *get_name (char *);
> >    void gen_opname (char *, unsigned);
> > +  void gen_phi_on_cond (FILE *, int, int);
> >  };
> >
> >  /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
> > @@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> >                           depth);
> >           indent += 4;
> >           fprintf_indent (f, indent, "{\n");
> > +         bool cond_expr_p = false, expr_flag_p = false;
> >           for (unsigned i = 0; i < exprs_len; ++i)
> >             {
> >               expr *e = as_a <expr *> (gimple_exprs[i]->op);
> > @@ -3262,6 +3272,8 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> >               else
> >                 {
> >                   id_base *op = e->operation;
> > +                 cond_expr_p |= *op == COND_EXPR;
> > +                 expr_flag_p |= e->eflag != EFLAG_NONE;
>
> So this would be
>
>                      cond_expr_p |= (*op == COND_EXPR && e->match_phi);
>
> >                   if (*op == CONVERT_EXPR || *op == NOP_EXPR)
> >                     fprintf_indent (f, indent, "CASE_CONVERT:\n");
> >                   else
> > @@ -3275,6 +3287,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> >           fprintf_indent (f, indent, "default:;\n");
> >           fprintf_indent (f, indent, "}\n");
> >           indent -= 4;
> > +
> > +         if (cond_expr_p && expr_flag_p)
> > +           {
> > +             fprintf_indent (f, indent,
> > +               "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
> > +               depth, depth);
> > +             indent += 2;
> > +             fprintf_indent (f, indent, "{\n");
> > +             indent += 2;
> > +
> > +             for (unsigned i = 0; i < exprs_len; i++)
> > +               {
> > +                 expr *e = as_a <expr *> (gimple_exprs[i]->op);
> > +                 if (*e->operation == COND_EXPR && e->eflag == EFLAG_PHI_MATCH)
>
> Here you have that correct.
>
> > +                   gimple_exprs[i]->gen_phi_on_cond (f, indent, depth);
> > +               }
> > +
> > +             indent -= 2;
> > +             fprintf_indent (f, indent, "}\n");
> > +             indent -= 2;
> > +           }
> >         }
> >
> >        if (fns_len)
> > @@ -3483,6 +3516,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
> >      }
> >  }
> >
> > +/* Generate matching code for the phi when meet COND_EXPR.  */
> > +
> > +void
> > +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
> > +{
> > +  fprintf_indent (f, indent,
> > +    "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
> > +
> > +  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
> > +
> > +  indent += 2;
> > +  fprintf_indent (f, indent, "{\n");
> > +  indent += 2;
> > +
> > +  fprintf_indent (f, indent,
> > +    "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
> > +    "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
> > +    "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
> > +    depth, depth, depth, depth);
> > +
> > +  fprintf_indent (f, indent,
> > +    "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
> > +    depth, depth);
> > +  fprintf_indent (f, indent, "if (_ct_%d"
> > +    " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
> > +  fprintf_indent (f, indent,
> > +    "  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
> > +
> > +  indent += 2;
> > +  fprintf_indent (f, indent, "{\n");
> > +  indent += 2;
> > +
> > +  fprintf_indent (f, indent,
> > +    "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
> > +
> > +  char opname_0[20];
> > +  char opname_1[20];
> > +  char opname_2[20];
> > +  gen_opname (opname_0, 0);
> > +
> > +  fprintf_indent (f, indent,
> > +    "tree %s = build2 (gimple_cond_code (_ct_%d), "
> > +    "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
> > +    opname_0, depth, depth, depth);
> > +
> > +  fprintf_indent (f, indent,
> > +    "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
> > +    " & EDGE_TRUE_VALUE;\n", depth, depth);
> > +
> > +  gen_opname (opname_1, 1);
> > +  fprintf_indent (f, indent,
> > +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
> > +    opname_1, depth, depth);
> > +
> > +  gen_opname (opname_2, 2);
> > +  fprintf_indent (f, indent,
> > +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
> > +    opname_2, depth, depth);
> > +
> > +  gen_kids (f, indent, true, depth);
> > +
> > +  indent -= 2;
> > +  fprintf_indent (f, indent, "}\n");
> > +  indent -= 2;
> > +
> > +  indent -= 2;
> > +  fprintf_indent (f, indent, "}\n");
> > +  indent -= 2;
> > +}
> > +
> >  /* Emit a logging call to the debug file to the file F, with the INDENT from
> >     either the RESULT location or the S's match location if RESULT is null. */
> >  static void
> > @@ -4600,6 +4713,15 @@ parser::parse_expr ()
> >        e->force_leaf = true;
> >      }
> >
> > +  if (parsing_match_operand && token->type == CPP_XOR
> > +    && !(token->flags & PREV_WHITE))
> > +    {
> > +      eat_token (CPP_XOR);
>
> Can you please diagnose ^ on non-match or non-COND_EXPRs?
>
> You need to amend the cmp_operand function to match up the ->match_phi
> flag similar to the is_generic handling.
>
> Otherwise the patch look good, I didn't double-check the generated
> PHI matching code again though.
>
> Thanks,
> Richard.
>
> > +
> > +      if (*e->operation == COND_EXPR)
> > +       e->eflag = EFLAG_PHI_MATCH;
> > +    }
> > +
> >    if (token->type == CPP_COLON
> >        && !(token->flags & PREV_WHITE))
> >      {
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 480e36bbbaf..e3b5b8d3810 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
> >
> >  /* Unsigned Saturation Add */
> > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> > +   SAT_ADD = (X + Y) | -((X + Y) < X)  */
> >  (match (usadd_left_part_1 @0 @1)
> >   (plus:c @0 @1)
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> >  (match (usadd_left_part_2 @0 @1)
> >   (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> > +   SAT_ADD = (X + Y) | -((type)(X + Y) < X)  */
> >  (match (usadd_right_part_1 @0 @1)
> >   (negate (convert (lt (plus:c @0 @1) @0)))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> > +   SAT_ADD = (X + Y) | -(X > (X + Y))  */
> >  (match (usadd_right_part_1 @0 @1)
> >   (negate (convert (gt @0 (plus:c @0 @1))))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> >  (match (usadd_right_part_2 @0 @1)
> >   (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
> > +(match (usadd_right_part_2 @0 @1)
> > + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> >     because the sub part of left_part_2 cannot work with right_part_1.
> >     For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> > @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  (match (unsigned_integer_sat_add @0 @1)
> >   (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> >
> > -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> >  (match (unsigned_integer_sat_add @0 @1)
> >   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> >
> > +/* Unsigned saturation add, case 3 (branch with ge):
> > +   SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
> > +
> > +/* Unsigned saturation add, case 4 (branch with lt):
> > +   SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> > +
> > +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> > +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1.  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> > +  (usadd_left_part_2 @0 @1) integer_minus_onep))
> > +
> > +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
> > +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW.  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> > +  integer_minus_onep (usadd_left_part_2 @0 @1)))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 62da1c5ee08..8624d414347 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> >   *      =>
> >   *      _12 = .SAT_ADD (_4, _6);  */
> >  static void
> > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > +match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> >  {
> >    gcall *call = NULL;
> >
> > @@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> >      }
> >  }
> >
> > +/*
> > + * Try to match saturation arith pattern(s).
> > + * From:
> > + *   <bb 2> :
> > + *   _1 = x_3(D) + y_4(D);
> > + *   if (_1 >= x_3(D))
> > + *     goto <bb 3>; [INV]
> > + *   else
> > + *     goto <bb 4>; [INV]
> > + *
> > + *   <bb 3> :
> > + *
> > + *   <bb 4> :
> > + *   # _2 = PHI <255(2), _1(3)>
> > + *
> > + * To:
> > + *   <bb 4> [local count: 1073741824]:
> > + *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> > +
> > +static void
> > +match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
> > +{
> > +  if (gimple_phi_num_args (phi) != 2)
> > +    return;
> > +
> > +  tree ops[2];
> > +  tree phi_result = gimple_phi_result (phi);
> > +
> > +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> > +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
> > +                                        OPTIMIZE_FOR_BOTH))
> > +    {
> > +      gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > +      gimple_call_set_lhs (call, phi_result);
> > +      gsi_insert_before (gsi, call, GSI_SAME_STMT);
> > +
> > +      gimple_stmt_iterator psi = gsi_for_stmt (phi);
> > +      remove_phi_node (&psi, false);
> > +    }
> > +}
> > +
> >  /* Recognize for unsigned x
> >     x = y - z;
> >     if (x > y)
> > @@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >
> >    fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
> >
> > +  for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> > +    {
> > +      gimple_stmt_iterator gsi = gsi_last_bb (bb);
> > +      match_phi_saturation_arith (&gsi, psi.phi ());
> > +    }
> > +
> >    for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> >      {
> >        gimple *stmt = gsi_stmt (gsi);
> > @@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >               break;
> >
> >             case BIT_IOR_EXPR:
> > -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > +             match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
> >               /* fall-through  */
> >             case BIT_XOR_EXPR:
> >               match_uaddc_usubc (&gsi, stmt, code);
> > --
> > 2.34.1
> >
Li, Pan2 June 6, 2024, 11:19 a.m. UTC | #5
> I'd only keep the simplest one for now.  More complex cases can be
> handled easily
> with using dominators but those might not always be available or up-to-date when
> doing match queries.  So let's revisit when we run into a case where
> the simple form
> isn't enough.

Got it. Thanks, will send the v7 if no surprise from test suites.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Thursday, June 6, 2024 6:47 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD

On Thu, Jun 6, 2024 at 3:19 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Hi Richard,
>
> After revisited all the comments of the mail thread, I would like to confirm if my understanding is correct according to the generated match code.
> For now the generated code looks like below:
>
> else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
>   {
>     basic_block _b1 = gimple_bb (_a1);
>     if (gimple_phi_num_args (_a1) == 2)
>       {
>         basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
>         basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
>         basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : _pb_1_1;
>         basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : _pb_0_1;
>         gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
>         if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
>           && EDGE_COUNT (_other_db_1->succs) == 1
>           && EDGE_PRED (_other_db_1, 0)->src == _db_1)
>           {
>             tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
>             tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
>             tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, _cond_rhs_1);
>             bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE;
>             tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
>             tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
>             ....
>
> The flow may look like below, or can only handling flow like below.
>
> +------+
> | cond |-------+
> +------+       v
>    |        +-------+
>    |        | other |
>    |        +-------+
>    v           |
> +-----+        |
> | PHI | <------+
> +-----+
>
> Thus, I think it cannot handle the below 2 PHI flows (or even more complicated shapes)
>
> +------+
> | cond |-------+
> +------+       |
>    |           |
>    v           |
> +------+       |
> | mid  |       v
> +------+    +-------+
>    |        | other |
>    |        +-------+
>    v           |
> +-----+        |
> | PHI | <------+
> +-----+
>
> +------+
> | cond |-----------+
> +------+           |
>    |               v
>    |            +-------+
>    |            | mid-0 |--------+
>    |            +-------+        |
>    |               |             v
>    |               |           +-------+
>    |               |           | mid-1 |
>    |               v           +-------+
>    |            +-------+        |
>    |            | other |<-------+
>    |            +-------+
>    v               |
> +-----+            |
> | PHI | <----------+
> +-----+

Correct.

> So I am not very sure if we need (or reasonable) to take care of all the PHI gimple flows (may impossible ?) Or keep the simplest one for now and add more case by case.
> Thanks a lot.

I'd only keep the simplest one for now.  More complex cases can be
handled easily
with using dominators but those might not always be available or up-to-date when
doing match queries.  So let's revisit when we run into a case where
the simple form
isn't enough.

Richard.

>
> Pan
>
> -----Original Message-----
> From: Li, Pan2
> Sent: Wednesday, June 5, 2024 9:44 PM
> To: Richard Biener <richard.guenther@gmail.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
> Subject: RE: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD
>
> Thanks Richard for comments, will address the comments in v7, and looks like I also need to resolve conflict up to a point.
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Wednesday, June 5, 2024 4:50 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
> Subject: Re: [PATCH v4] Match: Support more form for scalar unsigned SAT_ADD
>
> On Thu, May 30, 2024 at 3:37 PM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > After we support one gassign form of the unsigned .SAT_ADD,  we
> > would like to support more forms including both the branch and
> > branchless.  There are 5 other forms of .SAT_ADD,  list as below:
> >
> > Form 1:
> >   #define SAT_ADD_U_1(T) \
> >   T sat_add_u_1_##T(T x, T y) \
> >   { \
> >     return (T)(x + y) >= x ? (x + y) : -1; \
> >   }
> >
> > Form 2:
> >   #define SAT_ADD_U_2(T) \
> >   T sat_add_u_2_##T(T x, T y) \
> >   { \
> >     T ret; \
> >     T overflow = __builtin_add_overflow (x, y, &ret); \
> >     return (T)(-overflow) | ret; \
> >   }
> >
> > Form 3:
> >   #define SAT_ADD_U_3(T) \
> >   T sat_add_u_3_##T (T x, T y) \
> >   { \
> >     T ret; \
> >     return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
> >   }
> >
> > Form 4:
> >   #define SAT_ADD_U_4(T) \
> >   T sat_add_u_4_##T (T x, T y) \
> >   { \
> >     T ret; \
> >     return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
> >   }
> >
> > Form 5:
> >   #define SAT_ADD_U_5(T) \
> >   T sat_add_u_5_##T(T x, T y) \
> >   { \
> >     return (T)(x + y) < x ? -1 : (x + y); \
> >   }
> >
> > Take the forms 3 of above as example:
> >
> > uint64_t
> > sat_add (uint64_t x, uint64_t y)
> > {
> >   uint64_t ret;
> >   return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
> > }
> >
> > Before this patch:
> > uint64_t sat_add (uint64_t x, uint64_t y)
> > {
> >   long unsigned int _1;
> >   long unsigned int _2;
> >   uint64_t _3;
> >   __complex__ long unsigned int _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
> >   _2 = IMAGPART_EXPR <_6>;
> >   if (_2 != 0)
> >     goto <bb 4>; [35.00%]
> >   else
> >     goto <bb 3>; [65.00%]
> > ;;    succ:       4
> > ;;                3
> >
> > ;;   basic block 3, loop depth 0
> > ;;    pred:       2
> >   _1 = REALPART_EXPR <_6>;
> > ;;    succ:       4
> >
> > ;;   basic block 4, loop depth 0
> > ;;    pred:       3
> > ;;                2
> >   # _3 = PHI <_1(3), 18446744073709551615(2)>
> >   return _3;
> > ;;    succ:       EXIT
> > }
> >
> > After this patch:
> > uint64_t sat_add (uint64_t x, uint64_t y)
> > {
> >   long unsigned int _12;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
> >   return _12;
> > ;;    succ:       EXIT
> > }
> >
> > The flag '^' acts on cond_expr will generate matching code similar as below:
> >
> > else if (gphi *_a1 = dyn_cast <gphi *> (_d1))
> >   {
> >     basic_block _b1 = gimple_bb (_a1);
> >     if (gimple_phi_num_args (_a1) == 2)
> >       {
> >         basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src;
> >         basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src;
> >         basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_0_1 : ...
> >         basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) ? _pb_1_1 : ...
> >         gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1));
> >         if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1
> >           && EDGE_COUNT (_other_db_1->succs) == 1
> >           && EDGE_PRED (_other_db_1, 0)->src == _db_1)
> >           {
> >             tree _cond_lhs_1 = gimple_cond_lhs (_ct_1);
> >             tree _cond_rhs_1 = gimple_cond_rhs (_ct_1);
> >             tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, _cond_lhs_1, ...);
> >             bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags  & EDGE_TRUE_VALUE;
> >             tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1);
> >             tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0);
> >             switch (TREE_CODE (_p0))
> >               ...
> >
> > The below test suites are still running, will update it later.
> > * The x86 bootstrap test.
> > * The x86 fully regression test.
> > * The riscv fully regression test.
> >
> > gcc/ChangeLog:
> >
> >         * doc/match-and-simplify.texi: Add doc for the matching flag '^'.
> >         * genmatch.cc (enum expr_flag): Add new enum for expr flag.
> >         (dt_node::gen_kids_1): Add cond_expr and flag handling.
> >         (dt_operand::gen_phi_on_cond): Add new func to gen phi matching
> >         on cond_expr.
> >         (parser::parse_expr): Add handling for the expr flag '^'.
> >         * match.pd: Add more form for unsigned .SAT_ADD.
> >         * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
> >         (match_assign_saturation_arith): Rename to.
> >         (match_phi_saturation_arith): Add new func impl to match the
> >         .SAT_ADD when phi.
> >         (math_opts_dom_walker::after_dom_children): Add phi matching
> >         try for all gimple phi stmt.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/doc/match-and-simplify.texi |  14 ++++
> >  gcc/genmatch.cc                 | 126 +++++++++++++++++++++++++++++++-
> >  gcc/match.pd                    |  43 ++++++++++-
> >  gcc/tree-ssa-math-opts.cc       |  51 ++++++++++++-
> >  4 files changed, 229 insertions(+), 5 deletions(-)
> >
> > diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
> > index 01f19e2f62c..fc0cf6d7552 100644
> > --- a/gcc/doc/match-and-simplify.texi
> > +++ b/gcc/doc/match-and-simplify.texi
> > @@ -361,6 +361,20 @@ Usually the types of the generated result expressions are
> >  determined from the context, but sometimes like in the above case
> >  it is required that you specify them explicitly.
> >
> > +Another modifier for generated expressions is @code{^} which
> > +tells the machinery to try more matches for some special cases.
> > +Normally the @code{cond} only allows the gimple assign for matching.
> > +It will also try to match the gimple phi when appending the
> > +@code{^} to the @code{cond}.  Aka @code{cond^}.
> > +
> > +@smallexample
> > +(match (unsigned_sat_add @0 @1)
> > + (cond^ (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep))
> > +@end smallexample
> > +
> > +The above matching will generate @code{gimple_unsigned_sat_add} predicate
> > +function that accepts both the gimple assign and gimple phi.
> > +
> >  Another modifier for generated expressions is @code{!} which
> >  tells the machinery to only consider the simplification in case
> >  the marked expression simplified to a simple operand.  Consider
> > diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> > index f1e0e7abe0c..7355c2098cf 100644
> > --- a/gcc/genmatch.cc
> > +++ b/gcc/genmatch.cc
> > @@ -838,6 +838,11 @@ public:
> >    predicate_id *p;
> >  };
> >
> > +enum expr_flag {
> > +  EFLAG_NONE,
> > +  EFLAG_PHI_MATCH,
> > +};
> > +
> >  /* An operand that constitutes an expression.  Expressions include
> >     function calls and user-defined predicate invocations.  */
> >
> > @@ -848,12 +853,12 @@ public:
> >      : operand (OP_EXPR, loc), operation (operation_),
> >        ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
> >        is_generic (false), force_single_use (false), force_leaf (false),
> > -      opt_grp (0) {}
> > +      opt_grp (0), eflag (EFLAG_NONE) {}
> >    expr (expr *e)
> >      : operand (OP_EXPR, e->location), operation (e->operation),
> >        ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
> >        is_generic (e->is_generic), force_single_use (e->force_single_use),
> > -      force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
> > +      force_leaf (e->force_leaf), opt_grp (e->opt_grp), eflag (e->eflag) {}
> >    void append_op (operand *op) { ops.safe_push (op); }
> >    /* The operator and its operands.  */
> >    id_base *operation;
> > @@ -873,6 +878,9 @@ public:
> >    bool force_leaf;
> >    /* If non-zero, the group for optional handling.  */
> >    unsigned char opt_grp;
> > +  /* Indicate the flag of the expr,  default is none.  */
> > +  enum expr_flag eflag;
>
> please simply add to the set of 'bool' flags above, like
>
>       bool match_phi;
>
> > +
> >    void gen_transform (FILE *f, int, const char *, bool, int,
> >                       const char *, capture_info *,
> >                       dt_operand ** = 0, int = 0) override;
> > @@ -1819,6 +1827,7 @@ public:
> >
> >    char *get_name (char *);
> >    void gen_opname (char *, unsigned);
> > +  void gen_phi_on_cond (FILE *, int, int);
> >  };
> >
> >  /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
> > @@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> >                           depth);
> >           indent += 4;
> >           fprintf_indent (f, indent, "{\n");
> > +         bool cond_expr_p = false, expr_flag_p = false;
> >           for (unsigned i = 0; i < exprs_len; ++i)
> >             {
> >               expr *e = as_a <expr *> (gimple_exprs[i]->op);
> > @@ -3262,6 +3272,8 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> >               else
> >                 {
> >                   id_base *op = e->operation;
> > +                 cond_expr_p |= *op == COND_EXPR;
> > +                 expr_flag_p |= e->eflag != EFLAG_NONE;
>
> So this would be
>
>                      cond_expr_p |= (*op == COND_EXPR && e->match_phi);
>
> >                   if (*op == CONVERT_EXPR || *op == NOP_EXPR)
> >                     fprintf_indent (f, indent, "CASE_CONVERT:\n");
> >                   else
> > @@ -3275,6 +3287,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
> >           fprintf_indent (f, indent, "default:;\n");
> >           fprintf_indent (f, indent, "}\n");
> >           indent -= 4;
> > +
> > +         if (cond_expr_p && expr_flag_p)
> > +           {
> > +             fprintf_indent (f, indent,
> > +               "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
> > +               depth, depth);
> > +             indent += 2;
> > +             fprintf_indent (f, indent, "{\n");
> > +             indent += 2;
> > +
> > +             for (unsigned i = 0; i < exprs_len; i++)
> > +               {
> > +                 expr *e = as_a <expr *> (gimple_exprs[i]->op);
> > +                 if (*e->operation == COND_EXPR && e->eflag == EFLAG_PHI_MATCH)
>
> Here you have that correct.
>
> > +                   gimple_exprs[i]->gen_phi_on_cond (f, indent, depth);
> > +               }
> > +
> > +             indent -= 2;
> > +             fprintf_indent (f, indent, "}\n");
> > +             indent -= 2;
> > +           }
> >         }
> >
> >        if (fns_len)
> > @@ -3483,6 +3516,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
> >      }
> >  }
> >
> > +/* Generate matching code for the phi when meet COND_EXPR.  */
> > +
> > +void
> > +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
> > +{
> > +  fprintf_indent (f, indent,
> > +    "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
> > +
> > +  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
> > +
> > +  indent += 2;
> > +  fprintf_indent (f, indent, "{\n");
> > +  indent += 2;
> > +
> > +  fprintf_indent (f, indent,
> > +    "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
> > +    "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
> > +    "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
> > +    depth, depth, depth, depth);
> > +
> > +  fprintf_indent (f, indent,
> > +    "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
> > +    depth, depth);
> > +  fprintf_indent (f, indent, "if (_ct_%d"
> > +    " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
> > +  fprintf_indent (f, indent,
> > +    "  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
> > +
> > +  indent += 2;
> > +  fprintf_indent (f, indent, "{\n");
> > +  indent += 2;
> > +
> > +  fprintf_indent (f, indent,
> > +    "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
> > +  fprintf_indent (f, indent,
> > +    "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
> > +
> > +  char opname_0[20];
> > +  char opname_1[20];
> > +  char opname_2[20];
> > +  gen_opname (opname_0, 0);
> > +
> > +  fprintf_indent (f, indent,
> > +    "tree %s = build2 (gimple_cond_code (_ct_%d), "
> > +    "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
> > +    opname_0, depth, depth, depth);
> > +
> > +  fprintf_indent (f, indent,
> > +    "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
> > +    " & EDGE_TRUE_VALUE;\n", depth, depth);
> > +
> > +  gen_opname (opname_1, 1);
> > +  fprintf_indent (f, indent,
> > +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
> > +    opname_1, depth, depth);
> > +
> > +  gen_opname (opname_2, 2);
> > +  fprintf_indent (f, indent,
> > +    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
> > +    opname_2, depth, depth);
> > +
> > +  gen_kids (f, indent, true, depth);
> > +
> > +  indent -= 2;
> > +  fprintf_indent (f, indent, "}\n");
> > +  indent -= 2;
> > +
> > +  indent -= 2;
> > +  fprintf_indent (f, indent, "}\n");
> > +  indent -= 2;
> > +}
> > +
> >  /* Emit a logging call to the debug file to the file F, with the INDENT from
> >     either the RESULT location or the S's match location if RESULT is null. */
> >  static void
> > @@ -4600,6 +4713,15 @@ parser::parse_expr ()
> >        e->force_leaf = true;
> >      }
> >
> > +  if (parsing_match_operand && token->type == CPP_XOR
> > +    && !(token->flags & PREV_WHITE))
> > +    {
> > +      eat_token (CPP_XOR);
>
> Can you please diagnose ^ on non-match or non-COND_EXPRs?
>
> You need to amend the cmp_operand function to match up the ->match_phi
> flag similar to the is_generic handling.
>
> Otherwise the patch look good, I didn't double-check the generated
> PHI matching code again though.
>
> Thanks,
> Richard.
>
> > +
> > +      if (*e->operation == COND_EXPR)
> > +       e->eflag = EFLAG_PHI_MATCH;
> > +    }
> > +
> >    if (token->type == CPP_COLON
> >        && !(token->flags & PREV_WHITE))
> >      {
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 480e36bbbaf..e3b5b8d3810 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
> >
> >  /* Unsigned Saturation Add */
> > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> > +   SAT_ADD = (X + Y) | -((X + Y) < X)  */
> >  (match (usadd_left_part_1 @0 @1)
> >   (plus:c @0 @1)
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> >  (match (usadd_left_part_2 @0 @1)
> >   (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> > +   SAT_ADD = (X + Y) | -((type)(X + Y) < X)  */
> >  (match (usadd_right_part_1 @0 @1)
> >   (negate (convert (lt (plus:c @0 @1) @0)))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
> > +   SAT_ADD = (X + Y) | -(X > (X + Y))  */
> >  (match (usadd_right_part_1 @0 @1)
> >   (negate (convert (gt @0 (plus:c @0 @1))))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> >  (match (usadd_right_part_2 @0 @1)
> >   (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
> > +(match (usadd_right_part_2 @0 @1)
> > + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> >     because the sub part of left_part_2 cannot work with right_part_1.
> >     For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> > @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  (match (unsigned_integer_sat_add @0 @1)
> >   (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> >
> > -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
> > +   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
> >  (match (unsigned_integer_sat_add @0 @1)
> >   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> >
> > +/* Unsigned saturation add, case 3 (branch with ge):
> > +   SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
> > +
> > +/* Unsigned saturation add, case 4 (branch with lt):
> > +   SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
> > +
> > +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
> > +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1.  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> > +  (usadd_left_part_2 @0 @1) integer_minus_onep))
> > +
> > +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
> > +   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW.  */
> > +(match (unsigned_integer_sat_add @0 @1)
> > + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
> > +  integer_minus_onep (usadd_left_part_2 @0 @1)))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 62da1c5ee08..8624d414347 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> >   *      =>
> >   *      _12 = .SAT_ADD (_4, _6);  */
> >  static void
> > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > +match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> >  {
> >    gcall *call = NULL;
> >
> > @@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> >      }
> >  }
> >
> > +/*
> > + * Try to match saturation arith pattern(s).
> > + * From:
> > + *   <bb 2> :
> > + *   _1 = x_3(D) + y_4(D);
> > + *   if (_1 >= x_3(D))
> > + *     goto <bb 3>; [INV]
> > + *   else
> > + *     goto <bb 4>; [INV]
> > + *
> > + *   <bb 3> :
> > + *
> > + *   <bb 4> :
> > + *   # _2 = PHI <255(2), _1(3)>
> > + *
> > + * To:
> > + *   <bb 4> [local count: 1073741824]:
> > + *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
> > +
> > +static void
> > +match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
> > +{
> > +  if (gimple_phi_num_args (phi) != 2)
> > +    return;
> > +
> > +  tree ops[2];
> > +  tree phi_result = gimple_phi_result (phi);
> > +
> > +  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
> > +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
> > +                                        OPTIMIZE_FOR_BOTH))
> > +    {
> > +      gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > +      gimple_call_set_lhs (call, phi_result);
> > +      gsi_insert_before (gsi, call, GSI_SAME_STMT);
> > +
> > +      gimple_stmt_iterator psi = gsi_for_stmt (phi);
> > +      remove_phi_node (&psi, false);
> > +    }
> > +}
> > +
> >  /* Recognize for unsigned x
> >     x = y - z;
> >     if (x > y)
> > @@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >
> >    fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
> >
> > +  for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> > +    {
> > +      gimple_stmt_iterator gsi = gsi_last_bb (bb);
> > +      match_phi_saturation_arith (&gsi, psi.phi ());
> > +    }
> > +
> >    for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
> >      {
> >        gimple *stmt = gsi_stmt (gsi);
> > @@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >               break;
> >
> >             case BIT_IOR_EXPR:
> > -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > +             match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
> >               /* fall-through  */
> >             case BIT_XOR_EXPR:
> >               match_uaddc_usubc (&gsi, stmt, code);
> > --
> > 2.34.1
> >
diff mbox series

Patch

diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi
index 01f19e2f62c..fc0cf6d7552 100644
--- a/gcc/doc/match-and-simplify.texi
+++ b/gcc/doc/match-and-simplify.texi
@@ -361,6 +361,20 @@  Usually the types of the generated result expressions are
 determined from the context, but sometimes like in the above case
 it is required that you specify them explicitly.
 
+Another modifier for generated expressions is @code{^} which
+tells the machinery to try more matches for some special cases.
+Normally the @code{cond} only allows the gimple assign for matching.
+It will also try to match the gimple phi when appending the
+@code{^} to the @code{cond}.  Aka @code{cond^}.
+
+@smallexample
+(match (unsigned_sat_add @0 @1)
+ (cond^ (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep))
+@end smallexample
+
+The above matching will generate @code{gimple_unsigned_sat_add} predicate
+function that accepts both the gimple assign and gimple phi.
+
 Another modifier for generated expressions is @code{!} which
 tells the machinery to only consider the simplification in case
 the marked expression simplified to a simple operand.  Consider
diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index f1e0e7abe0c..7355c2098cf 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -838,6 +838,11 @@  public:
   predicate_id *p;
 };
 
+enum expr_flag {
+  EFLAG_NONE,
+  EFLAG_PHI_MATCH,
+};
+
 /* An operand that constitutes an expression.  Expressions include
    function calls and user-defined predicate invocations.  */
 
@@ -848,12 +853,12 @@  public:
     : operand (OP_EXPR, loc), operation (operation_),
       ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
       is_generic (false), force_single_use (false), force_leaf (false),
-      opt_grp (0) {}
+      opt_grp (0), eflag (EFLAG_NONE) {}
   expr (expr *e)
     : operand (OP_EXPR, e->location), operation (e->operation),
       ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
       is_generic (e->is_generic), force_single_use (e->force_single_use),
-      force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
+      force_leaf (e->force_leaf), opt_grp (e->opt_grp), eflag (e->eflag) {}
   void append_op (operand *op) { ops.safe_push (op); }
   /* The operator and its operands.  */
   id_base *operation;
@@ -873,6 +878,9 @@  public:
   bool force_leaf;
   /* If non-zero, the group for optional handling.  */
   unsigned char opt_grp;
+  /* Indicate the flag of the expr,  default is none.  */
+  enum expr_flag eflag;
+
   void gen_transform (FILE *f, int, const char *, bool, int,
 		      const char *, capture_info *,
 		      dt_operand ** = 0, int = 0) override;
@@ -1819,6 +1827,7 @@  public:
 
   char *get_name (char *);
   void gen_opname (char *, unsigned);
+  void gen_phi_on_cond (FILE *, int, int);
 };
 
 /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
@@ -3251,6 +3260,7 @@  dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
 			  depth);
 	  indent += 4;
 	  fprintf_indent (f, indent, "{\n");
+	  bool cond_expr_p = false, expr_flag_p = false;
 	  for (unsigned i = 0; i < exprs_len; ++i)
 	    {
 	      expr *e = as_a <expr *> (gimple_exprs[i]->op);
@@ -3262,6 +3272,8 @@  dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
 	      else
 		{
 		  id_base *op = e->operation;
+		  cond_expr_p |= *op == COND_EXPR;
+		  expr_flag_p |= e->eflag != EFLAG_NONE;
 		  if (*op == CONVERT_EXPR || *op == NOP_EXPR)
 		    fprintf_indent (f, indent, "CASE_CONVERT:\n");
 		  else
@@ -3275,6 +3287,27 @@  dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
 	  fprintf_indent (f, indent, "default:;\n");
 	  fprintf_indent (f, indent, "}\n");
 	  indent -= 4;
+
+	  if (cond_expr_p && expr_flag_p)
+	    {
+	      fprintf_indent (f, indent,
+		"else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
+		depth, depth);
+	      indent += 2;
+	      fprintf_indent (f, indent, "{\n");
+	      indent += 2;
+
+	      for (unsigned i = 0; i < exprs_len; i++)
+		{
+		  expr *e = as_a <expr *> (gimple_exprs[i]->op);
+		  if (*e->operation == COND_EXPR && e->eflag == EFLAG_PHI_MATCH)
+		    gimple_exprs[i]->gen_phi_on_cond (f, indent, depth);
+		}
+
+	      indent -= 2;
+	      fprintf_indent (f, indent, "}\n");
+	      indent -= 2;
+	    }
 	}
 
       if (fns_len)
@@ -3483,6 +3516,86 @@  dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
     }
 }
 
+/* Generate matching code for the phi when meet COND_EXPR.  */
+
+void
+dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
+{
+  fprintf_indent (f, indent,
+    "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+
+  fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
+
+  indent += 2;
+  fprintf_indent (f, indent, "{\n");
+  indent += 2;
+
+  fprintf_indent (f, indent,
+    "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
+  fprintf_indent (f, indent,
+    "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
+  fprintf_indent (f, indent,
+    "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
+    "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+  fprintf_indent (f, indent,
+    "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
+    "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
+    depth, depth, depth, depth);
+
+  fprintf_indent (f, indent,
+    "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
+    depth, depth);
+  fprintf_indent (f, indent, "if (_ct_%d"
+    " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
+  fprintf_indent (f, indent,
+    "  && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
+  fprintf_indent (f, indent,
+    "  && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
+
+  indent += 2;
+  fprintf_indent (f, indent, "{\n");
+  indent += 2;
+
+  fprintf_indent (f, indent,
+    "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
+  fprintf_indent (f, indent,
+    "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
+
+  char opname_0[20];
+  char opname_1[20];
+  char opname_2[20];
+  gen_opname (opname_0, 0);
+
+  fprintf_indent (f, indent,
+    "tree %s = build2 (gimple_cond_code (_ct_%d), "
+    "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
+    opname_0, depth, depth, depth);
+
+  fprintf_indent (f, indent,
+    "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
+    " & EDGE_TRUE_VALUE;\n", depth, depth);
+
+  gen_opname (opname_1, 1);
+  fprintf_indent (f, indent,
+    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
+    opname_1, depth, depth);
+
+  gen_opname (opname_2, 2);
+  fprintf_indent (f, indent,
+    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
+    opname_2, depth, depth);
+
+  gen_kids (f, indent, true, depth);
+
+  indent -= 2;
+  fprintf_indent (f, indent, "}\n");
+  indent -= 2;
+
+  indent -= 2;
+  fprintf_indent (f, indent, "}\n");
+  indent -= 2;
+}
+
 /* Emit a logging call to the debug file to the file F, with the INDENT from
    either the RESULT location or the S's match location if RESULT is null. */
 static void
@@ -4600,6 +4713,15 @@  parser::parse_expr ()
       e->force_leaf = true;
     }
 
+  if (parsing_match_operand && token->type == CPP_XOR
+    && !(token->flags & PREV_WHITE))
+    {
+      eat_token (CPP_XOR);
+
+      if (*e->operation == COND_EXPR)
+	e->eflag = EFLAG_PHI_MATCH;
+    }
+
   if (token->type == CPP_COLON
       && !(token->flags & PREV_WHITE))
     {
diff --git a/gcc/match.pd b/gcc/match.pd
index 480e36bbbaf..e3b5b8d3810 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3056,31 +3056,48 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
 
 /* Unsigned Saturation Add */
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+   SAT_ADD = (X + Y) | -((X + Y) < X)  */
 (match (usadd_left_part_1 @0 @1)
  (plus:c @0 @1)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
 (match (usadd_left_part_2 @0 @1)
  (realpart (IFN_ADD_OVERFLOW:c @0 @1))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+   SAT_ADD = (X + Y) | -((type)(X + Y) < X)  */
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (lt (plus:c @0 @1) @0)))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+   SAT_ADD = (X + Y) | -(X > (X + Y))  */
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (gt @0 (plus:c @0 @1))))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
 (match (usadd_right_part_2 @0 @1)
  (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
+(match (usadd_right_part_2 @0 @1)
+ (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
    because the sub part of left_part_2 cannot work with right_part_1.
    For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
@@ -3092,10 +3109,34 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
 
-/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
+/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+/* Unsigned saturation add, case 3 (branch with ge):
+   SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
+
+/* Unsigned saturation add, case 4 (branch with lt):
+   SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
+
+/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
+   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+  (usadd_left_part_2 @0 @1) integer_minus_onep))
+
+/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
+   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+  integer_minus_onep (usadd_left_part_2 @0 @1)))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 62da1c5ee08..8624d414347 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4099,7 +4099,7 @@  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
  *      =>
  *      _12 = .SAT_ADD (_4, _6);  */
 static void
-match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
 {
   gcall *call = NULL;
 
@@ -4116,6 +4116,47 @@  match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
     }
 }
 
+/*
+ * Try to match saturation arith pattern(s).
+ * From:
+ *   <bb 2> :
+ *   _1 = x_3(D) + y_4(D);
+ *   if (_1 >= x_3(D))
+ *     goto <bb 3>; [INV]
+ *   else
+ *     goto <bb 4>; [INV]
+ *
+ *   <bb 3> :
+ *
+ *   <bb 4> :
+ *   # _2 = PHI <255(2), _1(3)>
+ *
+ * To:
+ *   <bb 4> [local count: 1073741824]:
+ *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
+
+static void
+match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
+{
+  if (gimple_phi_num_args (phi) != 2)
+    return;
+
+  tree ops[2];
+  tree phi_result = gimple_phi_result (phi);
+
+  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
+					 OPTIMIZE_FOR_BOTH))
+    {
+      gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
+      gimple_call_set_lhs (call, phi_result);
+      gsi_insert_before (gsi, call, GSI_SAME_STMT);
+
+      gimple_stmt_iterator psi = gsi_for_stmt (phi);
+      remove_phi_node (&psi, false);
+    }
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -6029,6 +6070,12 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
 
   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
 
+  for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
+    {
+      gimple_stmt_iterator gsi = gsi_last_bb (bb);
+      match_phi_saturation_arith (&gsi, psi.phi ());
+    }
+
   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
     {
       gimple *stmt = gsi_stmt (gsi);
@@ -6078,7 +6125,7 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
 	      break;
 
 	    case BIT_IOR_EXPR:
-	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+	      match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
 	      /* fall-through  */
 	    case BIT_XOR_EXPR:
 	      match_uaddc_usubc (&gsi, stmt, code);