Message ID | 20240606133718.2343188-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [v7] Match: Support more form for scalar unsigned SAT_ADD | expand |
On Thu, Jun 6, 2024 at 3:37 PM <pan2.li@intel.com> wrote: > > From: Pan Li <pan2.li@intel.com> > > After we support one gassign form of the unsigned .SAT_ADD, we > would like to support more forms including both the branch and > branchless. There are 5 other forms of .SAT_ADD, list as below: > > Form 1: > #define SAT_ADD_U_1(T) \ > T sat_add_u_1_##T(T x, T y) \ > { \ > return (T)(x + y) >= x ? (x + y) : -1; \ > } > > Form 2: > #define SAT_ADD_U_2(T) \ > T sat_add_u_2_##T(T x, T y) \ > { \ > T ret; \ > T overflow = __builtin_add_overflow (x, y, &ret); \ > return (T)(-overflow) | ret; \ > } > > Form 3: > #define SAT_ADD_U_3(T) \ > T sat_add_u_3_##T (T x, T y) \ > { \ > T ret; \ > return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \ > } > > Form 4: > #define SAT_ADD_U_4(T) \ > T sat_add_u_4_##T (T x, T y) \ > { \ > T ret; \ > return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \ > } > > Form 5: > #define SAT_ADD_U_5(T) \ > T sat_add_u_5_##T(T x, T y) \ > { \ > return (T)(x + y) < x ? -1 : (x + y); \ > } > > Take the forms 3 of above as example: > > uint64_t > sat_add (uint64_t x, uint64_t y) > { > uint64_t ret; > return __builtin_add_overflow (x, y, &ret) ? -1 : ret; > } > > Before this patch: > uint64_t sat_add (uint64_t x, uint64_t y) > { > long unsigned int _1; > long unsigned int _2; > uint64_t _3; > __complex__ long unsigned int _6; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); > _2 = IMAGPART_EXPR <_6>; > if (_2 != 0) > goto <bb 4>; [35.00%] > else > goto <bb 3>; [65.00%] > ;; succ: 4 > ;; 3 > > ;; basic block 3, loop depth 0 > ;; pred: 2 > _1 = REALPART_EXPR <_6>; > ;; succ: 4 > > ;; basic block 4, loop depth 0 > ;; pred: 3 > ;; 2 > # _3 = PHI <_1(3), 18446744073709551615(2)> > return _3; > ;; succ: EXIT > } > > After this patch: > uint64_t sat_add (uint64_t x, uint64_t y) > { > long unsigned int _12; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call] > return _12; > ;; succ: EXIT > } > > The flag '^' acts on cond_expr will generate matching code similar as below: > > else if (gphi *_a1 = dyn_cast <gphi *> (_d1)) > { > basic_block _b1 = gimple_bb (_a1); > if (gimple_phi_num_args (_a1) == 2) > { > basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src; > basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src; > basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) > ? _pb_0_1 : _pb_1_1; > basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) > ? _pb_1_1 : _pb_0_1; > gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1)); > if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 > && EDGE_COUNT (_other_db_1->succs) == 1 > && EDGE_PRED (_other_db_1, 0)->src == _db_1) > { > tree _cond_lhs_1 = gimple_cond_lhs (_ct_1); > tree _cond_rhs_1 = gimple_cond_rhs (_ct_1); > tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, > _cond_lhs_1, _cond_rhs_1); > bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE; > tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1); > tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0); > .... > > The below test suites are passed for this patch. > * The x86 bootstrap test. > * The x86 fully regression test. > * The riscv fully regression test. OK. Thanks, Richard. > gcc/ChangeLog: > > * doc/match-and-simplify.texi: Add doc for the matching flag '^'. > * genmatch.cc (cmp_operand): Add match_phi comparation. > (dt_node::gen_kids_1): Add cond_expr bool flag for phi match. > (dt_operand::gen_phi_on_cond): Add new func to gen phi matching > on cond_expr. > (parser::parse_expr): Add handling for the expr flag '^'. > * match.pd: Add more form for unsigned .SAT_ADD. > * tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Add > new func impl to build call for phi gimple. > (match_unsigned_saturation_add): Add new func impl to match the > .SAT_ADD for phi gimple. > (math_opts_dom_walker::after_dom_children): Add phi matching > try for all gimple phi stmt. > > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/doc/match-and-simplify.texi | 16 ++++ > gcc/genmatch.cc | 126 +++++++++++++++++++++++++++++++- > gcc/match.pd | 43 ++++++++++- > gcc/tree-ssa-math-opts.cc | 56 +++++++++++++- > 4 files changed, 236 insertions(+), 5 deletions(-) > > diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi > index 01f19e2f62c..63d5af159f5 100644 > --- a/gcc/doc/match-and-simplify.texi > +++ b/gcc/doc/match-and-simplify.texi > @@ -361,6 +361,22 @@ Usually the types of the generated result expressions are > determined from the context, but sometimes like in the above case > it is required that you specify them explicitly. > > +Another modifier for generated expressions is @code{^} which > +tells the machinery to try more matches for some special cases. > +For example, normally the @code{cond} only allows the gimple > +assign when matching. It will also try to match the gimple @code{PHI} > +besides gimple assign if appending the @code{^} to the @code{cond}. > +Aka @code{cond^}. Consider below example > + > +@smallexample > +(match (unsigned_sat_add @@0 @@1) > + (cond^ (ge (plus:c@@2 @@0 @@1) @@0) @@2 integer_minus_onep)) > +@end smallexample > + > +The above matching will generate the predicate function named > +@code{gimple_unsigned_sat_add} that accepts both the gimple > +assign and gimple @code{PHI}. > + > Another modifier for generated expressions is @code{!} which > tells the machinery to only consider the simplification in case > the marked expression simplified to a simple operand. Consider > diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc > index f1e0e7abe0c..a56bd90cb2c 100644 > --- a/gcc/genmatch.cc > +++ b/gcc/genmatch.cc > @@ -848,12 +848,13 @@ public: > : operand (OP_EXPR, loc), operation (operation_), > ops (vNULL), expr_type (NULL), is_commutative (is_commutative_), > is_generic (false), force_single_use (false), force_leaf (false), > - opt_grp (0) {} > + match_phi (false), opt_grp (0) {} > expr (expr *e) > : operand (OP_EXPR, e->location), operation (e->operation), > ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative), > is_generic (e->is_generic), force_single_use (e->force_single_use), > - force_leaf (e->force_leaf), opt_grp (e->opt_grp) {} > + force_leaf (e->force_leaf), match_phi (e->match_phi), > + opt_grp (e->opt_grp) {} > void append_op (operand *op) { ops.safe_push (op); } > /* The operator and its operands. */ > id_base *operation; > @@ -871,6 +872,8 @@ public: > /* Whether in the result expression this should be a leaf node > with any children simplified down to simple operands. */ > bool force_leaf; > + /* Whether the COND_EXPR is matching PHI gimple, default false. */ > + bool match_phi; > /* If non-zero, the group for optional handling. */ > unsigned char opt_grp; > void gen_transform (FILE *f, int, const char *, bool, int, > @@ -1819,6 +1822,7 @@ public: > > char *get_name (char *); > void gen_opname (char *, unsigned); > + void gen_phi_on_cond (FILE *, int, int); > }; > > /* Leaf node of the decision tree, used for DT_SIMPLIFY. */ > @@ -1898,7 +1902,8 @@ cmp_operand (operand *o1, operand *o2) > expr *e1 = static_cast<expr *>(o1); > expr *e2 = static_cast<expr *>(o2); > if (e1->operation != e2->operation > - || e1->is_generic != e2->is_generic) > + || e1->is_generic != e2->is_generic > + || e1->match_phi != e2->match_phi) > return false; > if (e1->operation->kind == id_base::FN > /* For function calls also compare number of arguments. */ > @@ -3251,6 +3256,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, > depth); > indent += 4; > fprintf_indent (f, indent, "{\n"); > + bool cond_expr_p = false; > for (unsigned i = 0; i < exprs_len; ++i) > { > expr *e = as_a <expr *> (gimple_exprs[i]->op); > @@ -3262,6 +3268,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, > else > { > id_base *op = e->operation; > + cond_expr_p |= (*op == COND_EXPR && e->match_phi); > if (*op == CONVERT_EXPR || *op == NOP_EXPR) > fprintf_indent (f, indent, "CASE_CONVERT:\n"); > else > @@ -3275,6 +3282,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, > fprintf_indent (f, indent, "default:;\n"); > fprintf_indent (f, indent, "}\n"); > indent -= 4; > + > + if (cond_expr_p) > + { > + fprintf_indent (f, indent, > + "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n", > + depth, depth); > + indent += 2; > + fprintf_indent (f, indent, "{\n"); > + indent += 2; > + > + for (unsigned i = 0; i < exprs_len; i++) > + { > + expr *e = as_a <expr *> (gimple_exprs[i]->op); > + if (*e->operation == COND_EXPR && e->match_phi) > + gimple_exprs[i]->gen_phi_on_cond (f, indent, depth); > + } > + > + indent -= 2; > + fprintf_indent (f, indent, "}\n"); > + indent -= 2; > + } > } > > if (fns_len) > @@ -3483,6 +3511,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth) > } > } > > +/* Generate matching code for the phi when meet COND_EXPR. */ > + > +void > +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) > +{ > + fprintf_indent (f, indent, > + "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); > + > + fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth); > + > + indent += 2; > + fprintf_indent (f, indent, "{\n"); > + indent += 2; > + > + fprintf_indent (f, indent, > + "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); > + fprintf_indent (f, indent, > + "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); > + fprintf_indent (f, indent, > + "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? " > + "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth); > + fprintf_indent (f, indent, > + "basic_block _other_db_%d = safe_dyn_cast <gcond *> " > + "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n", > + depth, depth, depth, depth); > + > + fprintf_indent (f, indent, > + "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n", > + depth, depth); > + fprintf_indent (f, indent, "if (_ct_%d" > + " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth); > + fprintf_indent (f, indent, > + " && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth); > + fprintf_indent (f, indent, > + " && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth); > + > + indent += 2; > + fprintf_indent (f, indent, "{\n"); > + indent += 2; > + > + fprintf_indent (f, indent, > + "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth); > + fprintf_indent (f, indent, > + "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth); > + > + char opname_0[20]; > + char opname_1[20]; > + char opname_2[20]; > + gen_opname (opname_0, 0); > + > + fprintf_indent (f, indent, > + "tree %s = build2 (gimple_cond_code (_ct_%d), " > + "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n", > + opname_0, depth, depth, depth); > + > + fprintf_indent (f, indent, > + "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags" > + " & EDGE_TRUE_VALUE;\n", depth, depth); > + > + gen_opname (opname_1, 1); > + fprintf_indent (f, indent, > + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n", > + opname_1, depth, depth); > + > + gen_opname (opname_2, 2); > + fprintf_indent (f, indent, > + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n", > + opname_2, depth, depth); > + > + gen_kids (f, indent, true, depth); > + > + indent -= 2; > + fprintf_indent (f, indent, "}\n"); > + indent -= 2; > + > + indent -= 2; > + fprintf_indent (f, indent, "}\n"); > + indent -= 2; > +} > + > /* Emit a logging call to the debug file to the file F, with the INDENT from > either the RESULT location or the S's match location if RESULT is null. */ > static void > @@ -4600,6 +4708,18 @@ parser::parse_expr () > e->force_leaf = true; > } > > + if (token->type == CPP_XOR && !(token->flags & PREV_WHITE)) > + { > + if (!parsing_match_operand) > + fatal_at (token, "modifier '^' is only valid in a match expression"); > + > + if (!(*e->operation == COND_EXPR)) > + fatal_at (token, "modifier '^' can only act on operation COND_EXPR"); > + > + eat_token (CPP_XOR); > + e->match_phi = true; > + } > + > if (token->type == CPP_COLON > && !(token->flags & PREV_WHITE)) > { > diff --git a/gcc/match.pd b/gcc/match.pd > index 7c1ad428a3c..5f36c1fac82 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)))))) > > /* Unsigned Saturation Add */ > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: > + SAT_ADD = (X + Y) | -((X + Y) < X) */ > (match (usadd_left_part_1 @0 @1) > (plus:c @0 @1) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ > (match (usadd_left_part_2 @0 @1) > (realpart (IFN_ADD_OVERFLOW:c @0 @1)) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: > + SAT_ADD = (X + Y) | -((type)(X + Y) < X) */ > (match (usadd_right_part_1 @0 @1) > (negate (convert (lt (plus:c @0 @1) @0))) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: > + SAT_ADD = (X + Y) | -(X > (X + Y)) */ > (match (usadd_right_part_1 @0 @1) > (negate (convert (gt @0 (plus:c @0 @1)))) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ > (match (usadd_right_part_2 @0 @1) > (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop))) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */ > +(match (usadd_right_part_2 @0 @1) > + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1))) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)))) > + > /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2 > because the sub part of left_part_2 cannot work with right_part_1. > For example, left_part_2 pattern focus one .ADD_OVERFLOW but the > @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (match (unsigned_integer_sat_add @0 @1) > (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1))) > > -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */ > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW): > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ > (match (unsigned_integer_sat_add @0 @1) > (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > > +/* Unsigned saturation add, case 3 (branch with ge): > + SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep)) > + > +/* Unsigned saturation add, case 4 (branch with lt): > + SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) > + > +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): > + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) > + (usadd_left_part_2 @0 @1) integer_minus_onep)) > + > +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW): > + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW. */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) > + integer_minus_onep (usadd_left_part_2 @0 @1))) > + > /* Unsigned saturation sub, case 1 (branch with gt): > SAT_U_SUB = X > Y ? X - Y : 0 */ > (match (unsigned_integer_sat_sub @0 @1) > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index 469e34d828d..173b0366f5e 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -4101,8 +4101,24 @@ build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, > } > } > > +static void > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, > + internal_fn fn, tree lhs, tree op_0, > + tree op_1) > +{ > + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) > + { > + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); > + gimple_call_set_lhs (call, lhs); > + gsi_insert_before (gsi, call, GSI_SAME_STMT); > + > + gimple_stmt_iterator psi = gsi_for_stmt (phi); > + remove_phi_node (&psi, /* release_lhs_p */ false); > + } > +} > + > /* > - * Try to match saturation unsigned add. > + * Try to match saturation unsigned add with assign. > * _7 = _4 + _6; > * _8 = _4 > _7; > * _9 = (long unsigned int) _8; > @@ -4121,6 +4137,37 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) > build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); > } > > +/* > + * Try to match saturation unsigned add with PHI. > + * <bb 2> : > + * _1 = x_3(D) + y_4(D); > + * if (_1 >= x_3(D)) > + * goto <bb 3>; [INV] > + * else > + * goto <bb 4>; [INV] > + * > + * <bb 3> : > + * > + * <bb 4> : > + * # _2 = PHI <255(2), _1(3)> > + * => > + * <bb 4> [local count: 1073741824]: > + * _2 = .SAT_ADD (x_4(D), y_5(D)); */ > + > +static void > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi) > +{ > + if (gimple_phi_num_args (phi) != 2) > + return; > + > + tree ops[2]; > + tree phi_result = gimple_phi_result (phi); > + > + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)) > + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result, > + ops[0], ops[1]); > +} > + > /* > * Try to match saturation unsigned sub. > * _1 = _4 >= _5; > @@ -6052,6 +6099,13 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > > fma_deferring_state fma_state (param_avoid_fma_max_bits > 0); > > + for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); > + gsi_next (&psi)) > + { > + gimple_stmt_iterator gsi = gsi_last_bb (bb); > + match_unsigned_saturation_add (&gsi, psi.phi ()); > + } > + > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) > { > gimple *stmt = gsi_stmt (gsi); > -- > 2.34.1 >
Committed, thanks Richard. Pan -----Original Message----- From: Richard Biener <richard.guenther@gmail.com> Sent: Thursday, June 6, 2024 10:04 PM To: Li, Pan2 <pan2.li@intel.com> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: Re: [PATCH v7] Match: Support more form for scalar unsigned SAT_ADD On Thu, Jun 6, 2024 at 3:37 PM <pan2.li@intel.com> wrote: > > From: Pan Li <pan2.li@intel.com> > > After we support one gassign form of the unsigned .SAT_ADD, we > would like to support more forms including both the branch and > branchless. There are 5 other forms of .SAT_ADD, list as below: > > Form 1: > #define SAT_ADD_U_1(T) \ > T sat_add_u_1_##T(T x, T y) \ > { \ > return (T)(x + y) >= x ? (x + y) : -1; \ > } > > Form 2: > #define SAT_ADD_U_2(T) \ > T sat_add_u_2_##T(T x, T y) \ > { \ > T ret; \ > T overflow = __builtin_add_overflow (x, y, &ret); \ > return (T)(-overflow) | ret; \ > } > > Form 3: > #define SAT_ADD_U_3(T) \ > T sat_add_u_3_##T (T x, T y) \ > { \ > T ret; \ > return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \ > } > > Form 4: > #define SAT_ADD_U_4(T) \ > T sat_add_u_4_##T (T x, T y) \ > { \ > T ret; \ > return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \ > } > > Form 5: > #define SAT_ADD_U_5(T) \ > T sat_add_u_5_##T(T x, T y) \ > { \ > return (T)(x + y) < x ? -1 : (x + y); \ > } > > Take the forms 3 of above as example: > > uint64_t > sat_add (uint64_t x, uint64_t y) > { > uint64_t ret; > return __builtin_add_overflow (x, y, &ret) ? -1 : ret; > } > > Before this patch: > uint64_t sat_add (uint64_t x, uint64_t y) > { > long unsigned int _1; > long unsigned int _2; > uint64_t _3; > __complex__ long unsigned int _6; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _6 = .ADD_OVERFLOW (x_4(D), y_5(D)); > _2 = IMAGPART_EXPR <_6>; > if (_2 != 0) > goto <bb 4>; [35.00%] > else > goto <bb 3>; [65.00%] > ;; succ: 4 > ;; 3 > > ;; basic block 3, loop depth 0 > ;; pred: 2 > _1 = REALPART_EXPR <_6>; > ;; succ: 4 > > ;; basic block 4, loop depth 0 > ;; pred: 3 > ;; 2 > # _3 = PHI <_1(3), 18446744073709551615(2)> > return _3; > ;; succ: EXIT > } > > After this patch: > uint64_t sat_add (uint64_t x, uint64_t y) > { > long unsigned int _12; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call] > return _12; > ;; succ: EXIT > } > > The flag '^' acts on cond_expr will generate matching code similar as below: > > else if (gphi *_a1 = dyn_cast <gphi *> (_d1)) > { > basic_block _b1 = gimple_bb (_a1); > if (gimple_phi_num_args (_a1) == 2) > { > basic_block _pb_0_1 = EDGE_PRED (_b1, 0)->src; > basic_block _pb_1_1 = EDGE_PRED (_b1, 1)->src; > basic_block _db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) > ? _pb_0_1 : _pb_1_1; > basic_block _other_db_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_1)) > ? _pb_1_1 : _pb_0_1; > gcond *_ct_1 = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_1)); > if (_ct_1 && EDGE_COUNT (_other_db_1->preds) == 1 > && EDGE_COUNT (_other_db_1->succs) == 1 > && EDGE_PRED (_other_db_1, 0)->src == _db_1) > { > tree _cond_lhs_1 = gimple_cond_lhs (_ct_1); > tree _cond_rhs_1 = gimple_cond_rhs (_ct_1); > tree _p0 = build2 (gimple_cond_code (_ct_1), boolean_type_node, > _cond_lhs_1, _cond_rhs_1); > bool _arg_0_is_true_1 = gimple_phi_arg_edge (_a1, 0)->flags & EDGE_TRUE_VALUE; > tree _p1 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 0 : 1); > tree _p2 = gimple_phi_arg_def (_a1, _arg_0_is_true_1 ? 1 : 0); > .... > > The below test suites are passed for this patch. > * The x86 bootstrap test. > * The x86 fully regression test. > * The riscv fully regression test. OK. Thanks, Richard. > gcc/ChangeLog: > > * doc/match-and-simplify.texi: Add doc for the matching flag '^'. > * genmatch.cc (cmp_operand): Add match_phi comparation. > (dt_node::gen_kids_1): Add cond_expr bool flag for phi match. > (dt_operand::gen_phi_on_cond): Add new func to gen phi matching > on cond_expr. > (parser::parse_expr): Add handling for the expr flag '^'. > * match.pd: Add more form for unsigned .SAT_ADD. > * tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Add > new func impl to build call for phi gimple. > (match_unsigned_saturation_add): Add new func impl to match the > .SAT_ADD for phi gimple. > (math_opts_dom_walker::after_dom_children): Add phi matching > try for all gimple phi stmt. > > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/doc/match-and-simplify.texi | 16 ++++ > gcc/genmatch.cc | 126 +++++++++++++++++++++++++++++++- > gcc/match.pd | 43 ++++++++++- > gcc/tree-ssa-math-opts.cc | 56 +++++++++++++- > 4 files changed, 236 insertions(+), 5 deletions(-) > > diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi > index 01f19e2f62c..63d5af159f5 100644 > --- a/gcc/doc/match-and-simplify.texi > +++ b/gcc/doc/match-and-simplify.texi > @@ -361,6 +361,22 @@ Usually the types of the generated result expressions are > determined from the context, but sometimes like in the above case > it is required that you specify them explicitly. > > +Another modifier for generated expressions is @code{^} which > +tells the machinery to try more matches for some special cases. > +For example, normally the @code{cond} only allows the gimple > +assign when matching. It will also try to match the gimple @code{PHI} > +besides gimple assign if appending the @code{^} to the @code{cond}. > +Aka @code{cond^}. Consider below example > + > +@smallexample > +(match (unsigned_sat_add @@0 @@1) > + (cond^ (ge (plus:c@@2 @@0 @@1) @@0) @@2 integer_minus_onep)) > +@end smallexample > + > +The above matching will generate the predicate function named > +@code{gimple_unsigned_sat_add} that accepts both the gimple > +assign and gimple @code{PHI}. > + > Another modifier for generated expressions is @code{!} which > tells the machinery to only consider the simplification in case > the marked expression simplified to a simple operand. Consider > diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc > index f1e0e7abe0c..a56bd90cb2c 100644 > --- a/gcc/genmatch.cc > +++ b/gcc/genmatch.cc > @@ -848,12 +848,13 @@ public: > : operand (OP_EXPR, loc), operation (operation_), > ops (vNULL), expr_type (NULL), is_commutative (is_commutative_), > is_generic (false), force_single_use (false), force_leaf (false), > - opt_grp (0) {} > + match_phi (false), opt_grp (0) {} > expr (expr *e) > : operand (OP_EXPR, e->location), operation (e->operation), > ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative), > is_generic (e->is_generic), force_single_use (e->force_single_use), > - force_leaf (e->force_leaf), opt_grp (e->opt_grp) {} > + force_leaf (e->force_leaf), match_phi (e->match_phi), > + opt_grp (e->opt_grp) {} > void append_op (operand *op) { ops.safe_push (op); } > /* The operator and its operands. */ > id_base *operation; > @@ -871,6 +872,8 @@ public: > /* Whether in the result expression this should be a leaf node > with any children simplified down to simple operands. */ > bool force_leaf; > + /* Whether the COND_EXPR is matching PHI gimple, default false. */ > + bool match_phi; > /* If non-zero, the group for optional handling. */ > unsigned char opt_grp; > void gen_transform (FILE *f, int, const char *, bool, int, > @@ -1819,6 +1822,7 @@ public: > > char *get_name (char *); > void gen_opname (char *, unsigned); > + void gen_phi_on_cond (FILE *, int, int); > }; > > /* Leaf node of the decision tree, used for DT_SIMPLIFY. */ > @@ -1898,7 +1902,8 @@ cmp_operand (operand *o1, operand *o2) > expr *e1 = static_cast<expr *>(o1); > expr *e2 = static_cast<expr *>(o2); > if (e1->operation != e2->operation > - || e1->is_generic != e2->is_generic) > + || e1->is_generic != e2->is_generic > + || e1->match_phi != e2->match_phi) > return false; > if (e1->operation->kind == id_base::FN > /* For function calls also compare number of arguments. */ > @@ -3251,6 +3256,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, > depth); > indent += 4; > fprintf_indent (f, indent, "{\n"); > + bool cond_expr_p = false; > for (unsigned i = 0; i < exprs_len; ++i) > { > expr *e = as_a <expr *> (gimple_exprs[i]->op); > @@ -3262,6 +3268,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, > else > { > id_base *op = e->operation; > + cond_expr_p |= (*op == COND_EXPR && e->match_phi); > if (*op == CONVERT_EXPR || *op == NOP_EXPR) > fprintf_indent (f, indent, "CASE_CONVERT:\n"); > else > @@ -3275,6 +3282,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, > fprintf_indent (f, indent, "default:;\n"); > fprintf_indent (f, indent, "}\n"); > indent -= 4; > + > + if (cond_expr_p) > + { > + fprintf_indent (f, indent, > + "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n", > + depth, depth); > + indent += 2; > + fprintf_indent (f, indent, "{\n"); > + indent += 2; > + > + for (unsigned i = 0; i < exprs_len; i++) > + { > + expr *e = as_a <expr *> (gimple_exprs[i]->op); > + if (*e->operation == COND_EXPR && e->match_phi) > + gimple_exprs[i]->gen_phi_on_cond (f, indent, depth); > + } > + > + indent -= 2; > + fprintf_indent (f, indent, "}\n"); > + indent -= 2; > + } > } > > if (fns_len) > @@ -3483,6 +3511,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth) > } > } > > +/* Generate matching code for the phi when meet COND_EXPR. */ > + > +void > +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) > +{ > + fprintf_indent (f, indent, > + "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); > + > + fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth); > + > + indent += 2; > + fprintf_indent (f, indent, "{\n"); > + indent += 2; > + > + fprintf_indent (f, indent, > + "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); > + fprintf_indent (f, indent, > + "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); > + fprintf_indent (f, indent, > + "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? " > + "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth); > + fprintf_indent (f, indent, > + "basic_block _other_db_%d = safe_dyn_cast <gcond *> " > + "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n", > + depth, depth, depth, depth); > + > + fprintf_indent (f, indent, > + "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n", > + depth, depth); > + fprintf_indent (f, indent, "if (_ct_%d" > + " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth); > + fprintf_indent (f, indent, > + " && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth); > + fprintf_indent (f, indent, > + " && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth); > + > + indent += 2; > + fprintf_indent (f, indent, "{\n"); > + indent += 2; > + > + fprintf_indent (f, indent, > + "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth); > + fprintf_indent (f, indent, > + "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth); > + > + char opname_0[20]; > + char opname_1[20]; > + char opname_2[20]; > + gen_opname (opname_0, 0); > + > + fprintf_indent (f, indent, > + "tree %s = build2 (gimple_cond_code (_ct_%d), " > + "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n", > + opname_0, depth, depth, depth); > + > + fprintf_indent (f, indent, > + "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags" > + " & EDGE_TRUE_VALUE;\n", depth, depth); > + > + gen_opname (opname_1, 1); > + fprintf_indent (f, indent, > + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n", > + opname_1, depth, depth); > + > + gen_opname (opname_2, 2); > + fprintf_indent (f, indent, > + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n", > + opname_2, depth, depth); > + > + gen_kids (f, indent, true, depth); > + > + indent -= 2; > + fprintf_indent (f, indent, "}\n"); > + indent -= 2; > + > + indent -= 2; > + fprintf_indent (f, indent, "}\n"); > + indent -= 2; > +} > + > /* Emit a logging call to the debug file to the file F, with the INDENT from > either the RESULT location or the S's match location if RESULT is null. */ > static void > @@ -4600,6 +4708,18 @@ parser::parse_expr () > e->force_leaf = true; > } > > + if (token->type == CPP_XOR && !(token->flags & PREV_WHITE)) > + { > + if (!parsing_match_operand) > + fatal_at (token, "modifier '^' is only valid in a match expression"); > + > + if (!(*e->operation == COND_EXPR)) > + fatal_at (token, "modifier '^' can only act on operation COND_EXPR"); > + > + eat_token (CPP_XOR); > + e->match_phi = true; > + } > + > if (token->type == CPP_COLON > && !(token->flags & PREV_WHITE)) > { > diff --git a/gcc/match.pd b/gcc/match.pd > index 7c1ad428a3c..5f36c1fac82 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)))))) > > /* Unsigned Saturation Add */ > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: > + SAT_ADD = (X + Y) | -((X + Y) < X) */ > (match (usadd_left_part_1 @0 @1) > (plus:c @0 @1) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ > (match (usadd_left_part_2 @0 @1) > (realpart (IFN_ADD_OVERFLOW:c @0 @1)) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: > + SAT_ADD = (X + Y) | -((type)(X + Y) < X) */ > (match (usadd_right_part_1 @0 @1) > (negate (convert (lt (plus:c @0 @1) @0))) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: > + SAT_ADD = (X + Y) | -(X > (X + Y)) */ > (match (usadd_right_part_1 @0 @1) > (negate (convert (gt @0 (plus:c @0 @1)))) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ > (match (usadd_right_part_2 @0 @1) > (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop))) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)))) > > +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */ > +(match (usadd_right_part_2 @0 @1) > + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1))) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)))) > + > /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2 > because the sub part of left_part_2 cannot work with right_part_1. > For example, left_part_2 pattern focus one .ADD_OVERFLOW but the > @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (match (unsigned_integer_sat_add @0 @1) > (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1))) > > -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */ > +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW): > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or > + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ > (match (unsigned_integer_sat_add @0 @1) > (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > > +/* Unsigned saturation add, case 3 (branch with ge): > + SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep)) > + > +/* Unsigned saturation add, case 4 (branch with lt): > + SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) > + > +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): > + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) > + (usadd_left_part_2 @0 @1) integer_minus_onep)) > + > +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW): > + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW. */ > +(match (unsigned_integer_sat_add @0 @1) > + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) > + integer_minus_onep (usadd_left_part_2 @0 @1))) > + > /* Unsigned saturation sub, case 1 (branch with gt): > SAT_U_SUB = X > Y ? X - Y : 0 */ > (match (unsigned_integer_sat_sub @0 @1) > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index 469e34d828d..173b0366f5e 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -4101,8 +4101,24 @@ build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, > } > } > > +static void > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, > + internal_fn fn, tree lhs, tree op_0, > + tree op_1) > +{ > + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) > + { > + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); > + gimple_call_set_lhs (call, lhs); > + gsi_insert_before (gsi, call, GSI_SAME_STMT); > + > + gimple_stmt_iterator psi = gsi_for_stmt (phi); > + remove_phi_node (&psi, /* release_lhs_p */ false); > + } > +} > + > /* > - * Try to match saturation unsigned add. > + * Try to match saturation unsigned add with assign. > * _7 = _4 + _6; > * _8 = _4 > _7; > * _9 = (long unsigned int) _8; > @@ -4121,6 +4137,37 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) > build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); > } > > +/* > + * Try to match saturation unsigned add with PHI. > + * <bb 2> : > + * _1 = x_3(D) + y_4(D); > + * if (_1 >= x_3(D)) > + * goto <bb 3>; [INV] > + * else > + * goto <bb 4>; [INV] > + * > + * <bb 3> : > + * > + * <bb 4> : > + * # _2 = PHI <255(2), _1(3)> > + * => > + * <bb 4> [local count: 1073741824]: > + * _2 = .SAT_ADD (x_4(D), y_5(D)); */ > + > +static void > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi) > +{ > + if (gimple_phi_num_args (phi) != 2) > + return; > + > + tree ops[2]; > + tree phi_result = gimple_phi_result (phi); > + > + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)) > + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result, > + ops[0], ops[1]); > +} > + > /* > * Try to match saturation unsigned sub. > * _1 = _4 >= _5; > @@ -6052,6 +6099,13 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > > fma_deferring_state fma_state (param_avoid_fma_max_bits > 0); > > + for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); > + gsi_next (&psi)) > + { > + gimple_stmt_iterator gsi = gsi_last_bb (bb); > + match_unsigned_saturation_add (&gsi, psi.phi ()); > + } > + > for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) > { > gimple *stmt = gsi_stmt (gsi); > -- > 2.34.1 >
On Thu, 6 Jun 2024, Li, Pan2 wrote:
> Committed, thanks Richard.
This has broken glibc building for the `riscv64-linux-gnu' target:
iovsprintf.c: In function '__vsprintf_internal':
iovsprintf.c:34:1: error: definition in block 5 follows the use
34 | __vsprintf_internal (char *string, size_t maxlen,
| ^~~~~~~~~~~~~~~~~~~
for SSA_NAME: _9 in statement:
prephitmp_36 = (char *) _9;
during GIMPLE pass: widening_mul
iovsprintf.c:34:1: internal compiler error: verify_ssa failed
0x7fffb1474bf7 generic_start_main
../csu/libc-start.c:308
0x7fffb1474de3 __libc_start_main
../sysdeps/unix/sysv/linux/powerpc/libc-start.c:116
Please submit a full bug report, with preprocessed source (by using -freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.
Use just `riscv64-linux-gnu -O2' on the attached preprocessed source to
reproduce. This builds just fine up to e14afbe2d1c6^ and then breaks with
the message quoted.
Shall I investigate it further or will you be able to figure it out with
the information supplied what's wrong?
Maciej
Could you please help to update the upstream for another try ? Should be fixed by this commit https://github.com/gcc-mirror/gcc/commit/d03ff3fd3e2da1352a404e3c53fe61314569345c. Feel free to ping me if any questions or concerns. Pan -----Original Message----- From: Maciej W. Rozycki <macro@orcam.me.uk> Sent: Thursday, June 13, 2024 8:01 PM To: Li, Pan2 <pan2.li@intel.com> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: RE: [PATCH v7] Match: Support more form for scalar unsigned SAT_ADD On Thu, 6 Jun 2024, Li, Pan2 wrote: > Committed, thanks Richard. This has broken glibc building for the `riscv64-linux-gnu' target: iovsprintf.c: In function '__vsprintf_internal': iovsprintf.c:34:1: error: definition in block 5 follows the use 34 | __vsprintf_internal (char *string, size_t maxlen, | ^~~~~~~~~~~~~~~~~~~ for SSA_NAME: _9 in statement: prephitmp_36 = (char *) _9; during GIMPLE pass: widening_mul iovsprintf.c:34:1: internal compiler error: verify_ssa failed 0x7fffb1474bf7 generic_start_main ../csu/libc-start.c:308 0x7fffb1474de3 __libc_start_main ../sysdeps/unix/sysv/linux/powerpc/libc-start.c:116 Please submit a full bug report, with preprocessed source (by using -freport-bug). Please include the complete backtrace with any bug report. See <https://gcc.gnu.org/bugs/> for instructions. Use just `riscv64-linux-gnu -O2' on the attached preprocessed source to reproduce. This builds just fine up to e14afbe2d1c6^ and then breaks with the message quoted. Shall I investigate it further or will you be able to figure it out with the information supplied what's wrong? Maciej
On Thu, 13 Jun 2024, Li, Pan2 wrote: > Could you please help to update the upstream for another try ? > > Should be fixed by this commit https://github.com/gcc-mirror/gcc/commit/d03ff3fd3e2da1352a404e3c53fe61314569345c. > > Feel free to ping me if any questions or concerns. Upstream master (as at 609764a42f0c) doesn't build: In file included from .../gcc/gcc/coretypes.h:487, from .../gcc/gcc/tree-vect-stmts.cc:24: In member function 'bool poly_int<N, T>::is_constant() const [with unsigned int N = 2; C = long unsigned int]', inlined from 'C poly_int<N, T>::to_constant() const [with unsigned int N = 2; C = long unsigned int]' at .../gcc/gcc/poly-int.h:588:3, inlined from 'bool get_group_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2155:39, inlined from 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2387:38: .../gcc/gcc/poly-int.h:557:7: error: 'remain.poly_int<2, long unsigned int>::coeffs[1]' may be used uninitialized [-Werror=maybe-uninitialized] 557 | if (this->coeffs[i] != 0) | ^~ .../gcc/gcc/tree-vect-stmts.cc: In function 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)': .../gcc/gcc/tree-vect-stmts.cc:2115:23: note: 'remain.poly_int<2, long unsigned int>::coeffs[1]' was declared here 2115 | poly_uint64 remain; | ^~~~~~ In file included from .../gcc/gcc/system.h:1250, from .../gcc/gcc/tree-vect-stmts.cc:23: In function 'int ceil_log2(long unsigned int)', inlined from 'bool get_group_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2156:43, inlined from 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2387:38: .../gcc/gcc/hwint.h:266:17: error: 'remain.poly_int<2, long unsigned int>::coeffs[0]' may be used uninitialized [-Werror=maybe-uninitialized] 266 | return x == 0 ? 0 : floor_log2 (x - 1) + 1; | ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~ .../gcc/gcc/tree-vect-stmts.cc: In function 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)': .../gcc/gcc/tree-vect-stmts.cc:2115:23: note: 'remain.poly_int<2, long unsigned int>::coeffs[0]' was declared here 2115 | poly_uint64 remain; | ^~~~~~ cc1plus: all warnings being treated as errors make[2]: *** [Makefile:1198: tree-vect-stmts.o] Error 1 and actually e14afbe2d1c6^ doesn't build either (I guess it's just slipped through bisection as the file didn't have to be rebuild or something): In file included from .../gcc/gcc/rtl.h:3973, from .../gcc/gcc/config/riscv/riscv.cc:31: In function 'rtx_def* init_rtx_fmt_ee(rtx, machine_mode, rtx, rtx)', inlined from 'rtx_def* gen_rtx_fmt_ee_stat(rtx_code, machine_mode, rtx, rtx)' at ./genrtl.h:50:26, inlined from 'void riscv_move_integer(rtx, rtx, long int, machine_mode)' at .../gcc/gcc/config/riscv/riscv.cc:2786:10: ./genrtl.h:37:16: error: 'x' may be used uninitialized [-Werror=maybe-uninitialized] 37 | XEXP (rt, 0) = arg0; .../gcc/gcc/config/riscv/riscv.cc: In function 'void riscv_move_integer(rtx, rtx, long int, machine_mode)': .../gcc/gcc/config/riscv/riscv.cc:2723:7: note: 'x' was declared here 2723 | rtx x; | ^ cc1plus: all warnings being treated as errors make[2]: *** [Makefile:2563: riscv.o] Error 1 I hope you'll find this all useful. As it happens I don't need to verify my needs with a RISC-V target anymore, so I'm leaving it all up to you now as I need to switch back to Alpha, which has been my actual objective, and these rebuilds have taken a lot of my attention already. Thank you for your input. Maciej
Thanks for another try. Looks the build failure list below has nothing to do with this patch. I think the correlated owner will take care of this Werror build issue soon. Pan -----Original Message----- From: Maciej W. Rozycki <macro@orcam.me.uk> Sent: Friday, June 14, 2024 12:15 AM To: Li, Pan2 <pan2.li@intel.com> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: RE: [PATCH v7] Match: Support more form for scalar unsigned SAT_ADD On Thu, 13 Jun 2024, Li, Pan2 wrote: > Could you please help to update the upstream for another try ? > > Should be fixed by this commit https://github.com/gcc-mirror/gcc/commit/d03ff3fd3e2da1352a404e3c53fe61314569345c. > > Feel free to ping me if any questions or concerns. Upstream master (as at 609764a42f0c) doesn't build: In file included from .../gcc/gcc/coretypes.h:487, from .../gcc/gcc/tree-vect-stmts.cc:24: In member function 'bool poly_int<N, T>::is_constant() const [with unsigned int N = 2; C = long unsigned int]', inlined from 'C poly_int<N, T>::to_constant() const [with unsigned int N = 2; C = long unsigned int]' at .../gcc/gcc/poly-int.h:588:3, inlined from 'bool get_group_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2155:39, inlined from 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2387:38: .../gcc/gcc/poly-int.h:557:7: error: 'remain.poly_int<2, long unsigned int>::coeffs[1]' may be used uninitialized [-Werror=maybe-uninitialized] 557 | if (this->coeffs[i] != 0) | ^~ .../gcc/gcc/tree-vect-stmts.cc: In function 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)': .../gcc/gcc/tree-vect-stmts.cc:2115:23: note: 'remain.poly_int<2, long unsigned int>::coeffs[1]' was declared here 2115 | poly_uint64 remain; | ^~~~~~ In file included from .../gcc/gcc/system.h:1250, from .../gcc/gcc/tree-vect-stmts.cc:23: In function 'int ceil_log2(long unsigned int)', inlined from 'bool get_group_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2156:43, inlined from 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2387:38: .../gcc/gcc/hwint.h:266:17: error: 'remain.poly_int<2, long unsigned int>::coeffs[0]' may be used uninitialized [-Werror=maybe-uninitialized] 266 | return x == 0 ? 0 : floor_log2 (x - 1) + 1; | ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~ .../gcc/gcc/tree-vect-stmts.cc: In function 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)': .../gcc/gcc/tree-vect-stmts.cc:2115:23: note: 'remain.poly_int<2, long unsigned int>::coeffs[0]' was declared here 2115 | poly_uint64 remain; | ^~~~~~ cc1plus: all warnings being treated as errors make[2]: *** [Makefile:1198: tree-vect-stmts.o] Error 1 and actually e14afbe2d1c6^ doesn't build either (I guess it's just slipped through bisection as the file didn't have to be rebuild or something): In file included from .../gcc/gcc/rtl.h:3973, from .../gcc/gcc/config/riscv/riscv.cc:31: In function 'rtx_def* init_rtx_fmt_ee(rtx, machine_mode, rtx, rtx)', inlined from 'rtx_def* gen_rtx_fmt_ee_stat(rtx_code, machine_mode, rtx, rtx)' at ./genrtl.h:50:26, inlined from 'void riscv_move_integer(rtx, rtx, long int, machine_mode)' at .../gcc/gcc/config/riscv/riscv.cc:2786:10: ./genrtl.h:37:16: error: 'x' may be used uninitialized [-Werror=maybe-uninitialized] 37 | XEXP (rt, 0) = arg0; .../gcc/gcc/config/riscv/riscv.cc: In function 'void riscv_move_integer(rtx, rtx, long int, machine_mode)': .../gcc/gcc/config/riscv/riscv.cc:2723:7: note: 'x' was declared here 2723 | rtx x; | ^ cc1plus: all warnings being treated as errors make[2]: *** [Makefile:2563: riscv.o] Error 1 I hope you'll find this all useful. As it happens I don't need to verify my needs with a RISC-V target anymore, so I'm leaving it all up to you now as I need to switch back to Alpha, which has been my actual objective, and these rebuilds have taken a lot of my attention already. Thank you for your input. Maciej
On Fri, Jun 14, 2024 at 3:21 AM Li, Pan2 <pan2.li@intel.com> wrote: > > Thanks for another try. > > Looks the build failure list below has nothing to do with this patch. I think the correlated owner will take care of this Werror build issue soon. > > Pan > > -----Original Message----- > From: Maciej W. Rozycki <macro@orcam.me.uk> > Sent: Friday, June 14, 2024 12:15 AM > To: Li, Pan2 <pan2.li@intel.com> > Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com > Subject: RE: [PATCH v7] Match: Support more form for scalar unsigned SAT_ADD > > On Thu, 13 Jun 2024, Li, Pan2 wrote: > > > Could you please help to update the upstream for another try ? > > > > Should be fixed by this commit https://github.com/gcc-mirror/gcc/commit/d03ff3fd3e2da1352a404e3c53fe61314569345c. > > > > Feel free to ping me if any questions or concerns. > > Upstream master (as at 609764a42f0c) doesn't build: > > In file included from .../gcc/gcc/coretypes.h:487, > from .../gcc/gcc/tree-vect-stmts.cc:24: > In member function 'bool poly_int<N, T>::is_constant() const [with unsigned int N = 2; C = long unsigned int]', > inlined from 'C poly_int<N, T>::to_constant() const [with unsigned int N = 2; C = long unsigned int]' at .../gcc/gcc/poly-int.h:588:3, > inlined from 'bool get_group_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2155:39, > inlined from 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2387:38: > .../gcc/gcc/poly-int.h:557:7: error: 'remain.poly_int<2, long unsigned int>::coeffs[1]' may be used uninitialized [-Werror=maybe-uninitialized] > 557 | if (this->coeffs[i] != 0) > | ^~ > .../gcc/gcc/tree-vect-stmts.cc: In function 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)': > .../gcc/gcc/tree-vect-stmts.cc:2115:23: note: 'remain.poly_int<2, long unsigned int>::coeffs[1]' was declared here > 2115 | poly_uint64 remain; > | ^~~~~~ > In file included from .../gcc/gcc/system.h:1250, > from .../gcc/gcc/tree-vect-stmts.cc:23: > In function 'int ceil_log2(long unsigned int)', > inlined from 'bool get_group_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2156:43, > inlined from 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)' at .../gcc/gcc/tree-vect-stmts.cc:2387:38: > .../gcc/gcc/hwint.h:266:17: error: 'remain.poly_int<2, long unsigned int>::coeffs[0]' may be used uninitialized [-Werror=maybe-uninitialized] > 266 | return x == 0 ? 0 : floor_log2 (x - 1) + 1; > | ~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~ > .../gcc/gcc/tree-vect-stmts.cc: In function 'bool get_load_store_type(vec_info*, stmt_vec_info, tree, slp_tree, bool, vec_load_store_type, unsigned int, vect_memory_access_type*, poly_int64*, dr_alignment_support*, int*, gather_scatter_info*, internal_fn*)': > .../gcc/gcc/tree-vect-stmts.cc:2115:23: note: 'remain.poly_int<2, long unsigned int>::coeffs[0]' was declared here > 2115 | poly_uint64 remain; > | ^~~~~~ > cc1plus: all warnings being treated as errors > make[2]: *** [Makefile:1198: tree-vect-stmts.o] Error 1 > > and actually e14afbe2d1c6^ doesn't build either (I guess it's just slipped > through bisection as the file didn't have to be rebuild or something): > > In file included from .../gcc/gcc/rtl.h:3973, > from .../gcc/gcc/config/riscv/riscv.cc:31: > In function 'rtx_def* init_rtx_fmt_ee(rtx, machine_mode, rtx, rtx)', > inlined from 'rtx_def* gen_rtx_fmt_ee_stat(rtx_code, machine_mode, rtx, rtx)' at ./genrtl.h:50:26, > inlined from 'void riscv_move_integer(rtx, rtx, long int, machine_mode)' at .../gcc/gcc/config/riscv/riscv.cc:2786:10: > ./genrtl.h:37:16: error: 'x' may be used uninitialized [-Werror=maybe-uninitialized] > 37 | XEXP (rt, 0) = arg0; > .../gcc/gcc/config/riscv/riscv.cc: In function 'void riscv_move_integer(rtx, rtx, long int, machine_mode)': > .../gcc/gcc/config/riscv/riscv.cc:2723:7: note: 'x' was declared here > 2723 | rtx x; > | ^ > cc1plus: all warnings being treated as errors > make[2]: *** [Makefile:2563: riscv.o] Error 1 > > I hope you'll find this all useful. As it happens I don't need to verify > my needs with a RISC-V target anymore, so I'm leaving it all up to you now > as I need to switch back to Alpha, which has been my actual objective, and > these rebuilds have taken a lot of my attention already. I'm including @@ -2148,15 +2148,17 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, ... if (!nunits.is_constant (&cnunits) || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf) - || ((cremain = remain.to_constant (), true) + || (((cremain = group_size * cvf - gap % cnunits), true) && ((cpart_size = (1 << ceil_log2 (cremain))) != cnunits) in a patch I'm testing that should eventually fix the above bogus diagnostic. > Thank you for your input. > > Maciej
On Fri, 14 Jun 2024, Richard Biener wrote: > > I hope you'll find this all useful. As it happens I don't need to verify > > my needs with a RISC-V target anymore, so I'm leaving it all up to you now > > as I need to switch back to Alpha, which has been my actual objective, and > > these rebuilds have taken a lot of my attention already. > > I'm including > > @@ -2148,15 +2148,17 @@ get_group_load_store_type (vec_info *vinfo, > stmt_vec_info stmt_info, > ... > if (!nunits.is_constant (&cnunits) > || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf) > - || ((cremain = remain.to_constant (), true) > + || (((cremain = group_size * cvf - gap % cnunits), true) > && ((cpart_size = (1 << ceil_log2 (cremain))) != cnunits) > > in a patch I'm testing that should eventually fix the above bogus diagnostic. Thank you. I can see your fix went in with e575b5c56137 and I can confirm that with: <https://patchwork.sourceware.org/project/gcc/patch/20240612232026.41780-1-patrick@rivosinc.com/> applied (and its patched-autoconf artefacts removed) `riscv64-linux-gnu' target now succesfully builds for me as at 1bb2535c7cb2. Maciej
diff --git a/gcc/doc/match-and-simplify.texi b/gcc/doc/match-and-simplify.texi index 01f19e2f62c..63d5af159f5 100644 --- a/gcc/doc/match-and-simplify.texi +++ b/gcc/doc/match-and-simplify.texi @@ -361,6 +361,22 @@ Usually the types of the generated result expressions are determined from the context, but sometimes like in the above case it is required that you specify them explicitly. +Another modifier for generated expressions is @code{^} which +tells the machinery to try more matches for some special cases. +For example, normally the @code{cond} only allows the gimple +assign when matching. It will also try to match the gimple @code{PHI} +besides gimple assign if appending the @code{^} to the @code{cond}. +Aka @code{cond^}. Consider below example + +@smallexample +(match (unsigned_sat_add @@0 @@1) + (cond^ (ge (plus:c@@2 @@0 @@1) @@0) @@2 integer_minus_onep)) +@end smallexample + +The above matching will generate the predicate function named +@code{gimple_unsigned_sat_add} that accepts both the gimple +assign and gimple @code{PHI}. + Another modifier for generated expressions is @code{!} which tells the machinery to only consider the simplification in case the marked expression simplified to a simple operand. Consider diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index f1e0e7abe0c..a56bd90cb2c 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -848,12 +848,13 @@ public: : operand (OP_EXPR, loc), operation (operation_), ops (vNULL), expr_type (NULL), is_commutative (is_commutative_), is_generic (false), force_single_use (false), force_leaf (false), - opt_grp (0) {} + match_phi (false), opt_grp (0) {} expr (expr *e) : operand (OP_EXPR, e->location), operation (e->operation), ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative), is_generic (e->is_generic), force_single_use (e->force_single_use), - force_leaf (e->force_leaf), opt_grp (e->opt_grp) {} + force_leaf (e->force_leaf), match_phi (e->match_phi), + opt_grp (e->opt_grp) {} void append_op (operand *op) { ops.safe_push (op); } /* The operator and its operands. */ id_base *operation; @@ -871,6 +872,8 @@ public: /* Whether in the result expression this should be a leaf node with any children simplified down to simple operands. */ bool force_leaf; + /* Whether the COND_EXPR is matching PHI gimple, default false. */ + bool match_phi; /* If non-zero, the group for optional handling. */ unsigned char opt_grp; void gen_transform (FILE *f, int, const char *, bool, int, @@ -1819,6 +1822,7 @@ public: char *get_name (char *); void gen_opname (char *, unsigned); + void gen_phi_on_cond (FILE *, int, int); }; /* Leaf node of the decision tree, used for DT_SIMPLIFY. */ @@ -1898,7 +1902,8 @@ cmp_operand (operand *o1, operand *o2) expr *e1 = static_cast<expr *>(o1); expr *e2 = static_cast<expr *>(o2); if (e1->operation != e2->operation - || e1->is_generic != e2->is_generic) + || e1->is_generic != e2->is_generic + || e1->match_phi != e2->match_phi) return false; if (e1->operation->kind == id_base::FN /* For function calls also compare number of arguments. */ @@ -3251,6 +3256,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, depth); indent += 4; fprintf_indent (f, indent, "{\n"); + bool cond_expr_p = false; for (unsigned i = 0; i < exprs_len; ++i) { expr *e = as_a <expr *> (gimple_exprs[i]->op); @@ -3262,6 +3268,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, else { id_base *op = e->operation; + cond_expr_p |= (*op == COND_EXPR && e->match_phi); if (*op == CONVERT_EXPR || *op == NOP_EXPR) fprintf_indent (f, indent, "CASE_CONVERT:\n"); else @@ -3275,6 +3282,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth, fprintf_indent (f, indent, "default:;\n"); fprintf_indent (f, indent, "}\n"); indent -= 4; + + if (cond_expr_p) + { + fprintf_indent (f, indent, + "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n", + depth, depth); + indent += 2; + fprintf_indent (f, indent, "{\n"); + indent += 2; + + for (unsigned i = 0; i < exprs_len; i++) + { + expr *e = as_a <expr *> (gimple_exprs[i]->op); + if (*e->operation == COND_EXPR && e->match_phi) + gimple_exprs[i]->gen_phi_on_cond (f, indent, depth); + } + + indent -= 2; + fprintf_indent (f, indent, "}\n"); + indent -= 2; + } } if (fns_len) @@ -3483,6 +3511,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth) } } +/* Generate matching code for the phi when meet COND_EXPR. */ + +void +dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth) +{ + fprintf_indent (f, indent, + "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth); + + fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth); + + indent += 2; + fprintf_indent (f, indent, "{\n"); + indent += 2; + + fprintf_indent (f, indent, + "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth); + fprintf_indent (f, indent, + "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth); + fprintf_indent (f, indent, + "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? " + "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth); + fprintf_indent (f, indent, + "basic_block _other_db_%d = safe_dyn_cast <gcond *> " + "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n", + depth, depth, depth, depth); + + fprintf_indent (f, indent, + "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n", + depth, depth); + fprintf_indent (f, indent, "if (_ct_%d" + " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth); + fprintf_indent (f, indent, + " && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth); + fprintf_indent (f, indent, + " && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth); + + indent += 2; + fprintf_indent (f, indent, "{\n"); + indent += 2; + + fprintf_indent (f, indent, + "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth); + fprintf_indent (f, indent, + "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth); + + char opname_0[20]; + char opname_1[20]; + char opname_2[20]; + gen_opname (opname_0, 0); + + fprintf_indent (f, indent, + "tree %s = build2 (gimple_cond_code (_ct_%d), " + "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n", + opname_0, depth, depth, depth); + + fprintf_indent (f, indent, + "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags" + " & EDGE_TRUE_VALUE;\n", depth, depth); + + gen_opname (opname_1, 1); + fprintf_indent (f, indent, + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n", + opname_1, depth, depth); + + gen_opname (opname_2, 2); + fprintf_indent (f, indent, + "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n", + opname_2, depth, depth); + + gen_kids (f, indent, true, depth); + + indent -= 2; + fprintf_indent (f, indent, "}\n"); + indent -= 2; + + indent -= 2; + fprintf_indent (f, indent, "}\n"); + indent -= 2; +} + /* Emit a logging call to the debug file to the file F, with the INDENT from either the RESULT location or the S's match location if RESULT is null. */ static void @@ -4600,6 +4708,18 @@ parser::parse_expr () e->force_leaf = true; } + if (token->type == CPP_XOR && !(token->flags & PREV_WHITE)) + { + if (!parsing_match_operand) + fatal_at (token, "modifier '^' is only valid in a match expression"); + + if (!(*e->operation == COND_EXPR)) + fatal_at (token, "modifier '^' can only act on operation COND_EXPR"); + + eat_token (CPP_XOR); + e->match_phi = true; + } + if (token->type == CPP_COLON && !(token->flags & PREV_WHITE)) { diff --git a/gcc/match.pd b/gcc/match.pd index 7c1ad428a3c..5f36c1fac82 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)))))) /* Unsigned Saturation Add */ +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: + SAT_ADD = (X + Y) | -((X + Y) < X) */ (match (usadd_left_part_1 @0 @1) (plus:c @0 @1) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ (match (usadd_left_part_2 @0 @1) (realpart (IFN_ADD_OVERFLOW:c @0 @1)) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: + SAT_ADD = (X + Y) | -((type)(X + Y) < X) */ (match (usadd_right_part_1 @0 @1) (negate (convert (lt (plus:c @0 @1) @0))) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) +/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka: + SAT_ADD = (X + Y) | -(X > (X + Y)) */ (match (usadd_right_part_1 @0 @1) (negate (convert (gt @0 (plus:c @0 @1)))) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ (match (usadd_right_part_2 @0 @1) (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop))) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) +/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka: + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */ +(match (usadd_right_part_2 @0 @1) + (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1))) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2 because the sub part of left_part_2 cannot work with right_part_1. For example, left_part_2 pattern focus one .ADD_OVERFLOW but the @@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_add @0 @1) (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1))) -/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */ +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW): + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or + SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */ (match (unsigned_integer_sat_add @0 @1) (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) +/* Unsigned saturation add, case 3 (branch with ge): + SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */ +(match (unsigned_integer_sat_add @0 @1) + (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep)) + +/* Unsigned saturation add, case 4 (branch with lt): + SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ +(match (unsigned_integer_sat_add @0 @1) + (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) + +/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ +(match (unsigned_integer_sat_add @0 @1) + (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) + (usadd_left_part_2 @0 @1) integer_minus_onep)) + +/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW): + SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW. */ +(match (unsigned_integer_sat_add @0 @1) + (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop) + integer_minus_onep (usadd_left_part_2 @0 @1))) + /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 469e34d828d..173b0366f5e 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4101,8 +4101,24 @@ build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, } } +static void +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, + internal_fn fn, tree lhs, tree op_0, + tree op_1) +{ + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) + { + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); + gimple_call_set_lhs (call, lhs); + gsi_insert_before (gsi, call, GSI_SAME_STMT); + + gimple_stmt_iterator psi = gsi_for_stmt (phi); + remove_phi_node (&psi, /* release_lhs_p */ false); + } +} + /* - * Try to match saturation unsigned add. + * Try to match saturation unsigned add with assign. * _7 = _4 + _6; * _8 = _4 > _7; * _9 = (long unsigned int) _8; @@ -4121,6 +4137,37 @@ match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); } +/* + * Try to match saturation unsigned add with PHI. + * <bb 2> : + * _1 = x_3(D) + y_4(D); + * if (_1 >= x_3(D)) + * goto <bb 3>; [INV] + * else + * goto <bb 4>; [INV] + * + * <bb 3> : + * + * <bb 4> : + * # _2 = PHI <255(2), _1(3)> + * => + * <bb 4> [local count: 1073741824]: + * _2 = .SAT_ADD (x_4(D), y_5(D)); */ + +static void +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gphi *phi) +{ + if (gimple_phi_num_args (phi) != 2) + return; + + tree ops[2]; + tree phi_result = gimple_phi_result (phi); + + if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)) + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_ADD, phi_result, + ops[0], ops[1]); +} + /* * Try to match saturation unsigned sub. * _1 = _4 >= _5; @@ -6052,6 +6099,13 @@ math_opts_dom_walker::after_dom_children (basic_block bb) fma_deferring_state fma_state (param_avoid_fma_max_bits > 0); + for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); + gsi_next (&psi)) + { + gimple_stmt_iterator gsi = gsi_last_bb (bb); + match_unsigned_saturation_add (&gsi, psi.phi ()); + } + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) { gimple *stmt = gsi_stmt (gsi);