@@ -361,6 +361,22 @@ Usually the types of the generated result expressions are
determined from the context, but sometimes like in the above case
it is required that you specify them explicitly.
+Another modifier for generated expressions is @code{^} which
+tells the machinery to try more matches for some special cases.
+For example, normally the @code{cond} only allows the gimple
+assign when matching. It will also try to match the gimple @code{PHI}
+besides gimple assign if appending the @code{^} to the @code{cond}.
+Aka @code{cond^}.
+
+@smallexample
+(match (unsigned_sat_add @0 @1)
+ (cond^ (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep))
+@end smallexample
+
+The above matching will generate the predicate function named
+@code{gimple_unsigned_sat_add} that accepts both the gimple
+assign and gimple @code{PHI}.
+
Another modifier for generated expressions is @code{!} which
tells the machinery to only consider the simplification in case
the marked expression simplified to a simple operand. Consider
@@ -838,6 +838,11 @@ public:
predicate_id *p;
};
+enum expr_flag {
+ EFLAG_NONE,
+ EFLAG_PHI_MATCH,
+};
+
/* An operand that constitutes an expression. Expressions include
function calls and user-defined predicate invocations. */
@@ -848,12 +853,12 @@ public:
: operand (OP_EXPR, loc), operation (operation_),
ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
is_generic (false), force_single_use (false), force_leaf (false),
- opt_grp (0) {}
+ opt_grp (0), eflag (EFLAG_NONE) {}
expr (expr *e)
: operand (OP_EXPR, e->location), operation (e->operation),
ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
is_generic (e->is_generic), force_single_use (e->force_single_use),
- force_leaf (e->force_leaf), opt_grp (e->opt_grp) {}
+ force_leaf (e->force_leaf), opt_grp (e->opt_grp), eflag (e->eflag) {}
void append_op (operand *op) { ops.safe_push (op); }
/* The operator and its operands. */
id_base *operation;
@@ -873,6 +878,9 @@ public:
bool force_leaf;
/* If non-zero, the group for optional handling. */
unsigned char opt_grp;
+ /* Indicate the flag of the expr, default is none. */
+ enum expr_flag eflag;
+
void gen_transform (FILE *f, int, const char *, bool, int,
const char *, capture_info *,
dt_operand ** = 0, int = 0) override;
@@ -1819,6 +1827,7 @@ public:
char *get_name (char *);
void gen_opname (char *, unsigned);
+ void gen_phi_on_cond (FILE *, int, int);
};
/* Leaf node of the decision tree, used for DT_SIMPLIFY. */
@@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
depth);
indent += 4;
fprintf_indent (f, indent, "{\n");
+ bool cond_expr_p = false, expr_flag_p = false;
for (unsigned i = 0; i < exprs_len; ++i)
{
expr *e = as_a <expr *> (gimple_exprs[i]->op);
@@ -3262,6 +3272,8 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
else
{
id_base *op = e->operation;
+ cond_expr_p |= *op == COND_EXPR;
+ expr_flag_p |= e->eflag != EFLAG_NONE;
if (*op == CONVERT_EXPR || *op == NOP_EXPR)
fprintf_indent (f, indent, "CASE_CONVERT:\n");
else
@@ -3275,6 +3287,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
fprintf_indent (f, indent, "default:;\n");
fprintf_indent (f, indent, "}\n");
indent -= 4;
+
+ if (cond_expr_p && expr_flag_p)
+ {
+ fprintf_indent (f, indent,
+ "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
+ depth, depth);
+ indent += 2;
+ fprintf_indent (f, indent, "{\n");
+ indent += 2;
+
+ for (unsigned i = 0; i < exprs_len; i++)
+ {
+ expr *e = as_a <expr *> (gimple_exprs[i]->op);
+ if (*e->operation == COND_EXPR && e->eflag == EFLAG_PHI_MATCH)
+ gimple_exprs[i]->gen_phi_on_cond (f, indent, depth);
+ }
+
+ indent -= 2;
+ fprintf_indent (f, indent, "}\n");
+ indent -= 2;
+ }
}
if (fns_len)
@@ -3483,6 +3516,86 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int depth)
}
}
+/* Generate matching code for the phi when meet COND_EXPR. */
+
+void
+dt_operand::gen_phi_on_cond (FILE *f, int indent, int depth)
+{
+ fprintf_indent (f, indent,
+ "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+
+ fprintf_indent (f, indent, "if (gimple_phi_num_args (_a%d) == 2)\n", depth);
+
+ indent += 2;
+ fprintf_indent (f, indent, "{\n");
+ indent += 2;
+
+ fprintf_indent (f, indent,
+ "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
+ fprintf_indent (f, indent,
+ "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
+ fprintf_indent (f, indent,
+ "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
+ "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+ fprintf_indent (f, indent,
+ "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
+ "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
+ depth, depth, depth, depth);
+
+ fprintf_indent (f, indent,
+ "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
+ depth, depth);
+ fprintf_indent (f, indent, "if (_ct_%d"
+ " && EDGE_COUNT (_other_db_%d->preds) == 1\n", depth, depth);
+ fprintf_indent (f, indent,
+ " && EDGE_COUNT (_other_db_%d->succs) == 1\n", depth);
+ fprintf_indent (f, indent,
+ " && EDGE_PRED (_other_db_%d, 0)->src == _db_%d)\n", depth, depth);
+
+ indent += 2;
+ fprintf_indent (f, indent, "{\n");
+ indent += 2;
+
+ fprintf_indent (f, indent,
+ "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
+ fprintf_indent (f, indent,
+ "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
+
+ char opname_0[20];
+ char opname_1[20];
+ char opname_2[20];
+ gen_opname (opname_0, 0);
+
+ fprintf_indent (f, indent,
+ "tree %s = build2 (gimple_cond_code (_ct_%d), "
+ "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
+ opname_0, depth, depth, depth);
+
+ fprintf_indent (f, indent,
+ "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
+ " & EDGE_TRUE_VALUE;\n", depth, depth);
+
+ gen_opname (opname_1, 1);
+ fprintf_indent (f, indent,
+ "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
+ opname_1, depth, depth);
+
+ gen_opname (opname_2, 2);
+ fprintf_indent (f, indent,
+ "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
+ opname_2, depth, depth);
+
+ gen_kids (f, indent, true, depth);
+
+ indent -= 2;
+ fprintf_indent (f, indent, "}\n");
+ indent -= 2;
+
+ indent -= 2;
+ fprintf_indent (f, indent, "}\n");
+ indent -= 2;
+}
+
/* Emit a logging call to the debug file to the file F, with the INDENT from
either the RESULT location or the S's match location if RESULT is null. */
static void
@@ -4600,6 +4713,15 @@ parser::parse_expr ()
e->force_leaf = true;
}
+ if (parsing_match_operand && token->type == CPP_XOR
+ && !(token->flags & PREV_WHITE))
+ {
+ eat_token (CPP_XOR);
+
+ if (*e->operation == COND_EXPR)
+ e->eflag = EFLAG_PHI_MATCH;
+ }
+
if (token->type == CPP_COLON
&& !(token->flags & PREV_WHITE))
{
@@ -3056,31 +3056,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
/* Unsigned Saturation Add */
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+ SAT_ADD = (X + Y) | -((X + Y) < X) */
(match (usadd_left_part_1 @0 @1)
(plus:c @0 @1)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
(match (usadd_left_part_2 @0 @1)
(realpart (IFN_ADD_OVERFLOW:c @0 @1))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+ SAT_ADD = (X + Y) | -((type)(X + Y) < X) */
(match (usadd_right_part_1 @0 @1)
(negate (convert (lt (plus:c @0 @1) @0)))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+ SAT_ADD = (X + Y) | -(X > (X + Y)) */
(match (usadd_right_part_1 @0 @1)
(negate (convert (gt @0 (plus:c @0 @1))))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
(match (usadd_right_part_2 @0 @1)
(negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
+(match (usadd_right_part_2 @0 @1)
+ (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
because the sub part of left_part_2 cannot work with right_part_1.
For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
@@ -3092,10 +3109,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(match (unsigned_integer_sat_add @0 @1)
(bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
-/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */
+/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
+ SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 0) */
(match (unsigned_integer_sat_add @0 @1)
(bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
+/* Unsigned saturation add, case 3 (branch with ge):
+ SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1. */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
+
+/* Unsigned saturation add, case 4 (branch with lt):
+ SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
+
+/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
+ SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+ (usadd_left_part_2 @0 @1) integer_minus_onep))
+
+/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
+ SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW. */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+ integer_minus_onep (usadd_left_part_2 @0 @1)))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
@@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
* =>
* _12 = .SAT_ADD (_4, _6); */
static void
-match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
{
gcall *call = NULL;
@@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
}
}
+/*
+ * Try to match saturation arith pattern(s).
+ * From:
+ * <bb 2> :
+ * _1 = x_3(D) + y_4(D);
+ * if (_1 >= x_3(D))
+ * goto <bb 3>; [INV]
+ * else
+ * goto <bb 4>; [INV]
+ *
+ * <bb 3> :
+ *
+ * <bb 4> :
+ * # _2 = PHI <255(2), _1(3)>
+ *
+ * To:
+ * <bb 4> [local count: 1073741824]:
+ * _2 = .SAT_ADD (x_4(D), y_5(D)); */
+
+static void
+match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
+{
+ if (gimple_phi_num_args (phi) != 2)
+ return;
+
+ tree ops[2];
+ tree phi_result = gimple_phi_result (phi);
+
+ if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+ && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
+ OPTIMIZE_FOR_BOTH))
+ {
+ gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
+ gimple_call_set_lhs (call, phi_result);
+ gsi_insert_before (gsi, call, GSI_SAME_STMT);
+
+ gimple_stmt_iterator psi = gsi_for_stmt (phi);
+ remove_phi_node (&psi, false);
+ }
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
@@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
+ for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
+ {
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
+ match_phi_saturation_arith (&gsi, psi.phi ());
+ }
+
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
{
gimple *stmt = gsi_stmt (gsi);
@@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
break;
case BIT_IOR_EXPR:
- match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+ match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
/* fall-through */
case BIT_XOR_EXPR:
match_uaddc_usubc (&gsi, stmt, code);