Message ID | 20240528082937.3300351-1-pan2.li@intel.com |
---|---|
State | New |
Series | [v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int |
Kindly ping, almost the same but for subtract. Pan -----Original Message----- From: Li, Pan2 <pan2.li@intel.com> Sent: Tuesday, May 28, 2024 4:30 PM To: gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com; richard.guenther@gmail.com; Li, Pan2 <pan2.li@intel.com> Subject: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int From: Pan Li <pan2.li@intel.com> This patch would like to add the middle-end presentation for the saturation sub. Aka set the result of add to the min when downflow. It will take the pattern similar as below. SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)); For example for uint8_t, we have * SAT_SUB (255, 0) => 255 * SAT_SUB (1, 2) => 0 * SAT_SUB (254, 255) => 0 * SAT_SUB (0, 255) => 0 Given below SAT_SUB for uint64 uint64_t sat_sub_u64 (uint64_t x, uint64_t y) { return (x + y) & (- (uint64_t)((x >= y))); } Before this patch: uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) { _Bool _1; long unsigned int _3; uint64_t _6; ;; basic block 2, loop depth 0 ;; pred: ENTRY _1 = x_4(D) >= y_5(D); _3 = x_4(D) - y_5(D); _6 = _1 ? _3 : 0; return _6; ;; succ: EXIT } After this patch: uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) { uint64_t _6; ;; basic block 2, loop depth 0 ;; pred: ENTRY _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call] return _6; ;; succ: EXIT } The below tests are running for this patch: *. The riscv fully regression tests. *. The x86 bootstrap tests. *. The x86 fully regression tests. PR target/51492 PR target/112600 gcc/ChangeLog: * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB. * match.pd: Add new match for SAT_SUB. * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub. * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add new decl for generated in match.pd. (build_saturation_binary_arith_call): Add new helper function to build the gimple call to binary SAT alu. (match_saturation_arith): Rename from. (match_unsigned_saturation_add): Rename to. (match_unsigned_saturation_sub): Add new func to match the unsigned sat sub. (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching try when COND_EXPR. Signed-off-by: Pan Li <pan2.li@intel.com> --- gcc/internal-fn.def | 1 + gcc/match.pd | 14 ++++++++ gcc/optabs.def | 4 +-- gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------ 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 25badbb86e5..24539716e5b 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, smulhrs, umulhrs, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary) DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) diff --git a/gcc/match.pd b/gcc/match.pd index 024e3350465..3e334533ff8 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_add @0 @1) (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) +/* Unsigned saturation sub, case 1 (branch with gt): + SAT_U_SUB = X > Y ? 
X - Y : 0 */ +(match (unsigned_integer_sat_sub @0 @1) + (cond (gt @0 @1) (minus @0 @1) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + +/* Unsigned saturation sub, case 2 (branch with ge): + SAT_U_SUB = X >= Y ? X - Y : 0. */ +(match (unsigned_integer_sat_sub @0 @1) + (cond (ge @0 @1) (minus @0 @1) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne) diff --git a/gcc/optabs.def b/gcc/optabs.def index 3f2cb46aff8..bc2611abdc2 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3") OPTAB_NX(sub_optab, "sub$Q$a3") OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc) OPTAB_VX(subv_optab, "sub$F$a3") -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc) OPTAB_NX(smul_optab, "mul$P$a3") OPTAB_NX(smul_optab, "mul$F$a3") diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 62da1c5ee08..4717302b728 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, } extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); + +static void +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, + tree lhs, tree op_0, tree op_1) +{ + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) + { + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); + gimple_call_set_lhs (call, lhs); + gsi_replace (gsi, call, /* update_eh_info */ true); + } +} /* - * Try to match saturation arith pattern(s). - * 1. SAT_ADD (unsigned) - * _7 = _4 + _6; - * _8 = _4 > _7; - * _9 = (long unsigned int) _8; - * _10 = -_9; - * _12 = _7 | _10; - * => - * _12 = .SAT_ADD (_4, _6); */ + * Try to match saturation unsigned add. + * _7 = _4 + _6; + * _8 = _4 > _7; + * _9 = (long unsigned int) _8; + * _10 = -_9; + * _12 = _7 | _10; + * => + * _12 = .SAT_ADD (_4, _6); */ + static void -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) { - gcall *call = NULL; + tree ops[2]; + tree lhs = gimple_assign_lhs (stmt); + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)) + build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); +} + +/* + * Try to match saturation unsigned sub. + * _1 = _4 >= _5; + * _3 = _4 - _5; + * _6 = _1 ? 
_3 : 0; + * => + * _6 = .SAT_SUB (_4, _5); */ + +static void +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) +{ tree ops[2]; tree lhs = gimple_assign_lhs (stmt); - if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) - && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), - OPTIMIZE_FOR_BOTH)) - { - call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); - gimple_call_set_lhs (call, lhs); - gsi_replace (gsi, call, true); - } + if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) + build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); } /* Recognize for unsigned x @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) break; case BIT_IOR_EXPR: - match_saturation_arith (&gsi, as_a<gassign *> (stmt)); + match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); /* fall-through */ case BIT_XOR_EXPR: match_uaddc_usubc (&gsi, stmt, code); @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb) match_single_bit_test (&gsi, stmt); break; + case COND_EXPR: + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); + break; + default:; } }
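As an aside, here is a self-contained sketch of the scalar idiom this patch teaches the middle end to recognize, checked against the uint8_t example values from the commit message above. Note that the uint64_t example in the quoted log writes `(x + y)`; the intended operation is `(x - y)`, a copy slip that is pointed out and corrected later in this thread. The function names are illustrative, not from the patch.

```c
#include <assert.h>
#include <stdint.h>

/* Branchless unsigned saturating subtract: when x < y the mask
   -(T)(x >= y) is all-zeros, forcing the wrapped difference to 0;
   otherwise the mask is all-ones and x - y passes through.  */
static uint8_t
sat_sub_u8 (uint8_t x, uint8_t y)
{
  return (x - y) & (-(uint8_t) (x >= y));
}

static uint64_t
sat_sub_u64 (uint64_t x, uint64_t y)
{
  return (x - y) & (-(uint64_t) (x >= y));
}

int
main (void)
{
  /* The example values from the commit message.  */
  assert (sat_sub_u8 (255, 0) == 255);
  assert (sat_sub_u8 (1, 2) == 0);
  assert (sat_sub_u8 (254, 255) == 0);
  assert (sat_sub_u8 (0, 255) == 0);
  assert (sat_sub_u64 (0, UINT64_MAX) == 0);
  return 0;
}
```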
On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote: > > From: Pan Li <pan2.li@intel.com> > > This patch would like to add the middle-end presentation for the > saturation sub. Aka set the result of add to the min when downflow. > It will take the pattern similar as below. > > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)); > > For example for uint8_t, we have > > * SAT_SUB (255, 0) => 255 > * SAT_SUB (1, 2) => 0 > * SAT_SUB (254, 255) => 0 > * SAT_SUB (0, 255) => 0 > > Given below SAT_SUB for uint64 > > uint64_t sat_sub_u64 (uint64_t x, uint64_t y) > { > return (x + y) & (- (uint64_t)((x >= y))); > } > > Before this patch: > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > { > _Bool _1; > long unsigned int _3; > uint64_t _6; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _1 = x_4(D) >= y_5(D); > _3 = x_4(D) - y_5(D); > _6 = _1 ? _3 : 0; > return _6; > ;; succ: EXIT > } > > After this patch: > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > { > uint64_t _6; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call] > return _6; > ;; succ: EXIT > } > > The below tests are running for this patch: > *. The riscv fully regression tests. > *. The x86 bootstrap tests. > *. The x86 fully regression tests. OK. Thanks, Richard. > PR target/51492 > PR target/112600 > > gcc/ChangeLog: > > * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB. > * match.pd: Add new match for SAT_SUB. > * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub. > * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add > new decl for generated in match.pd. > (build_saturation_binary_arith_call): Add new helper function > to build the gimple call to binary SAT alu. > (match_saturation_arith): Rename from. > (match_unsigned_saturation_add): Rename to. > (match_unsigned_saturation_sub): Add new func to match the > unsigned sat sub. > (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching > try when COND_EXPR. > > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/internal-fn.def | 1 + > gcc/match.pd | 14 ++++++++ > gcc/optabs.def | 4 +-- > gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------ > 4 files changed, 64 insertions(+), 22 deletions(-) > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 25badbb86e5..24539716e5b 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, > smulhrs, umulhrs, binary) > > DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary) > > DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) > DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) > diff --git a/gcc/match.pd b/gcc/match.pd > index 024e3350465..3e334533ff8 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (match (unsigned_integer_sat_add @0 @1) > (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > > +/* Unsigned saturation sub, case 1 (branch with gt): > + SAT_U_SUB = X > Y ? X - Y : 0 */ > +(match (unsigned_integer_sat_sub @0 @1) > + (cond (gt @0 @1) (minus @0 @1) integer_zerop) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)))) > + > +/* Unsigned saturation sub, case 2 (branch with ge): > + SAT_U_SUB = X >= Y ? X - Y : 0. 
*/ > +(match (unsigned_integer_sat_sub @0 @1) > + (cond (ge @0 @1) (minus @0 @1) integer_zerop) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)))) > + > /* x > y && x != XXX_MIN --> x > y > x > y && x == XXX_MIN --> false . */ > (for eqne (eq ne) > diff --git a/gcc/optabs.def b/gcc/optabs.def > index 3f2cb46aff8..bc2611abdc2 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3") > OPTAB_NX(sub_optab, "sub$Q$a3") > OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc) > OPTAB_VX(subv_optab, "sub$F$a3") > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc) > OPTAB_NX(smul_optab, "mul$P$a3") > OPTAB_NX(smul_optab, "mul$F$a3") > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index 62da1c5ee08..4717302b728 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, > } > > extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); > + > +static void > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, > + tree lhs, tree op_0, tree op_1) > +{ > + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) > + { > + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); > + gimple_call_set_lhs (call, lhs); > + gsi_replace (gsi, call, /* update_eh_info */ true); > + } > +} > > /* > - * Try to match saturation arith pattern(s). > - * 1. SAT_ADD (unsigned) > - * _7 = _4 + _6; > - * _8 = _4 > _7; > - * _9 = (long unsigned int) _8; > - * _10 = -_9; > - * _12 = _7 | _10; > - * => > - * _12 = .SAT_ADD (_4, _6); */ > + * Try to match saturation unsigned add. > + * _7 = _4 + _6; > + * _8 = _4 > _7; > + * _9 = (long unsigned int) _8; > + * _10 = -_9; > + * _12 = _7 | _10; > + * => > + * _12 = .SAT_ADD (_4, _6); */ > + > static void > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) > { > - gcall *call = NULL; > + tree ops[2]; > + tree lhs = gimple_assign_lhs (stmt); > > + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)) > + build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); > +} > + > +/* > + * Try to match saturation unsigned sub. > + * _1 = _4 >= _5; > + * _3 = _4 - _5; > + * _6 = _1 ? 
_3 : 0; > + * => > + * _6 = .SAT_SUB (_4, _5); */ > + > +static void > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) > +{ > tree ops[2]; > tree lhs = gimple_assign_lhs (stmt); > > - if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) > - && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), > - OPTIMIZE_FOR_BOTH)) > - { > - call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); > - gimple_call_set_lhs (call, lhs); > - gsi_replace (gsi, call, true); > - } > + if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) > + build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); > } > > /* Recognize for unsigned x > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > break; > > case BIT_IOR_EXPR: > - match_saturation_arith (&gsi, as_a<gassign *> (stmt)); > + match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); > /* fall-through */ > case BIT_XOR_EXPR: > match_uaddc_usubc (&gsi, stmt, code); > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > match_single_bit_test (&gsi, stmt); > break; > > + case COND_EXPR: > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); > + break; > + > default:; > } > } > -- > 2.34.1 >
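The two new match.pd entries accept the two obvious branch spellings of the operation, differing only in the comparison; once the result appears as a COND_EXPR, the new case in math_opts_dom_walker::after_dom_children rewrites both to the same `.SAT_SUB` call, provided the target implements the `ussub` optab. A minimal illustration of the two source shapes (names are mine):

```c
#include <stdint.h>

/* Case 1: strict comparison, the (cond (gt @0 @1) (minus @0 @1) 0) shape.  */
uint32_t
sat_sub_gt (uint32_t x, uint32_t y)
{
  return x > y ? x - y : 0;
}

/* Case 2: non-strict comparison, the (cond (ge @0 @1) (minus @0 @1) 0) shape.
   For x == y both branches yield 0, so the two forms compute the same value.  */
uint32_t
sat_sub_ge (uint32_t x, uint32_t y)
{
  return x >= y ? x - y : 0;
}
```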
Thanks Richard, will commit after the rebased pass the regression test. Pan -----Original Message----- From: Richard Biener <richard.guenther@gmail.com> Sent: Wednesday, June 5, 2024 3:19 PM To: Li, Pan2 <pan2.li@intel.com> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote: > > From: Pan Li <pan2.li@intel.com> > > This patch would like to add the middle-end presentation for the > saturation sub. Aka set the result of add to the min when downflow. > It will take the pattern similar as below. > > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)); > > For example for uint8_t, we have > > * SAT_SUB (255, 0) => 255 > * SAT_SUB (1, 2) => 0 > * SAT_SUB (254, 255) => 0 > * SAT_SUB (0, 255) => 0 > > Given below SAT_SUB for uint64 > > uint64_t sat_sub_u64 (uint64_t x, uint64_t y) > { > return (x + y) & (- (uint64_t)((x >= y))); > } > > Before this patch: > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > { > _Bool _1; > long unsigned int _3; > uint64_t _6; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _1 = x_4(D) >= y_5(D); > _3 = x_4(D) - y_5(D); > _6 = _1 ? _3 : 0; > return _6; > ;; succ: EXIT > } > > After this patch: > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > { > uint64_t _6; > > ;; basic block 2, loop depth 0 > ;; pred: ENTRY > _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call] > return _6; > ;; succ: EXIT > } > > The below tests are running for this patch: > *. The riscv fully regression tests. > *. The x86 bootstrap tests. > *. The x86 fully regression tests. OK. Thanks, Richard. > PR target/51492 > PR target/112600 > > gcc/ChangeLog: > > * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB. > * match.pd: Add new match for SAT_SUB. > * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub. > * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add > new decl for generated in match.pd. > (build_saturation_binary_arith_call): Add new helper function > to build the gimple call to binary SAT alu. > (match_saturation_arith): Rename from. > (match_unsigned_saturation_add): Rename to. > (match_unsigned_saturation_sub): Add new func to match the > unsigned sat sub. > (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching > try when COND_EXPR. 
> > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/internal-fn.def | 1 + > gcc/match.pd | 14 ++++++++ > gcc/optabs.def | 4 +-- > gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------ > 4 files changed, 64 insertions(+), 22 deletions(-) > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index 25badbb86e5..24539716e5b 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, > smulhrs, umulhrs, binary) > > DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary) > > DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) > DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) > diff --git a/gcc/match.pd b/gcc/match.pd > index 024e3350465..3e334533ff8 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (match (unsigned_integer_sat_add @0 @1) > (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > > +/* Unsigned saturation sub, case 1 (branch with gt): > + SAT_U_SUB = X > Y ? X - Y : 0 */ > +(match (unsigned_integer_sat_sub @0 @1) > + (cond (gt @0 @1) (minus @0 @1) integer_zerop) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)))) > + > +/* Unsigned saturation sub, case 2 (branch with ge): > + SAT_U_SUB = X >= Y ? X - Y : 0. */ > +(match (unsigned_integer_sat_sub @0 @1) > + (cond (ge @0 @1) (minus @0 @1) integer_zerop) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)))) > + > /* x > y && x != XXX_MIN --> x > y > x > y && x == XXX_MIN --> false . */ > (for eqne (eq ne) > diff --git a/gcc/optabs.def b/gcc/optabs.def > index 3f2cb46aff8..bc2611abdc2 100644 > --- a/gcc/optabs.def > +++ b/gcc/optabs.def > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3") > OPTAB_NX(sub_optab, "sub$Q$a3") > OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc) > OPTAB_VX(subv_optab, "sub$F$a3") > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc) > OPTAB_NX(smul_optab, "mul$P$a3") > OPTAB_NX(smul_optab, "mul$F$a3") > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index 62da1c5ee08..4717302b728 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, > } > > extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); > + > +static void > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, > + tree lhs, tree op_0, tree op_1) > +{ > + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) > + { > + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); > + gimple_call_set_lhs (call, lhs); > + gsi_replace (gsi, call, /* update_eh_info */ true); > + } > +} > > /* > - * Try to match saturation arith pattern(s). > - * 1. 
SAT_ADD (unsigned) > - * _7 = _4 + _6; > - * _8 = _4 > _7; > - * _9 = (long unsigned int) _8; > - * _10 = -_9; > - * _12 = _7 | _10; > - * => > - * _12 = .SAT_ADD (_4, _6); */ > + * Try to match saturation unsigned add. > + * _7 = _4 + _6; > + * _8 = _4 > _7; > + * _9 = (long unsigned int) _8; > + * _10 = -_9; > + * _12 = _7 | _10; > + * => > + * _12 = .SAT_ADD (_4, _6); */ > + > static void > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) > { > - gcall *call = NULL; > + tree ops[2]; > + tree lhs = gimple_assign_lhs (stmt); > > + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)) > + build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); > +} > + > +/* > + * Try to match saturation unsigned sub. > + * _1 = _4 >= _5; > + * _3 = _4 - _5; > + * _6 = _1 ? _3 : 0; > + * => > + * _6 = .SAT_SUB (_4, _5); */ > + > +static void > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) > +{ > tree ops[2]; > tree lhs = gimple_assign_lhs (stmt); > > - if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) > - && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), > - OPTIMIZE_FOR_BOTH)) > - { > - call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); > - gimple_call_set_lhs (call, lhs); > - gsi_replace (gsi, call, true); > - } > + if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) > + build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); > } > > /* Recognize for unsigned x > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > break; > > case BIT_IOR_EXPR: > - match_saturation_arith (&gsi, as_a<gassign *> (stmt)); > + match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); > /* fall-through */ > case BIT_XOR_EXPR: > match_uaddc_usubc (&gsi, stmt, code); > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > match_single_bit_test (&gsi, stmt); > break; > > + case COND_EXPR: > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); > + break; > + > default:; > } > } > -- > 2.34.1 >
On Wed, Jun 5, 2024 at 9:38 AM Li, Pan2 <pan2.li@intel.com> wrote: > > Thanks Richard, will commit after the rebased pass the regression test. > > Pan > > -----Original Message----- > From: Richard Biener <richard.guenther@gmail.com> > Sent: Wednesday, June 5, 2024 3:19 PM > To: Li, Pan2 <pan2.li@intel.com> > Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com > Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int > > On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote: > > > > From: Pan Li <pan2.li@intel.com> > > > > This patch would like to add the middle-end presentation for the > > saturation sub. Aka set the result of add to the min when downflow. > > It will take the pattern similar as below. > > > > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)); > > > > For example for uint8_t, we have > > > > * SAT_SUB (255, 0) => 255 > > * SAT_SUB (1, 2) => 0 > > * SAT_SUB (254, 255) => 0 > > * SAT_SUB (0, 255) => 0 > > > > Given below SAT_SUB for uint64 > > > > uint64_t sat_sub_u64 (uint64_t x, uint64_t y) > > { > > return (x + y) & (- (uint64_t)((x >= y))); > > } Is the above testcase correct? You need "(x + y)" as the first term. BTW: After applying your patch, I'm not able to produce .SAT_SUB with x86_64 and the following testcase: --cut here-- typedef unsigned short T; void foo (T *out, T *x, T *y, int n) { int i; for (i = 0; i < n; i++) out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i])); } --cut here-- with gcc -O2 -ftree-vectorize -msse2 I think that all relevant optabs were added for x86 in https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b59de4113262f2bee14147eb17eb3592f03d9556 as part of the commit for PR112600, comment 8. Uros. > > > > Before this patch: > > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > > { > > _Bool _1; > > long unsigned int _3; > > uint64_t _6; > > > > ;; basic block 2, loop depth 0 > > ;; pred: ENTRY > > _1 = x_4(D) >= y_5(D); > > _3 = x_4(D) - y_5(D); > > _6 = _1 ? _3 : 0; > > return _6; > > ;; succ: EXIT > > } > > > > After this patch: > > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > > { > > uint64_t _6; > > > > ;; basic block 2, loop depth 0 > > ;; pred: ENTRY > > _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call] > > return _6; > > ;; succ: EXIT > > } > > > > The below tests are running for this patch: > > *. The riscv fully regression tests. > > *. The x86 bootstrap tests. > > *. The x86 fully regression tests. > > OK. > > Thanks, > Richard. > > > PR target/51492 > > PR target/112600 > > > > gcc/ChangeLog: > > > > * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB. > > * match.pd: Add new match for SAT_SUB. > > * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub. > > * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add > > new decl for generated in match.pd. > > (build_saturation_binary_arith_call): Add new helper function > > to build the gimple call to binary SAT alu. > > (match_saturation_arith): Rename from. > > (match_unsigned_saturation_add): Rename to. > > (match_unsigned_saturation_sub): Add new func to match the > > unsigned sat sub. > > (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching > > try when COND_EXPR. 
> > > > Signed-off-by: Pan Li <pan2.li@intel.com> > > --- > > gcc/internal-fn.def | 1 + > > gcc/match.pd | 14 ++++++++ > > gcc/optabs.def | 4 +-- > > gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------ > > 4 files changed, 64 insertions(+), 22 deletions(-) > > > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > > index 25badbb86e5..24539716e5b 100644 > > --- a/gcc/internal-fn.def > > +++ b/gcc/internal-fn.def > > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, > > smulhrs, umulhrs, binary) > > > > DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) > > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary) > > > > DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) > > DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 024e3350465..3e334533ff8 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > (match (unsigned_integer_sat_add @0 @1) > > (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > > > > +/* Unsigned saturation sub, case 1 (branch with gt): > > + SAT_U_SUB = X > Y ? X - Y : 0 */ > > +(match (unsigned_integer_sat_sub @0 @1) > > + (cond (gt @0 @1) (minus @0 @1) integer_zerop) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)))) > > + > > +/* Unsigned saturation sub, case 2 (branch with ge): > > + SAT_U_SUB = X >= Y ? X - Y : 0. */ > > +(match (unsigned_integer_sat_sub @0 @1) > > + (cond (ge @0 @1) (minus @0 @1) integer_zerop) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)))) > > + > > /* x > y && x != XXX_MIN --> x > y > > x > y && x == XXX_MIN --> false . 
*/ > > (for eqne (eq ne) > > diff --git a/gcc/optabs.def b/gcc/optabs.def > > index 3f2cb46aff8..bc2611abdc2 100644 > > --- a/gcc/optabs.def > > +++ b/gcc/optabs.def > > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3") > > OPTAB_NX(sub_optab, "sub$Q$a3") > > OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc) > > OPTAB_VX(subv_optab, "sub$F$a3") > > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > > OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc) > > OPTAB_NX(smul_optab, "mul$P$a3") > > OPTAB_NX(smul_optab, "mul$F$a3") > > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > > index 62da1c5ee08..4717302b728 100644 > > --- a/gcc/tree-ssa-math-opts.cc > > +++ b/gcc/tree-ssa-math-opts.cc > > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, > > } > > > > extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); > > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); > > + > > +static void > > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, > > + tree lhs, tree op_0, tree op_1) > > +{ > > + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) > > + { > > + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); > > + gimple_call_set_lhs (call, lhs); > > + gsi_replace (gsi, call, /* update_eh_info */ true); > > + } > > +} > > > > /* > > - * Try to match saturation arith pattern(s). > > - * 1. SAT_ADD (unsigned) > > - * _7 = _4 + _6; > > - * _8 = _4 > _7; > > - * _9 = (long unsigned int) _8; > > - * _10 = -_9; > > - * _12 = _7 | _10; > > - * => > > - * _12 = .SAT_ADD (_4, _6); */ > > + * Try to match saturation unsigned add. > > + * _7 = _4 + _6; > > + * _8 = _4 > _7; > > + * _9 = (long unsigned int) _8; > > + * _10 = -_9; > > + * _12 = _7 | _10; > > + * => > > + * _12 = .SAT_ADD (_4, _6); */ > > + > > static void > > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) > > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) > > { > > - gcall *call = NULL; > > + tree ops[2]; > > + tree lhs = gimple_assign_lhs (stmt); > > > > + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)) > > + build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); > > +} > > + > > +/* > > + * Try to match saturation unsigned sub. > > + * _1 = _4 >= _5; > > + * _3 = _4 - _5; > > + * _6 = _1 ? 
_3 : 0; > > + * => > > + * _6 = .SAT_SUB (_4, _5); */ > > + > > +static void > > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) > > +{ > > tree ops[2]; > > tree lhs = gimple_assign_lhs (stmt); > > > > - if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) > > - && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), > > - OPTIMIZE_FOR_BOTH)) > > - { > > - call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); > > - gimple_call_set_lhs (call, lhs); > > - gsi_replace (gsi, call, true); > > - } > > + if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) > > + build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); > > } > > > > /* Recognize for unsigned x > > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > > break; > > > > case BIT_IOR_EXPR: > > - match_saturation_arith (&gsi, as_a<gassign *> (stmt)); > > + match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); > > /* fall-through */ > > case BIT_XOR_EXPR: > > match_uaddc_usubc (&gsi, stmt, code); > > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > > match_single_bit_test (&gsi, stmt); > > break; > > > > + case COND_EXPR: > > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); > > + break; > > + > > default:; > > } > > } > > -- > > 2.34.1 > >
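For readability, Uros's loop testcase from the message above, reflowed. It exercises the vectorized form, which depends on the separate vectorizer patch Pan links to in the next message rather than on this scalar-only recognizer:

```c
/* Compiled with: gcc -O2 -ftree-vectorize -msse2 */
typedef unsigned short T;

void
foo (T *out, T *x, T *y, int n)
{
  int i;

  for (i = 0; i < n; i++)
    out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
}
```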
> Is the above testcase correct? You need "(x + y)" as the first term. Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference. +#define DEF_SAT_U_SUB_FMT_1(T) \ +T __attribute__((noinline)) \ +sat_u_sub_##T##_fmt_1 (T x, T y) \ +{ \ + return (x - y) & (-(T)(x >= y)); \ +} + +#define DEF_SAT_U_SUB_FMT_2(T) \ +T __attribute__((noinline)) \ +sat_u_sub_##T##_fmt_2 (T x, T y) \ +{ \ + return (x - y) & (-(T)(x > y)); \ +} > BTW: After applying your patch, I'm not able to produce .SAT_SUB with > x86_64 and the following testcase: You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part. Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int? Of course, I will have a try later as in the middle of sth. https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html Pan -----Original Message----- From: Uros Bizjak <ubizjak@gmail.com> Sent: Wednesday, June 5, 2024 4:09 PM To: Li, Pan2 <pan2.li@intel.com> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int On Wed, Jun 5, 2024 at 9:38 AM Li, Pan2 <pan2.li@intel.com> wrote: > > Thanks Richard, will commit after the rebased pass the regression test. > > Pan > > -----Original Message----- > From: Richard Biener <richard.guenther@gmail.com> > Sent: Wednesday, June 5, 2024 3:19 PM > To: Li, Pan2 <pan2.li@intel.com> > Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com > Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int > > On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote: > > > > From: Pan Li <pan2.li@intel.com> > > > > This patch would like to add the middle-end presentation for the > > saturation sub. Aka set the result of add to the min when downflow. > > It will take the pattern similar as below. > > > > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)); > > > > For example for uint8_t, we have > > > > * SAT_SUB (255, 0) => 255 > > * SAT_SUB (1, 2) => 0 > > * SAT_SUB (254, 255) => 0 > > * SAT_SUB (0, 255) => 0 > > > > Given below SAT_SUB for uint64 > > > > uint64_t sat_sub_u64 (uint64_t x, uint64_t y) > > { > > return (x + y) & (- (uint64_t)((x >= y))); > > } Is the above testcase correct? You need "(x + y)" as the first term. BTW: After applying your patch, I'm not able to produce .SAT_SUB with x86_64 and the following testcase: --cut here-- typedef unsigned short T; void foo (T *out, T *x, T *y, int n) { int i; for (i = 0; i < n; i++) out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i])); } --cut here-- with gcc -O2 -ftree-vectorize -msse2 I think that all relevant optabs were added for x86 in https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b59de4113262f2bee14147eb17eb3592f03d9556 as part of the commit for PR112600, comment 8. Uros. > > > > Before this patch: > > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > > { > > _Bool _1; > > long unsigned int _3; > > uint64_t _6; > > > > ;; basic block 2, loop depth 0 > > ;; pred: ENTRY > > _1 = x_4(D) >= y_5(D); > > _3 = x_4(D) - y_5(D); > > _6 = _1 ? 
_3 : 0; > > return _6; > > ;; succ: EXIT > > } > > > > After this patch: > > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y) > > { > > uint64_t _6; > > > > ;; basic block 2, loop depth 0 > > ;; pred: ENTRY > > _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call] > > return _6; > > ;; succ: EXIT > > } > > > > The below tests are running for this patch: > > *. The riscv fully regression tests. > > *. The x86 bootstrap tests. > > *. The x86 fully regression tests. > > OK. > > Thanks, > Richard. > > > PR target/51492 > > PR target/112600 > > > > gcc/ChangeLog: > > > > * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB. > > * match.pd: Add new match for SAT_SUB. > > * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub. > > * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add > > new decl for generated in match.pd. > > (build_saturation_binary_arith_call): Add new helper function > > to build the gimple call to binary SAT alu. > > (match_saturation_arith): Rename from. > > (match_unsigned_saturation_add): Rename to. > > (match_unsigned_saturation_sub): Add new func to match the > > unsigned sat sub. > > (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching > > try when COND_EXPR. > > > > Signed-off-by: Pan Li <pan2.li@intel.com> > > --- > > gcc/internal-fn.def | 1 + > > gcc/match.pd | 14 ++++++++ > > gcc/optabs.def | 4 +-- > > gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------ > > 4 files changed, 64 insertions(+), 22 deletions(-) > > > > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > > index 25badbb86e5..24539716e5b 100644 > > --- a/gcc/internal-fn.def > > +++ b/gcc/internal-fn.def > > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, > > smulhrs, umulhrs, binary) > > > > DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) > > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary) > > > > DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) > > DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 024e3350465..3e334533ff8 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > (match (unsigned_integer_sat_add @0 @1) > > (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) > > > > +/* Unsigned saturation sub, case 1 (branch with gt): > > + SAT_U_SUB = X > Y ? X - Y : 0 */ > > +(match (unsigned_integer_sat_sub @0 @1) > > + (cond (gt @0 @1) (minus @0 @1) integer_zerop) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)))) > > + > > +/* Unsigned saturation sub, case 2 (branch with ge): > > + SAT_U_SUB = X >= Y ? X - Y : 0. */ > > +(match (unsigned_integer_sat_sub @0 @1) > > + (cond (ge @0 @1) (minus @0 @1) integer_zerop) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)))) > > + > > /* x > y && x != XXX_MIN --> x > y > > x > y && x == XXX_MIN --> false . 
*/ > > (for eqne (eq ne) > > diff --git a/gcc/optabs.def b/gcc/optabs.def > > index 3f2cb46aff8..bc2611abdc2 100644 > > --- a/gcc/optabs.def > > +++ b/gcc/optabs.def > > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3") > > OPTAB_NX(sub_optab, "sub$Q$a3") > > OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc) > > OPTAB_VX(subv_optab, "sub$F$a3") > > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) > > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) > > OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc) > > OPTAB_NX(smul_optab, "mul$P$a3") > > OPTAB_NX(smul_optab, "mul$F$a3") > > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > > index 62da1c5ee08..4717302b728 100644 > > --- a/gcc/tree-ssa-math-opts.cc > > +++ b/gcc/tree-ssa-math-opts.cc > > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, > > } > > > > extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); > > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); > > + > > +static void > > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, > > + tree lhs, tree op_0, tree op_1) > > +{ > > + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) > > + { > > + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); > > + gimple_call_set_lhs (call, lhs); > > + gsi_replace (gsi, call, /* update_eh_info */ true); > > + } > > +} > > > > /* > > - * Try to match saturation arith pattern(s). > > - * 1. SAT_ADD (unsigned) > > - * _7 = _4 + _6; > > - * _8 = _4 > _7; > > - * _9 = (long unsigned int) _8; > > - * _10 = -_9; > > - * _12 = _7 | _10; > > - * => > > - * _12 = .SAT_ADD (_4, _6); */ > > + * Try to match saturation unsigned add. > > + * _7 = _4 + _6; > > + * _8 = _4 > _7; > > + * _9 = (long unsigned int) _8; > > + * _10 = -_9; > > + * _12 = _7 | _10; > > + * => > > + * _12 = .SAT_ADD (_4, _6); */ > > + > > static void > > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) > > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) > > { > > - gcall *call = NULL; > > + tree ops[2]; > > + tree lhs = gimple_assign_lhs (stmt); > > > > + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)) > > + build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); > > +} > > + > > +/* > > + * Try to match saturation unsigned sub. > > + * _1 = _4 >= _5; > > + * _3 = _4 - _5; > > + * _6 = _1 ? 
_3 : 0; > > + * => > > + * _6 = .SAT_SUB (_4, _5); */ > > + > > +static void > > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) > > +{ > > tree ops[2]; > > tree lhs = gimple_assign_lhs (stmt); > > > > - if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) > > - && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), > > - OPTIMIZE_FOR_BOTH)) > > - { > > - call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); > > - gimple_call_set_lhs (call, lhs); > > - gsi_replace (gsi, call, true); > > - } > > + if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) > > + build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); > > } > > > > /* Recognize for unsigned x > > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > > break; > > > > case BIT_IOR_EXPR: > > - match_saturation_arith (&gsi, as_a<gassign *> (stmt)); > > + match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); > > /* fall-through */ > > case BIT_XOR_EXPR: > > match_uaddc_usubc (&gsi, stmt, code); > > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb) > > match_single_bit_test (&gsi, stmt); > > break; > > > > + case COND_EXPR: > > + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); > > + break; > > + > > default:; > > } > > } > > -- > > 2.34.1 > >
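A quick sanity check of the corrected form: exhaustively over uint8_t, the bitwise shape from the DEF_SAT_U_SUB_FMT_1 template above agrees with the plain branch version. This is only an illustrative sketch, not one of the tests from the series:

```c
#include <assert.h>
#include <stdint.h>

static uint8_t
sat_sub_mask (uint8_t x, uint8_t y)
{
  return (x - y) & (-(uint8_t) (x >= y));   /* DEF_SAT_U_SUB_FMT_1 shape.  */
}

static uint8_t
sat_sub_branch (uint8_t x, uint8_t y)
{
  return x >= y ? x - y : 0;
}

int
main (void)
{
  for (unsigned x = 0; x < 256; x++)
    for (unsigned y = 0; y < 256; y++)
      assert (sat_sub_mask (x, y) == sat_sub_branch (x, y));

  return 0;
}
```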
On Wed, Jun 5, 2024 at 10:22 AM Li, Pan2 <pan2.li@intel.com> wrote: > > > Is the above testcase correct? You need "(x + y)" as the first term. > > Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference. > > +#define DEF_SAT_U_SUB_FMT_1(T) \ > +T __attribute__((noinline)) \ > +sat_u_sub_##T##_fmt_1 (T x, T y) \ > +{ \ > + return (x - y) & (-(T)(x >= y)); \ > +} > + > +#define DEF_SAT_U_SUB_FMT_2(T) \ > +T __attribute__((noinline)) \ > +sat_u_sub_##T##_fmt_2 (T x, T y) \ > +{ \ > + return (x - y) & (-(T)(x > y)); \ > +} > > > BTW: After applying your patch, I'm not able to produce .SAT_SUB with > > x86_64 and the following testcase: > > You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part. > Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int? > Of course, I will have a try later as in the middle of sth. > > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html I see. x86 doesn't have scalar saturating instructions, so the scalar version indeed can't be converted. I will amend x86 testcases after the vector part of your patch is committed. Thanks, Uros.
> I see. x86 doesn't have scalar saturating instructions, so the scalar > version indeed can't be converted. > I will amend x86 testcases after the vector part of your patch is committed. Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below. .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here. Pan -----Original Message----- From: Uros Bizjak <ubizjak@gmail.com> Sent: Wednesday, June 5, 2024 4:30 PM To: Li, Pan2 <pan2.li@intel.com> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int On Wed, Jun 5, 2024 at 10:22 AM Li, Pan2 <pan2.li@intel.com> wrote: > > > Is the above testcase correct? You need "(x + y)" as the first term. > > Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference. > > +#define DEF_SAT_U_SUB_FMT_1(T) \ > +T __attribute__((noinline)) \ > +sat_u_sub_##T##_fmt_1 (T x, T y) \ > +{ \ > + return (x - y) & (-(T)(x >= y)); \ > +} > + > +#define DEF_SAT_U_SUB_FMT_2(T) \ > +T __attribute__((noinline)) \ > +sat_u_sub_##T##_fmt_2 (T x, T y) \ > +{ \ > + return (x - y) & (-(T)(x > y)); \ > +} > > > BTW: After applying your patch, I'm not able to produce .SAT_SUB with > > x86_64 and the following testcase: > > You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part. > Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int? > Of course, I will have a try later as in the middle of sth. > > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html I see. x86 doesn't have scalar saturating instructions, so the scalar version indeed can't be converted. I will amend x86 testcases after the vector part of your patch is committed. Thanks, Uros.
Committed with the example in commit log updated, thanks all. Pan -----Original Message----- From: Li, Pan2 <pan2.li@intel.com> Sent: Wednesday, June 5, 2024 4:38 PM To: Uros Bizjak <ubizjak@gmail.com> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: RE: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int > I see. x86 doesn't have scalar saturating instructions, so the scalar > version indeed can't be converted. > I will amend x86 testcases after the vector part of your patch is committed. Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below. .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here. Pan -----Original Message----- From: Uros Bizjak <ubizjak@gmail.com> Sent: Wednesday, June 5, 2024 4:30 PM To: Li, Pan2 <pan2.li@intel.com> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int On Wed, Jun 5, 2024 at 10:22 AM Li, Pan2 <pan2.li@intel.com> wrote: > > > Is the above testcase correct? You need "(x + y)" as the first term. > > Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference. > > +#define DEF_SAT_U_SUB_FMT_1(T) \ > +T __attribute__((noinline)) \ > +sat_u_sub_##T##_fmt_1 (T x, T y) \ > +{ \ > + return (x - y) & (-(T)(x >= y)); \ > +} > + > +#define DEF_SAT_U_SUB_FMT_2(T) \ > +T __attribute__((noinline)) \ > +sat_u_sub_##T##_fmt_2 (T x, T y) \ > +{ \ > + return (x - y) & (-(T)(x > y)); \ > +} > > > BTW: After applying your patch, I'm not able to produce .SAT_SUB with > > x86_64 and the following testcase: > > You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part. > Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int? > Of course, I will have a try later as in the middle of sth. > > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html I see. x86 doesn't have scalar saturating instructions, so the scalar version indeed can't be converted. I will amend x86 testcases after the vector part of your patch is committed. Thanks, Uros.
On Wed, Jun 5, 2024 at 10:38 AM Li, Pan2 <pan2.li@intel.com> wrote: > > > I see. x86 doesn't have scalar saturating instructions, so the scalar > > version indeed can't be converted. > > > I will amend x86 testcases after the vector part of your patch is committed. > > Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below. > > .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here > > It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here. x86 will emit cmove in the above case: movl %edi, %eax xorl %edx, %edx subl %esi, %eax cmpl %edi, %esi cmovnb %edx, %eax Maybe we can reuse flags from the subtraction here to avoid the compare. Uros.
Thanks for explaining. I see, cmove is well designed for such cases. Pan -----Original Message----- From: Uros Bizjak <ubizjak@gmail.com> Sent: Wednesday, June 5, 2024 4:46 PM To: Li, Pan2 <pan2.li@intel.com> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int On Wed, Jun 5, 2024 at 10:38 AM Li, Pan2 <pan2.li@intel.com> wrote: > > > I see. x86 doesn't have scalar saturating instructions, so the scalar > > version indeed can't be converted. > > > I will amend x86 testcases after the vector part of your patch is committed. > > Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below. > > .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here > > It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here. x86 will emit cmove in the above case: movl %edi, %eax xorl %edx, %edx subl %esi, %eax cmpl %edi, %esi cmovnb %edx, %eax Maybe we can reuse flags from the subtraction here to avoid the compare. Uros.
On Wed, Jun 5, 2024 at 10:52 AM Li, Pan2 <pan2.li@intel.com> wrote: > > Thanks for explaining. I see, cmove is well designed for such cases. If the question is if it is worth it to convert using __builtin_sub_overflow here if the target doesn't provide scalar saturating optab, I think the answer is yes. For x86, the compare will be eliminated. Please consider this testcase: --cut here-- unsigned int __attribute__((noinline)) foo (unsigned int x, unsigned int y) { return x > y ? x - y : 0; } unsigned int __attribute__((noinline)) bar (unsigned int x, unsigned int y) { unsigned int z; return __builtin_sub_overflow (x, y, &z) ? 0 : z; } --cut here-- This will compile to: 0000000000000000 <foo>: 0: 89 f8 mov %edi,%eax 2: 31 d2 xor %edx,%edx 4: 29 f0 sub %esi,%eax 6: 39 fe cmp %edi,%esi 8: 0f 43 c2 cmovae %edx,%eax b: c3 ret c: 0f 1f 40 00 nopl 0x0(%rax) 0000000000000010 <bar>: 10: 29 f7 sub %esi,%edi 12: 72 03 jb 17 <bar+0x7> 14: 89 f8 mov %edi,%eax 16: c3 ret 17: 31 c0 xor %eax,%eax 19: c3 ret Please note that the compare was eliminated in the later test. So, if the target does not provide saturated optab but provides __builtin_sub_overflow, I think it is worth emitting .SAT_SUB via __builtin_sub_overflow (and in similar way for saturated add). Uros. > > Pan > > -----Original Message----- > From: Uros Bizjak <ubizjak@gmail.com> > Sent: Wednesday, June 5, 2024 4:46 PM > To: Li, Pan2 <pan2.li@intel.com> > Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com > Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int > > On Wed, Jun 5, 2024 at 10:38 AM Li, Pan2 <pan2.li@intel.com> wrote: > > > > > I see. x86 doesn't have scalar saturating instructions, so the scalar > > > version indeed can't be converted. > > > > > I will amend x86 testcases after the vector part of your patch is committed. > > > > Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below. > > > > .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here > > > > It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here. > > x86 will emit cmove in the above case: > > movl %edi, %eax > xorl %edx, %edx > subl %esi, %eax > cmpl %edi, %esi > cmovnb %edx, %eax > > Maybe we can reuse flags from the subtraction here to avoid the compare. > > Uros.
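The same overflow-builtin trick carries over to the saturated add Uros mentions in passing: clamp to the type maximum when __builtin_add_overflow reports a wrap. A sketch under that assumption (names are mine, not from any patch):

```c
#include <limits.h>

unsigned int
sat_add_u32 (unsigned int x, unsigned int y)
{
  unsigned int z;

  /* On overflow clamp to UINT_MAX instead of wrapping around.  */
  return __builtin_add_overflow (x, y, &z) ? UINT_MAX : z;
}
```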
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 25badbb86e5..24539716e5b 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, smulhrs, umulhrs, binary) DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary) DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) diff --git a/gcc/match.pd b/gcc/match.pd index 024e3350465..3e334533ff8 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_add @0 @1) (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) +/* Unsigned saturation sub, case 1 (branch with gt): + SAT_U_SUB = X > Y ? X - Y : 0 */ +(match (unsigned_integer_sat_sub @0 @1) + (cond (gt @0 @1) (minus @0 @1) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + +/* Unsigned saturation sub, case 2 (branch with ge): + SAT_U_SUB = X >= Y ? X - Y : 0. */ +(match (unsigned_integer_sat_sub @0 @1) + (cond (ge @0 @1) (minus @0 @1) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne) diff --git a/gcc/optabs.def b/gcc/optabs.def index 3f2cb46aff8..bc2611abdc2 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3") OPTAB_NX(sub_optab, "sub$Q$a3") OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc) OPTAB_VX(subv_optab, "sub$F$a3") -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc) +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc) OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc) OPTAB_NX(smul_optab, "mul$P$a3") OPTAB_NX(smul_optab, "mul$F$a3") diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 62da1c5ee08..4717302b728 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, } extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); + +static void +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn, + tree lhs, tree op_0, tree op_1) +{ + if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) + { + gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); + gimple_call_set_lhs (call, lhs); + gsi_replace (gsi, call, /* update_eh_info */ true); + } +} /* - * Try to match saturation arith pattern(s). - * 1. SAT_ADD (unsigned) - * _7 = _4 + _6; - * _8 = _4 > _7; - * _9 = (long unsigned int) _8; - * _10 = -_9; - * _12 = _7 | _10; - * => - * _12 = .SAT_ADD (_4, _6); */ + * Try to match saturation unsigned add. 
+ * _7 = _4 + _6; + * _8 = _4 > _7; + * _9 = (long unsigned int) _8; + * _10 = -_9; + * _12 = _7 | _10; + * => + * _12 = .SAT_ADD (_4, _6); */ + static void -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt) { - gcall *call = NULL; + tree ops[2]; + tree lhs = gimple_assign_lhs (stmt); + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)) + build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]); +} + +/* + * Try to match saturation unsigned sub. + * _1 = _4 >= _5; + * _3 = _4 - _5; + * _6 = _1 ? _3 : 0; + * => + * _6 = .SAT_SUB (_4, _5); */ + +static void +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) +{ tree ops[2]; tree lhs = gimple_assign_lhs (stmt); - if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) - && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), - OPTIMIZE_FOR_BOTH)) - { - call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); - gimple_call_set_lhs (call, lhs); - gsi_replace (gsi, call, true); - } + if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)) + build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); } /* Recognize for unsigned x @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) break; case BIT_IOR_EXPR: - match_saturation_arith (&gsi, as_a<gassign *> (stmt)); + match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt)); /* fall-through */ case BIT_XOR_EXPR: match_uaddc_usubc (&gsi, stmt, code); @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb) match_single_bit_test (&gsi, stmt); break; + case COND_EXPR: + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); + break; + default:; } }
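For completeness, a sketch of the kind of scalar testcase that would exercise the new matching. The patch itself was validated by the riscv and x86 regression runs listed in the log, so the options, dump name, and directives below are illustrative assumptions only (the rewrite happens in the pass Pan calls widen_mul, whose dump name in tree-ssa-math-opts is widening_mul):

```c
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-widening_mul" } */

#include <stdint.h>

uint64_t
sat_sub_u64 (uint64_t x, uint64_t y)
{
  return (x - y) & (-(uint64_t) (x >= y));
}

/* Expect a single internal call on targets that provide the ussub optab.  */
/* { dg-final { scan-tree-dump-times ".SAT_SUB" 1 "widening_mul" } } */
```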