Message ID | 20240515021407.1287623-1-pan2.li@intel.com
State      | New
Series     | [v5,1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int
Hi Pan,

Thanks!

> -----Original Message-----
> From: pan2.li@intel.com <pan2.li@intel.com>
> Sent: Wednesday, May 15, 2024 3:14 AM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Tamar Christina
> <Tamar.Christina@arm.com>; richard.guenther@gmail.com;
> hongtao.liu@intel.com; Pan Li <pan2.li@intel.com>
> Subject: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to add the middle-end representation for the
> saturation add, i.e. set the result of the add to the max value when it
> overflows.  It matches a pattern similar to the one below.
>
> SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
>
> Taking uint8_t as an example, we will have:
>
> * SAT_ADD (1, 254)   => 255.
> * SAT_ADD (1, 255)   => 255.
> * SAT_ADD (2, 255)   => 255.
> * SAT_ADD (255, 255) => 255.
>
> Given the below example for the unsigned scalar integer uint64_t:
>
> uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> }
>
> Before this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   _Bool _2;
>   long unsigned int _3;
>   long unsigned int _4;
>   uint64_t _7;
>   long unsigned int _10;
>   __complex__ long unsigned int _11;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
>   _1 = REALPART_EXPR <_11>;
>   _10 = IMAGPART_EXPR <_11>;
>   _2 = _10 != 0;
>   _3 = (long unsigned int) _2;
>   _4 = -_3;
>   _7 = _1 | _4;
>   return _7;
> ;;    succ:       EXIT
>
> }
>
> After this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _7;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   return _7;
> ;;    succ:       EXIT
> }
>
> The below tests have passed for this patch:
> 1. The riscv full regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 full regression tests.
>
> 	PR target/51492
> 	PR target/112600
>
> gcc/ChangeLog:
>
> 	* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
> 	to the return true switch case(s).
> 	* internal-fn.def (SAT_ADD): Add new signed optab SAT_ADD.
> 	* match.pd: Add unsigned SAT_ADD match(es).
> 	* optabs.def (OPTAB_NL): Remove fixed-point limitation for
> 	us/ssadd.
> 	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
> 	extern func decl generated in match.pd match.
> 	(match_saturation_arith): New func impl to match the saturation arith.
> 	(math_opts_dom_walker::after_dom_children): Try match saturation
> 	arith when IOR expr.

LGTM but you'll need an OK from Richard,

Thanks for working on this!
Tamar

> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.cc        |  1 +
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 51 +++++++++++++++++++++++++++++++++++++++
>  gcc/optabs.def            |  4 +--
>  gcc/tree-ssa-math-opts.cc | 32 ++++++++++++++++++++++++
>  5 files changed, 88 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 0a7053c2286..73045ca8c8c 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
>      case IFN_UBSAN_CHECK_MUL:
>      case IFN_ADD_OVERFLOW:
>      case IFN_MUL_OVERFLOW:
> +    case IFN_SAT_ADD:
>      case IFN_VEC_WIDEN_PLUS:
>      case IFN_VEC_WIDEN_PLUS_LO:
>      case IFN_VEC_WIDEN_PLUS_HI:
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 848bb9dbff3..25badbb86e5 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>  			      smulhrs, umulhrs, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 07e743ae464..0f9c34fa897 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         || POINTER_TYPE_P (itype))
>        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
>
> +/* Unsigned Saturation Add */
> +(match (usadd_left_part_1 @0 @1)
> + (plus:c @0 @1)
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_left_part_2 @0 @1)
> + (realpart (IFN_ADD_OVERFLOW:c @0 @1))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_1 @0 @1)
> + (negate (convert (lt (plus:c @0 @1) @0)))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_1 @0 @1)
> + (negate (convert (gt @0 (plus:c @0 @1))))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part_2 @0 @1)
> + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
> +   because the sub part of left_part_2 cannot work with right_part_1.
> +   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
> +   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
> +
> +/* Unsigned saturation add, case 1 (branchless):
> +   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
> +   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
> +
> +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index ad14f9328b9..3f2cb46aff8 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
>  OPTAB_NX(add_optab, "add$Q$a3")
>  OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
>  OPTAB_VX(addv_optab, "add$F$a3")
> -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
> -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
> +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
> +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
>  OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(sub_optab, "sub$F$a3")
>  OPTAB_NX(sub_optab, "sub$Q$a3")
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index e8c804f09b7..62da1c5ee08 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>    return 0;
>  }
>
> +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +
> +/*
> + * Try to match saturation arith pattern(s).
> + *   1. SAT_ADD (unsigned)
> + *      _7 = _4 + _6;
> + *      _8 = _4 > _7;
> + *      _9 = (long unsigned int) _8;
> + *      _10 = -_9;
> + *      _12 = _7 | _10;
> + *      =>
> + *      _12 = .SAT_ADD (_4, _6);  */
> +static void
> +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
> +  gcall *call = NULL;
> +
> +  tree ops[2];
> +  tree lhs = gimple_assign_lhs (stmt);
> +
> +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> +					 OPTIMIZE_FOR_BOTH))
> +    {
> +      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, true);
> +    }
> +}
> +
>  /* Recognize for unsigned x
>     x = y - z;
>     if (x > y)
> @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>        break;
>
>      case BIT_IOR_EXPR:
> +      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +      /* fall-through */
>      case BIT_XOR_EXPR:
>        match_uaddc_usubc (&gsi, stmt, code);
>        break;
> --
> 2.34.1
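For readers who want to reproduce the two branchless idioms the new match.pd cases recognize, below is a minimal standalone C sketch. The helper names sat_add_u32_cmp and sat_add_u32_ovf are illustrative only and are not part of the patch; the second variant assumes GCC's __builtin_add_overflow, which is lowered to the .ADD_OVERFLOW form matched by case 2.

#include <stdint.h>
#include <stdio.h>

/* Case 1: plain compare form, corresponding to
   usadd_left_part_1 | usadd_right_part_1 in match.pd.  */
static uint32_t
sat_add_u32_cmp (uint32_t x, uint32_t y)
{
  uint32_t sum = x + y;                   /* wraps around on overflow */
  uint32_t mask = - (uint32_t) (sum < x); /* all-ones iff it wrapped */
  return sum | mask;                      /* saturate to UINT32_MAX */
}

/* Case 2: overflow-builtin form, corresponding to
   usadd_left_part_2 | usadd_right_part_2 in match.pd.  */
static uint32_t
sat_add_u32_ovf (uint32_t x, uint32_t y)
{
  uint32_t sum;
  uint32_t mask = - (uint32_t) __builtin_add_overflow (x, y, &sum);
  return sum | mask;
}

int
main (void)
{
  /* Mirrors the uint8_t table from the commit message, scaled up.  */
  printf ("%u\n", (unsigned) sat_add_u32_cmp (1u, UINT32_MAX - 1));     /* 4294967295 */
  printf ("%u\n", (unsigned) sat_add_u32_cmp (2u, UINT32_MAX));         /* 4294967295 */
  printf ("%u\n", (unsigned) sat_add_u32_ovf (UINT32_MAX, UINT32_MAX)); /* 4294967295 */
  return 0;
}

With this series applied and a target that implements the new usadd optab, either function should be collapsed by the middle end into a single .SAT_ADD call, as in the "After this patch" GIMPLE dump above.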
> LGTM but you'll need an OK from Richard,
> Thanks for working on this!

Thanks Tamar for help and coaching, let's wait Richard for a while,😊!

Pan

-----Original Message-----
From: Tamar Christina <Tamar.Christina@arm.com>
Sent: Wednesday, May 15, 2024 5:12 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: RE: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

[full quote of the v5 patch snipped]
On Wed, May 15, 2024 at 1:36 PM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > LGTM but you'll need an OK from Richard,
> > Thanks for working on this!
>
> Thanks Tamar for help and coaching, let's wait Richard for a while,😊!

OK.

Thanks for the patience,
Richard.

[remainder of the quoted thread snipped]
> OK.

Thanks Richard for help and coaching. To double confirm, are you OK with this patch only or for the series patch(es) of SAT middle-end?
Thanks again for reviewing and suggestions.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Thursday, May 16, 2024 4:10 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

[remainder of the quoted thread snipped]
On Thu, May 16, 2024 at 11:35 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > OK.
>
> Thanks Richard for help and coaching. To double confirm, are you OK with this patch only or for the series patch(es) of SAT middle-end?
> Thanks again for reviewing and suggestions.

For the series, the riscv specific part of course needs riscv approval.

[remainder of the quoted thread snipped]
> For the series, the riscv specific part of course needs riscv approval.

Thanks a lot, have a nice day!

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Thursday, May 16, 2024 7:59 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v5 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

[remainder of the quoted thread snipped]
On 5/16/24 5:58 AM, Richard Biener wrote:
> On Thu, May 16, 2024 at 11:35 AM Li, Pan2 <pan2.li@intel.com> wrote:
>>
>>> OK.
>>
>> Thanks Richard for help and coaching.  To double confirm, are you OK with
>> this patch only or for the series patch(es) of SAT middle-end?
>> Thanks again for reviewing and suggestions.
>
> For the series, the riscv specific part of course needs riscv approval.

Yea, we'll take a look at it.  Tons of stuff to go through, but this is
definitely on the list.

jeff
Hi Tamar,

I am trying to add more shape(s) like the branch version below for SAT_ADD.
I suspect that widening_mul may not be the best place to take care of this
shape, because after_dom_children mostly works within a bb, but here we need
to follow the def/use across bbs.

Thus, is there any suggestion for the branch shape?  Adding a new simplify to
match.pd works well, but it is not recommended per the previous discussion.

Thanks a lot for help!

Pan

-------Source code---------

#define SAT_ADD_U_1(T)                          \
T sat_add_u_1_##T(T x, T y)                     \
{                                               \
  return (T)(x + y) >= x ? (x + y) : -1;        \
}

SAT_ADD_U_1(uint16_t)

-------Gimple---------

uint16_t sat_add_u_1_uint16_t (uint16_t x, uint16_t y)
{
  short unsigned int _1;
  uint16_t _2;

  <bb 2> [local count: 1073741824]:
  _1 = x_3(D) + y_4(D);
  if (_1 >= x_3(D))
    goto <bb 3>; [65.00%]
  else
    goto <bb 4>; [35.00%]

  <bb 3> [local count: 697932184]:

  <bb 4> [local count: 1073741824]:
  # _2 = PHI <65535(2), _1(3)>
  return _2;
}

Pan
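As a quick sanity check (not part of any patch; the function names and the
test driver below are purely illustrative), the branchy shape above and the
branchless shape already matched by unsigned_integer_sat_add can be compared
exhaustively for uint16_t:

#include <stdint.h>
#include <stdio.h>

/* Branchy shape, as in SAT_ADD_U_1 above.  */
static uint16_t sat_add_branchy (uint16_t x, uint16_t y)
{
  return (uint16_t)(x + y) >= x ? (x + y) : -1;
}

/* Branchless shape, as matched by unsigned_integer_sat_add case 1.  */
static uint16_t sat_add_branchless (uint16_t x, uint16_t y)
{
  return (x + y) | (- (uint16_t)((uint16_t)(x + y) < x));
}

int main (void)
{
  /* Exhaustive check over all uint16_t pairs.  */
  for (uint32_t x = 0; x <= UINT16_MAX; x++)
    for (uint32_t y = 0; y <= UINT16_MAX; y++)
      if (sat_add_branchy (x, y) != sat_add_branchless (x, y))
	{
	  printf ("mismatch at %u + %u\n", (unsigned) x, (unsigned) y);
	  return 1;
	}
  return 0;
}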
Hi Pan,

> Hi Tamar,
>
> I am trying to add more shape(s) like the branch version below for SAT_ADD.
> I suspect that widening_mul may not be the best place to take care of this
> shape, because after_dom_children mostly works within a bb, but here we need
> to follow the def/use across bbs.

It actually already does this, see for example optimize_spaceship which
optimizes across basic blocks.  However...

> Thus, is there any suggestion for the branch shape?  Adding a new simplify to
> match.pd works well, but it is not recommended per the previous discussion.

The objection previously was not to introducing the IFNs in match.pd; it
doesn't mean we can't use match.pd to force the versions with branches into
branchless code so the existing patterns can deal with them as is.

...in this case something like this:

#if GIMPLE
(simplify
 (cond (ge (plus:c@3 @0 @1) @0) @3 integer_minus_onep)
 (if (direct_internal_fn_supported_p (...))
  (bit_ior @3 (negate (...)))))
#endif

works better I think.  That is, for targets where we know we can optimize it
later on, or do something with it in the vectorizer, we canonicalize it.

The reason I have it guarded with the IFN is that some target maintainers
objected to replacing the branch code with branchless code, as their targets
can more optimally deal with branches.

Cheers,
Tamar
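For concreteness, one way the elided parts of that sketch might be filled in.
The guard and the replacement body below are assumptions on my part, modelled
on the existing usadd_right_part_1 pattern rather than taken from any posted
patch:

#if GIMPLE
/* Hypothetical expansion of the sketch: rewrite
     (X + Y) >= X ? (X + Y) : -1
   into the branchless form when the target can expand .SAT_ADD.  */
(simplify
 (cond (ge (plus:c@2 @0 @1) @0) @2 integer_minus_onep)
 (if (INTEGRAL_TYPE_P (type)
      && TYPE_UNSIGNED (type)
      && direct_internal_fn_supported_p (IFN_SAT_ADD, type,
					 OPTIMIZE_FOR_BOTH))
  (bit_ior @2 (negate (convert (lt @2 @0))))))
#endif

Guarding on direct_internal_fn_supported_p keeps the rewrite away from targets
whose maintainers prefer the branchy code, which is the point Tamar makes
above.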
Thanks Tamar for the enlightening idea, I will give it a try!

Pan
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0a7053c2286..73045ca8c8c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
     case IFN_UBSAN_CHECK_MUL:
     case IFN_ADD_OVERFLOW:
     case IFN_MUL_OVERFLOW:
+    case IFN_SAT_ADD:
     case IFN_VEC_WIDEN_PLUS:
     case IFN_VEC_WIDEN_PLUS_LO:
     case IFN_VEC_WIDEN_PLUS_HI:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 848bb9dbff3..25badbb86e5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
 			      smulhrs, umulhrs, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 07e743ae464..0f9c34fa897 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        || POINTER_TYPE_P (itype))
       && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
 
+/* Unsigned Saturation Add */
+(match (usadd_left_part_1 @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_left_part_2 @0 @1)
+ (realpart (IFN_ADD_OVERFLOW:c @0 @1))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (gt @0 (plus:c @0 @1))))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+(match (usadd_right_part_2 @0 @1)
+ (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
+ (if (INTEGRAL_TYPE_P (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@0))
+      && types_match (type, TREE_TYPE (@1)))))
+
+/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
+   because the sub part of left_part_2 cannot work with right_part_1.
+   For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
+   right_part_1 has nothing to do with .ADD_OVERFLOW.  */
+
+/* Unsigned saturation add, case 1 (branchless):
+   SAT_U_ADD = (X + Y) | - ((X + Y) < X) or
+   SAT_U_ADD = (X + Y) | - (X > (X + Y)).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
+
+/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
+
 /* x > y && x != XXX_MIN --> x > y
    x > y && x == XXX_MIN --> false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index ad14f9328b9..3f2cb46aff8 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
 OPTAB_NX(add_optab, "add$Q$a3")
 OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
 OPTAB_VX(addv_optab, "add$F$a3")
-OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
-OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
+OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc)
+OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc)
 OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(sub_optab, "sub$F$a3")
 OPTAB_NX(sub_optab, "sub$Q$a3")
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index e8c804f09b7..62da1c5ee08 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
   return 0;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to match saturation arith pattern(s).
+ *   1. SAT_ADD (unsigned)
+ *      _7 = _4 + _6;
+ *      _8 = _4 > _7;
+ *      _9 = (long unsigned int) _8;
+ *      _10 = -_9;
+ *      _12 = _7 | _10;
+ *      =>
+ *      _12 = .SAT_ADD (_4, _6);  */
+static void
+match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+{
+  gcall *call = NULL;
+
+  tree ops[2];
+  tree lhs = gimple_assign_lhs (stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
+      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
+					 OPTIMIZE_FOR_BOTH))
+    {
+      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
+      gimple_call_set_lhs (call, lhs);
+      gsi_replace (gsi, call, true);
+    }
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 	  break;
 
 	case BIT_IOR_EXPR:
+	  match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+	  /* fall-through */
 	case BIT_XOR_EXPR:
 	  match_uaddc_usubc (&gsi, stmt, code);
 	  break;
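For reference, a sketch of two C spellings that should correspond to the two
match.pd cases added above, assuming the target provides the usadd<mode>3
optab (otherwise direct_internal_fn_supported_p makes match_saturation_arith
leave the IOR alone).  Whether a given function is caught directly as case 1
or is first lowered to .ADD_OVERFLOW and caught as case 2 depends on the
earlier passes:

#include <stdint.h>

/* Case 1 shape: wrap-around add OR-ed with the negated overflow test.  */
uint64_t sat_add_case1 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

/* Case 2 shape: the overflow builtin gimplifies to .ADD_OVERFLOW, whose
   REALPART/IMAGPART the second pattern matches.  */
uint64_t sat_add_case2 (uint64_t x, uint64_t y)
{
  uint64_t sum;
  uint64_t ovf = __builtin_add_overflow (x, y, &sum);
  return sum | (-ovf);
}

With the patch applied and such a target, both functions should end up as a
single .SAT_ADD call, visible for example in the -fdump-tree-optimized dump
at -O2.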