diff mbox series

[v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

Message ID 20240528082937.3300351-1-pan2.li@intel.com
State New
Headers show
Series [v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int | expand

Commit Message

Li, Pan2 May 28, 2024, 8:29 a.m. UTC
From: Pan Li <pan2.li@intel.com>

This patch would like to add the middle-end presentation for the
saturation sub.  Aka set the result of add to the min when downflow.
It will take the pattern similar as below.

SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));

For example for uint8_t, we have

* SAT_SUB (255, 0)   => 255
* SAT_SUB (1, 2)     => 0
* SAT_SUB (254, 255) => 0
* SAT_SUB (0, 255)   => 0

Given below SAT_SUB for uint64

uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
{
  return (x + y) & (- (uint64_t)((x >= y)));
}

Before this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  _Bool _1;
  long unsigned int _3;
  uint64_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _1 = x_4(D) >= y_5(D);
  _3 = x_4(D) - y_5(D);
  _6 = _1 ? _3 : 0;
  return _6;
;;    succ:       EXIT
}

After this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
  return _6;
;;    succ:       EXIT
}

The below tests are running for this patch:
*. The riscv fully regression tests.
*. The x86 bootstrap tests.
*. The x86 fully regression tests.

	PR target/51492
	PR target/112600

gcc/ChangeLog:

	* internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
	* match.pd: Add new match for SAT_SUB.
	* optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
	new decl for generated in match.pd.
	(build_saturation_binary_arith_call): Add new helper function
	to build the gimple call to binary SAT alu.
	(match_saturation_arith): Rename from.
	(match_unsigned_saturation_add): Rename to.
	(match_unsigned_saturation_sub): Add new func to match the
	unsigned sat sub.
	(math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
	try when COND_EXPR.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/internal-fn.def       |  1 +
 gcc/match.pd              | 14 ++++++++
 gcc/optabs.def            |  4 +--
 gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------
 4 files changed, 64 insertions(+), 22 deletions(-)

Comments

Li, Pan2 June 5, 2024, 1:19 a.m. UTC | #1
Kindly ping, almost the same but for subtract.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Tuesday, May 28, 2024 4:30 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com; richard.guenther@gmail.com; Li, Pan2 <pan2.li@intel.com>
Subject: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

From: Pan Li <pan2.li@intel.com>

This patch would like to add the middle-end presentation for the
saturation sub.  Aka set the result of add to the min when downflow.
It will take the pattern similar as below.

SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));

For example for uint8_t, we have

* SAT_SUB (255, 0)   => 255
* SAT_SUB (1, 2)     => 0
* SAT_SUB (254, 255) => 0
* SAT_SUB (0, 255)   => 0

Given below SAT_SUB for uint64

uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
{
  return (x + y) & (- (uint64_t)((x >= y)));
}

Before this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  _Bool _1;
  long unsigned int _3;
  uint64_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _1 = x_4(D) >= y_5(D);
  _3 = x_4(D) - y_5(D);
  _6 = _1 ? _3 : 0;
  return _6;
;;    succ:       EXIT
}

After this patch:
uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
  return _6;
;;    succ:       EXIT
}

The below tests are running for this patch:
*. The riscv fully regression tests.
*. The x86 bootstrap tests.
*. The x86 fully regression tests.

	PR target/51492
	PR target/112600

gcc/ChangeLog:

	* internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
	* match.pd: Add new match for SAT_SUB.
	* optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
	* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
	new decl for generated in match.pd.
	(build_saturation_binary_arith_call): Add new helper function
	to build the gimple call to binary SAT alu.
	(match_saturation_arith): Rename from.
	(match_unsigned_saturation_add): Rename to.
	(match_unsigned_saturation_sub): Add new func to match the
	unsigned sat sub.
	(math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
	try when COND_EXPR.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/internal-fn.def       |  1 +
 gcc/match.pd              | 14 ++++++++
 gcc/optabs.def            |  4 +--
 gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------
 4 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 25badbb86e5..24539716e5b 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
 			      smulhrs, umulhrs, binary)
 
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
 
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 024e3350465..3e334533ff8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+/* Unsigned saturation sub, case 1 (branch with gt):
+   SAT_U_SUB = X > Y ? X - Y : 0  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (gt @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 2 (branch with ge):
+   SAT_U_SUB = X >= Y ? X - Y : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (ge @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 3f2cb46aff8..bc2611abdc2 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
 OPTAB_NX(sub_optab, "sub$Q$a3")
 OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
 OPTAB_VX(subv_optab, "sub$F$a3")
-OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
-OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
+OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
+OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
 OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(smul_optab, "mul$P$a3")
 OPTAB_NX(smul_optab, "mul$F$a3")
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 62da1c5ee08..4717302b728 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
 }
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+
+static void
+build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
+				    tree lhs, tree op_0, tree op_1)
+{
+  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
+    {
+      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
+      gimple_call_set_lhs (call, lhs);
+      gsi_replace (gsi, call, /* update_eh_info */ true);
+    }
+}
 
 /*
- * Try to match saturation arith pattern(s).
- *   1. SAT_ADD (unsigned)
- *      _7 = _4 + _6;
- *      _8 = _4 > _7;
- *      _9 = (long unsigned int) _8;
- *      _10 = -_9;
- *      _12 = _7 | _10;
- *      =>
- *      _12 = .SAT_ADD (_4, _6);  */
+ * Try to match saturation unsigned add.
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ *   =>
+ *   _12 = .SAT_ADD (_4, _6);  */
+
 static void
-match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
 {
-  gcall *call = NULL;
+  tree ops[2];
+  tree lhs = gimple_assign_lhs (stmt);
 
+  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
+    build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]);
+}
+
+/*
+ * Try to match saturation unsigned sub.
+ *   _1 = _4 >= _5;
+ *   _3 = _4 - _5;
+ *   _6 = _1 ? _3 : 0;
+ *   =>
+ *   _6 = .SAT_SUB (_4, _5);  */
+
+static void
+match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
+{
   tree ops[2];
   tree lhs = gimple_assign_lhs (stmt);
 
-  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
-      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
-					 OPTIMIZE_FOR_BOTH))
-    {
-      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
-      gimple_call_set_lhs (call, lhs);
-      gsi_replace (gsi, call, true);
-    }
+  if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
+    build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
 }
 
 /* Recognize for unsigned x
@@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 	      break;
 
 	    case BIT_IOR_EXPR:
-	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+	      match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
 	      /* fall-through  */
 	    case BIT_XOR_EXPR:
 	      match_uaddc_usubc (&gsi, stmt, code);
@@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 	      match_single_bit_test (&gsi, stmt);
 	      break;
 
+	    case COND_EXPR:
+	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
+	      break;
+
 	    default:;
 	    }
 	}
Richard Biener June 5, 2024, 7:19 a.m. UTC | #2
On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to add the middle-end presentation for the
> saturation sub.  Aka set the result of add to the min when downflow.
> It will take the pattern similar as below.
>
> SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));
>
> For example for uint8_t, we have
>
> * SAT_SUB (255, 0)   => 255
> * SAT_SUB (1, 2)     => 0
> * SAT_SUB (254, 255) => 0
> * SAT_SUB (0, 255)   => 0
>
> Given below SAT_SUB for uint64
>
> uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) & (- (uint64_t)((x >= y)));
> }
>
> Before this patch:
> uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> {
>   _Bool _1;
>   long unsigned int _3;
>   uint64_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _1 = x_4(D) >= y_5(D);
>   _3 = x_4(D) - y_5(D);
>   _6 = _1 ? _3 : 0;
>   return _6;
> ;;    succ:       EXIT
> }
>
> After this patch:
> uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
>   return _6;
> ;;    succ:       EXIT
> }
>
> The below tests are running for this patch:
> *. The riscv fully regression tests.
> *. The x86 bootstrap tests.
> *. The x86 fully regression tests.

OK.

Thanks,
Richard.

>         PR target/51492
>         PR target/112600
>
> gcc/ChangeLog:
>
>         * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
>         * match.pd: Add new match for SAT_SUB.
>         * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
>         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
>         new decl for generated in match.pd.
>         (build_saturation_binary_arith_call): Add new helper function
>         to build the gimple call to binary SAT alu.
>         (match_saturation_arith): Rename from.
>         (match_unsigned_saturation_add): Rename to.
>         (match_unsigned_saturation_sub): Add new func to match the
>         unsigned sat sub.
>         (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
>         try when COND_EXPR.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.def       |  1 +
>  gcc/match.pd              | 14 ++++++++
>  gcc/optabs.def            |  4 +--
>  gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------
>  4 files changed, 64 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 25badbb86e5..24539716e5b 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>                               smulhrs, umulhrs, binary)
>
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
>
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 024e3350465..3e334533ff8 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +/* Unsigned saturation sub, case 1 (branch with gt):
> +   SAT_U_SUB = X > Y ? X - Y : 0  */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 2 (branch with ge):
> +   SAT_U_SUB = X >= Y ? X - Y : 0.  */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 3f2cb46aff8..bc2611abdc2 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
>  OPTAB_NX(sub_optab, "sub$Q$a3")
>  OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
>  OPTAB_VX(subv_optab, "sub$F$a3")
> -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
> +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
>  OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(smul_optab, "mul$P$a3")
>  OPTAB_NX(smul_optab, "mul$F$a3")
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 62da1c5ee08..4717302b728 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>  }
>
>  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> +
> +static void
> +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> +                                   tree lhs, tree op_0, tree op_1)
> +{
> +  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, /* update_eh_info */ true);
> +    }
> +}
>
>  /*
> - * Try to match saturation arith pattern(s).
> - *   1. SAT_ADD (unsigned)
> - *      _7 = _4 + _6;
> - *      _8 = _4 > _7;
> - *      _9 = (long unsigned int) _8;
> - *      _10 = -_9;
> - *      _12 = _7 | _10;
> - *      =>
> - *      _12 = .SAT_ADD (_4, _6);  */
> + * Try to match saturation unsigned add.
> + *   _7 = _4 + _6;
> + *   _8 = _4 > _7;
> + *   _9 = (long unsigned int) _8;
> + *   _10 = -_9;
> + *   _12 = _7 | _10;
> + *   =>
> + *   _12 = .SAT_ADD (_4, _6);  */
> +
>  static void
> -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>  {
> -  gcall *call = NULL;
> +  tree ops[2];
> +  tree lhs = gimple_assign_lhs (stmt);
>
> +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
> +    build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]);
> +}
> +
> +/*
> + * Try to match saturation unsigned sub.
> + *   _1 = _4 >= _5;
> + *   _3 = _4 - _5;
> + *   _6 = _1 ? _3 : 0;
> + *   =>
> + *   _6 = .SAT_SUB (_4, _5);  */
> +
> +static void
> +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
>    tree ops[2];
>    tree lhs = gimple_assign_lhs (stmt);
>
> -  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> -      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> -                                        OPTIMIZE_FOR_BOTH))
> -    {
> -      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> -      gimple_call_set_lhs (call, lhs);
> -      gsi_replace (gsi, call, true);
> -    }
> +  if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
> +    build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
>  }
>
>  /* Recognize for unsigned x
> @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               break;
>
>             case BIT_IOR_EXPR:
> -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +             match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               match_single_bit_test (&gsi, stmt);
>               break;
>
> +           case COND_EXPR:
> +             match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> +             break;
> +
>             default:;
>             }
>         }
> --
> 2.34.1
>
Li, Pan2 June 5, 2024, 7:38 a.m. UTC | #3
Thanks Richard, will commit after the rebased pass the regression test.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Wednesday, June 5, 2024 3:19 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch would like to add the middle-end presentation for the
> saturation sub.  Aka set the result of add to the min when downflow.
> It will take the pattern similar as below.
>
> SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));
>
> For example for uint8_t, we have
>
> * SAT_SUB (255, 0)   => 255
> * SAT_SUB (1, 2)     => 0
> * SAT_SUB (254, 255) => 0
> * SAT_SUB (0, 255)   => 0
>
> Given below SAT_SUB for uint64
>
> uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) & (- (uint64_t)((x >= y)));
> }
>
> Before this patch:
> uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> {
>   _Bool _1;
>   long unsigned int _3;
>   uint64_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _1 = x_4(D) >= y_5(D);
>   _3 = x_4(D) - y_5(D);
>   _6 = _1 ? _3 : 0;
>   return _6;
> ;;    succ:       EXIT
> }
>
> After this patch:
> uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _6;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
>   return _6;
> ;;    succ:       EXIT
> }
>
> The below tests are running for this patch:
> *. The riscv fully regression tests.
> *. The x86 bootstrap tests.
> *. The x86 fully regression tests.

OK.

Thanks,
Richard.

>         PR target/51492
>         PR target/112600
>
> gcc/ChangeLog:
>
>         * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
>         * match.pd: Add new match for SAT_SUB.
>         * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
>         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
>         new decl for generated in match.pd.
>         (build_saturation_binary_arith_call): Add new helper function
>         to build the gimple call to binary SAT alu.
>         (match_saturation_arith): Rename from.
>         (match_unsigned_saturation_add): Rename to.
>         (match_unsigned_saturation_sub): Add new func to match the
>         unsigned sat sub.
>         (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
>         try when COND_EXPR.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.def       |  1 +
>  gcc/match.pd              | 14 ++++++++
>  gcc/optabs.def            |  4 +--
>  gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------
>  4 files changed, 64 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 25badbb86e5..24539716e5b 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>                               smulhrs, umulhrs, binary)
>
>  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
>
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 024e3350465..3e334533ff8 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (match (unsigned_integer_sat_add @0 @1)
>   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
>
> +/* Unsigned saturation sub, case 1 (branch with gt):
> +   SAT_U_SUB = X > Y ? X - Y : 0  */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
> +/* Unsigned saturation sub, case 2 (branch with ge):
> +   SAT_U_SUB = X >= Y ? X - Y : 0.  */
> +(match (unsigned_integer_sat_sub @0 @1)
> + (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (type, @0, @1))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 3f2cb46aff8..bc2611abdc2 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
>  OPTAB_NX(sub_optab, "sub$Q$a3")
>  OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
>  OPTAB_VX(subv_optab, "sub$F$a3")
> -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
> +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
>  OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(smul_optab, "mul$P$a3")
>  OPTAB_NX(smul_optab, "mul$F$a3")
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 62da1c5ee08..4717302b728 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
>  }
>
>  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> +
> +static void
> +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> +                                   tree lhs, tree op_0, tree op_1)
> +{
> +  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
> +    {
> +      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, /* update_eh_info */ true);
> +    }
> +}
>
>  /*
> - * Try to match saturation arith pattern(s).
> - *   1. SAT_ADD (unsigned)
> - *      _7 = _4 + _6;
> - *      _8 = _4 > _7;
> - *      _9 = (long unsigned int) _8;
> - *      _10 = -_9;
> - *      _12 = _7 | _10;
> - *      =>
> - *      _12 = .SAT_ADD (_4, _6);  */
> + * Try to match saturation unsigned add.
> + *   _7 = _4 + _6;
> + *   _8 = _4 > _7;
> + *   _9 = (long unsigned int) _8;
> + *   _10 = -_9;
> + *   _12 = _7 | _10;
> + *   =>
> + *   _12 = .SAT_ADD (_4, _6);  */
> +
>  static void
> -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
>  {
> -  gcall *call = NULL;
> +  tree ops[2];
> +  tree lhs = gimple_assign_lhs (stmt);
>
> +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
> +    build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]);
> +}
> +
> +/*
> + * Try to match saturation unsigned sub.
> + *   _1 = _4 >= _5;
> + *   _3 = _4 - _5;
> + *   _6 = _1 ? _3 : 0;
> + *   =>
> + *   _6 = .SAT_SUB (_4, _5);  */
> +
> +static void
> +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> +{
>    tree ops[2];
>    tree lhs = gimple_assign_lhs (stmt);
>
> -  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> -      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> -                                        OPTIMIZE_FOR_BOTH))
> -    {
> -      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> -      gimple_call_set_lhs (call, lhs);
> -      gsi_replace (gsi, call, true);
> -    }
> +  if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
> +    build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
>  }
>
>  /* Recognize for unsigned x
> @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               break;
>
>             case BIT_IOR_EXPR:
> -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> +             match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
>               /* fall-through  */
>             case BIT_XOR_EXPR:
>               match_uaddc_usubc (&gsi, stmt, code);
> @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
>               match_single_bit_test (&gsi, stmt);
>               break;
>
> +           case COND_EXPR:
> +             match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> +             break;
> +
>             default:;
>             }
>         }
> --
> 2.34.1
>
Uros Bizjak June 5, 2024, 8:09 a.m. UTC | #4
On Wed, Jun 5, 2024 at 9:38 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard, will commit after the rebased pass the regression test.
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Wednesday, June 5, 2024 3:19 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
> Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int
>
> On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to add the middle-end presentation for the
> > saturation sub.  Aka set the result of add to the min when downflow.
> > It will take the pattern similar as below.
> >
> > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));
> >
> > For example for uint8_t, we have
> >
> > * SAT_SUB (255, 0)   => 255
> > * SAT_SUB (1, 2)     => 0
> > * SAT_SUB (254, 255) => 0
> > * SAT_SUB (0, 255)   => 0
> >
> > Given below SAT_SUB for uint64
> >
> > uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
> > {
> >   return (x + y) & (- (uint64_t)((x >= y)));
> > }

Is the above testcase correct? You need "(x + y)" as the first term.

BTW: After applying your patch, I'm not able to produce .SAT_SUB with
x86_64 and the following testcase:

--cut here--
typedef unsigned short T;

void foo (T *out, T *x, T *y, int n)
{
  int i;

  for (i = 0; i < n; i++)
    out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
}
--cut here--

with gcc -O2 -ftree-vectorize -msse2

I think that all relevant optabs were added for x86 in

https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b59de4113262f2bee14147eb17eb3592f03d9556

as part of the commit for PR112600, comment 8.

Uros.

> >
> > Before this patch:
> > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> > {
> >   _Bool _1;
> >   long unsigned int _3;
> >   uint64_t _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _1 = x_4(D) >= y_5(D);
> >   _3 = x_4(D) - y_5(D);
> >   _6 = _1 ? _3 : 0;
> >   return _6;
> > ;;    succ:       EXIT
> > }
> >
> > After this patch:
> > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> > {
> >   uint64_t _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> >   return _6;
> > ;;    succ:       EXIT
> > }
> >
> > The below tests are running for this patch:
> > *. The riscv fully regression tests.
> > *. The x86 bootstrap tests.
> > *. The x86 fully regression tests.
>
> OK.
>
> Thanks,
> Richard.
>
> >         PR target/51492
> >         PR target/112600
> >
> > gcc/ChangeLog:
> >
> >         * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
> >         * match.pd: Add new match for SAT_SUB.
> >         * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
> >         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
> >         new decl for generated in match.pd.
> >         (build_saturation_binary_arith_call): Add new helper function
> >         to build the gimple call to binary SAT alu.
> >         (match_saturation_arith): Rename from.
> >         (match_unsigned_saturation_add): Rename to.
> >         (match_unsigned_saturation_sub): Add new func to match the
> >         unsigned sat sub.
> >         (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
> >         try when COND_EXPR.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/internal-fn.def       |  1 +
> >  gcc/match.pd              | 14 ++++++++
> >  gcc/optabs.def            |  4 +--
> >  gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------
> >  4 files changed, 64 insertions(+), 22 deletions(-)
> >
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 25badbb86e5..24539716e5b 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
> >                               smulhrs, umulhrs, binary)
> >
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
> >
> >  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
> >  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 024e3350465..3e334533ff8 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  (match (unsigned_integer_sat_add @0 @1)
> >   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> >
> > +/* Unsigned saturation sub, case 1 (branch with gt):
> > +   SAT_U_SUB = X > Y ? X - Y : 0  */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 2 (branch with ge):
> > +   SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 3f2cb46aff8..bc2611abdc2 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
> >  OPTAB_NX(sub_optab, "sub$Q$a3")
> >  OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
> >  OPTAB_VX(subv_optab, "sub$F$a3")
> > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
> > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
> >  OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc)
> >  OPTAB_NX(smul_optab, "mul$P$a3")
> >  OPTAB_NX(smul_optab, "mul$F$a3")
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 62da1c5ee08..4717302b728 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
> >  }
> >
> >  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> > +
> > +static void
> > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> > +                                   tree lhs, tree op_0, tree op_1)
> > +{
> > +  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
> > +    {
> > +      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
> > +      gimple_call_set_lhs (call, lhs);
> > +      gsi_replace (gsi, call, /* update_eh_info */ true);
> > +    }
> > +}
> >
> >  /*
> > - * Try to match saturation arith pattern(s).
> > - *   1. SAT_ADD (unsigned)
> > - *      _7 = _4 + _6;
> > - *      _8 = _4 > _7;
> > - *      _9 = (long unsigned int) _8;
> > - *      _10 = -_9;
> > - *      _12 = _7 | _10;
> > - *      =>
> > - *      _12 = .SAT_ADD (_4, _6);  */
> > + * Try to match saturation unsigned add.
> > + *   _7 = _4 + _6;
> > + *   _8 = _4 > _7;
> > + *   _9 = (long unsigned int) _8;
> > + *   _10 = -_9;
> > + *   _12 = _7 | _10;
> > + *   =>
> > + *   _12 = .SAT_ADD (_4, _6);  */
> > +
> >  static void
> > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >  {
> > -  gcall *call = NULL;
> > +  tree ops[2];
> > +  tree lhs = gimple_assign_lhs (stmt);
> >
> > +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
> > +    build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]);
> > +}
> > +
> > +/*
> > + * Try to match saturation unsigned sub.
> > + *   _1 = _4 >= _5;
> > + *   _3 = _4 - _5;
> > + *   _6 = _1 ? _3 : 0;
> > + *   =>
> > + *   _6 = .SAT_SUB (_4, _5);  */
> > +
> > +static void
> > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> > +{
> >    tree ops[2];
> >    tree lhs = gimple_assign_lhs (stmt);
> >
> > -  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> > -      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> > -                                        OPTIMIZE_FOR_BOTH))
> > -    {
> > -      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > -      gimple_call_set_lhs (call, lhs);
> > -      gsi_replace (gsi, call, true);
> > -    }
> > +  if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
> > +    build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> >  }
> >
> >  /* Recognize for unsigned x
> > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >               break;
> >
> >             case BIT_IOR_EXPR:
> > -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > +             match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
> >               /* fall-through  */
> >             case BIT_XOR_EXPR:
> >               match_uaddc_usubc (&gsi, stmt, code);
> > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >               match_single_bit_test (&gsi, stmt);
> >               break;
> >
> > +           case COND_EXPR:
> > +             match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > +             break;
> > +
> >             default:;
> >             }
> >         }
> > --
> > 2.34.1
> >
Li, Pan2 June 5, 2024, 8:22 a.m. UTC | #5
> Is the above testcase correct? You need "(x + y)" as the first term.

Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference.

+#define DEF_SAT_U_SUB_FMT_1(T)     \
+T __attribute__((noinline))        \
+sat_u_sub_##T##_fmt_1 (T x, T y)   \
+{                                  \
+  return (x - y) & (-(T)(x >= y)); \
+}
+
+#define DEF_SAT_U_SUB_FMT_2(T)    \
+T __attribute__((noinline))       \
+sat_u_sub_##T##_fmt_2 (T x, T y)  \
+{                                 \
+  return (x - y) & (-(T)(x > y)); \
+}

> BTW: After applying your patch, I'm not able to produce .SAT_SUB with
> x86_64 and the following testcase:

You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part.
Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int?
Of course, I will have a try later as in the middle of sth.

https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html

Pan


-----Original Message-----
From: Uros Bizjak <ubizjak@gmail.com> 
Sent: Wednesday, June 5, 2024 4:09 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

On Wed, Jun 5, 2024 at 9:38 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard, will commit after the rebased pass the regression test.
>
> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Wednesday, June 5, 2024 3:19 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
> Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int
>
> On Tue, May 28, 2024 at 10:29 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > This patch would like to add the middle-end presentation for the
> > saturation sub.  Aka set the result of add to the min when downflow.
> > It will take the pattern similar as below.
> >
> > SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y));
> >
> > For example for uint8_t, we have
> >
> > * SAT_SUB (255, 0)   => 255
> > * SAT_SUB (1, 2)     => 0
> > * SAT_SUB (254, 255) => 0
> > * SAT_SUB (0, 255)   => 0
> >
> > Given below SAT_SUB for uint64
> >
> > uint64_t sat_sub_u64 (uint64_t x, uint64_t y)
> > {
> >   return (x + y) & (- (uint64_t)((x >= y)));
> > }

Is the above testcase correct? You need "(x + y)" as the first term.

BTW: After applying your patch, I'm not able to produce .SAT_SUB with
x86_64 and the following testcase:

--cut here--
typedef unsigned short T;

void foo (T *out, T *x, T *y, int n)
{
  int i;

  for (i = 0; i < n; i++)
    out[i] = (x[i] - y[i]) & (-(T)(x[i] >= y[i]));
}
--cut here--

with gcc -O2 -ftree-vectorize -msse2

I think that all relevant optabs were added for x86 in

https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b59de4113262f2bee14147eb17eb3592f03d9556

as part of the commit for PR112600, comment 8.

Uros.

> >
> > Before this patch:
> > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> > {
> >   _Bool _1;
> >   long unsigned int _3;
> >   uint64_t _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _1 = x_4(D) >= y_5(D);
> >   _3 = x_4(D) - y_5(D);
> >   _6 = _1 ? _3 : 0;
> >   return _6;
> > ;;    succ:       EXIT
> > }
> >
> > After this patch:
> > uint64_t sat_sub_u_0_uint64_t (uint64_t x, uint64_t y)
> > {
> >   uint64_t _6;
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >   _6 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
> >   return _6;
> > ;;    succ:       EXIT
> > }
> >
> > The below tests are running for this patch:
> > *. The riscv fully regression tests.
> > *. The x86 bootstrap tests.
> > *. The x86 fully regression tests.
>
> OK.
>
> Thanks,
> Richard.
>
> >         PR target/51492
> >         PR target/112600
> >
> > gcc/ChangeLog:
> >
> >         * internal-fn.def (SAT_SUB): Add new IFN define for SAT_SUB.
> >         * match.pd: Add new match for SAT_SUB.
> >         * optabs.def (OPTAB_NL): Remove fixed-point for ussub/ssub.
> >         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_sub): Add
> >         new decl for generated in match.pd.
> >         (build_saturation_binary_arith_call): Add new helper function
> >         to build the gimple call to binary SAT alu.
> >         (match_saturation_arith): Rename from.
> >         (match_unsigned_saturation_add): Rename to.
> >         (match_unsigned_saturation_sub): Add new func to match the
> >         unsigned sat sub.
> >         (math_opts_dom_walker::after_dom_children): Add SAT_SUB matching
> >         try when COND_EXPR.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/internal-fn.def       |  1 +
> >  gcc/match.pd              | 14 ++++++++
> >  gcc/optabs.def            |  4 +--
> >  gcc/tree-ssa-math-opts.cc | 67 +++++++++++++++++++++++++++------------
> >  4 files changed, 64 insertions(+), 22 deletions(-)
> >
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 25badbb86e5..24539716e5b 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -276,6 +276,7 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
> >                               smulhrs, umulhrs, binary)
> >
> >  DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
> > +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
> >
> >  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
> >  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 024e3350465..3e334533ff8 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3086,6 +3086,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  (match (unsigned_integer_sat_add @0 @1)
> >   (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
> >
> > +/* Unsigned saturation sub, case 1 (branch with gt):
> > +   SAT_U_SUB = X > Y ? X - Y : 0  */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond (gt @0 @1) (minus @0 @1) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> > +/* Unsigned saturation sub, case 2 (branch with ge):
> > +   SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > +(match (unsigned_integer_sat_sub @0 @1)
> > + (cond (ge @0 @1) (minus @0 @1) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (type, @0, @1))))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 3f2cb46aff8..bc2611abdc2 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -118,8 +118,8 @@ OPTAB_NX(sub_optab, "sub$F$a3")
> >  OPTAB_NX(sub_optab, "sub$Q$a3")
> >  OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
> >  OPTAB_VX(subv_optab, "sub$F$a3")
> > -OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> > -OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
> > +OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
> > +OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
> >  OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc)
> >  OPTAB_NX(smul_optab, "mul$P$a3")
> >  OPTAB_NX(smul_optab, "mul$F$a3")
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index 62da1c5ee08..4717302b728 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -4087,33 +4087,56 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
> >  }
> >
> >  extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
> > +extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
> > +
> > +static void
> > +build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
> > +                                   tree lhs, tree op_0, tree op_1)
> > +{
> > +  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
> > +    {
> > +      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
> > +      gimple_call_set_lhs (call, lhs);
> > +      gsi_replace (gsi, call, /* update_eh_info */ true);
> > +    }
> > +}
> >
> >  /*
> > - * Try to match saturation arith pattern(s).
> > - *   1. SAT_ADD (unsigned)
> > - *      _7 = _4 + _6;
> > - *      _8 = _4 > _7;
> > - *      _9 = (long unsigned int) _8;
> > - *      _10 = -_9;
> > - *      _12 = _7 | _10;
> > - *      =>
> > - *      _12 = .SAT_ADD (_4, _6);  */
> > + * Try to match saturation unsigned add.
> > + *   _7 = _4 + _6;
> > + *   _8 = _4 > _7;
> > + *   _9 = (long unsigned int) _8;
> > + *   _10 = -_9;
> > + *   _12 = _7 | _10;
> > + *   =>
> > + *   _12 = .SAT_ADD (_4, _6);  */
> > +
> >  static void
> > -match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
> > +match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
> >  {
> > -  gcall *call = NULL;
> > +  tree ops[2];
> > +  tree lhs = gimple_assign_lhs (stmt);
> >
> > +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
> > +    build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]);
> > +}
> > +
> > +/*
> > + * Try to match saturation unsigned sub.
> > + *   _1 = _4 >= _5;
> > + *   _3 = _4 - _5;
> > + *   _6 = _1 ? _3 : 0;
> > + *   =>
> > + *   _6 = .SAT_SUB (_4, _5);  */
> > +
> > +static void
> > +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
> > +{
> >    tree ops[2];
> >    tree lhs = gimple_assign_lhs (stmt);
> >
> > -  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> > -      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> > -                                        OPTIMIZE_FOR_BOTH))
> > -    {
> > -      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
> > -      gimple_call_set_lhs (call, lhs);
> > -      gsi_replace (gsi, call, true);
> > -    }
> > +  if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
> > +    build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
> >  }
> >
> >  /* Recognize for unsigned x
> > @@ -6078,7 +6101,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >               break;
> >
> >             case BIT_IOR_EXPR:
> > -             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
> > +             match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
> >               /* fall-through  */
> >             case BIT_XOR_EXPR:
> >               match_uaddc_usubc (&gsi, stmt, code);
> > @@ -6089,6 +6112,10 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
> >               match_single_bit_test (&gsi, stmt);
> >               break;
> >
> > +           case COND_EXPR:
> > +             match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
> > +             break;
> > +
> >             default:;
> >             }
> >         }
> > --
> > 2.34.1
> >
Uros Bizjak June 5, 2024, 8:29 a.m. UTC | #6
On Wed, Jun 5, 2024 at 10:22 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > Is the above testcase correct? You need "(x + y)" as the first term.
>
> Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference.
>
> +#define DEF_SAT_U_SUB_FMT_1(T)     \
> +T __attribute__((noinline))        \
> +sat_u_sub_##T##_fmt_1 (T x, T y)   \
> +{                                  \
> +  return (x - y) & (-(T)(x >= y)); \
> +}
> +
> +#define DEF_SAT_U_SUB_FMT_2(T)    \
> +T __attribute__((noinline))       \
> +sat_u_sub_##T##_fmt_2 (T x, T y)  \
> +{                                 \
> +  return (x - y) & (-(T)(x > y)); \
> +}
>
> > BTW: After applying your patch, I'm not able to produce .SAT_SUB with
> > x86_64 and the following testcase:
>
> You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part.
> Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int?
> Of course, I will have a try later as in the middle of sth.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html

I see. x86 doesn't have scalar saturating instructions, so the scalar
version indeed can't be converted.

I will amend x86 testcases after the vector part of your patch is committed.

Thanks,
Uros.
Li, Pan2 June 5, 2024, 8:38 a.m. UTC | #7
> I see. x86 doesn't have scalar saturating instructions, so the scalar
> version indeed can't be converted.

> I will amend x86 testcases after the vector part of your patch is committed.

Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below.

.SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here

It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here.

Pan

-----Original Message-----
From: Uros Bizjak <ubizjak@gmail.com> 
Sent: Wednesday, June 5, 2024 4:30 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

On Wed, Jun 5, 2024 at 10:22 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > Is the above testcase correct? You need "(x + y)" as the first term.
>
> Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference.
>
> +#define DEF_SAT_U_SUB_FMT_1(T)     \
> +T __attribute__((noinline))        \
> +sat_u_sub_##T##_fmt_1 (T x, T y)   \
> +{                                  \
> +  return (x - y) & (-(T)(x >= y)); \
> +}
> +
> +#define DEF_SAT_U_SUB_FMT_2(T)    \
> +T __attribute__((noinline))       \
> +sat_u_sub_##T##_fmt_2 (T x, T y)  \
> +{                                 \
> +  return (x - y) & (-(T)(x > y)); \
> +}
>
> > BTW: After applying your patch, I'm not able to produce .SAT_SUB with
> > x86_64 and the following testcase:
>
> You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part.
> Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int?
> Of course, I will have a try later as in the middle of sth.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html

I see. x86 doesn't have scalar saturating instructions, so the scalar
version indeed can't be converted.

I will amend x86 testcases after the vector part of your patch is committed.

Thanks,
Uros.
Li, Pan2 June 5, 2024, 8:40 a.m. UTC | #8
Committed with the example in commit log updated, thanks all.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Wednesday, June 5, 2024 4:38 PM
To: Uros Bizjak <ubizjak@gmail.com>
Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: RE: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

> I see. x86 doesn't have scalar saturating instructions, so the scalar
> version indeed can't be converted.

> I will amend x86 testcases after the vector part of your patch is committed.

Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below.

.SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here

It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here.

Pan

-----Original Message-----
From: Uros Bizjak <ubizjak@gmail.com> 
Sent: Wednesday, June 5, 2024 4:30 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

On Wed, Jun 5, 2024 at 10:22 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > Is the above testcase correct? You need "(x + y)" as the first term.
>
> Thanks for comments, should be copy issue here, you can take SAT_SUB (x, y) => (x - y) & (-(TYPE)(x >= y)) or below template for reference.
>
> +#define DEF_SAT_U_SUB_FMT_1(T)     \
> +T __attribute__((noinline))        \
> +sat_u_sub_##T##_fmt_1 (T x, T y)   \
> +{                                  \
> +  return (x - y) & (-(T)(x >= y)); \
> +}
> +
> +#define DEF_SAT_U_SUB_FMT_2(T)    \
> +T __attribute__((noinline))       \
> +sat_u_sub_##T##_fmt_2 (T x, T y)  \
> +{                                 \
> +  return (x - y) & (-(T)(x > y)); \
> +}
>
> > BTW: After applying your patch, I'm not able to produce .SAT_SUB with
> > x86_64 and the following testcase:
>
> You mean vectorize part? This patch is only for unsigned scalar int (see title) and the below is the vect part.
> Could you please help to double confirm if you cannot see .SAT_SUB after widen_mul pass in x86 for unsigned scalar int?
> Of course, I will have a try later as in the middle of sth.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2024-May/653024.html

I see. x86 doesn't have scalar saturating instructions, so the scalar
version indeed can't be converted.

I will amend x86 testcases after the vector part of your patch is committed.

Thanks,
Uros.
Uros Bizjak June 5, 2024, 8:45 a.m. UTC | #9
On Wed, Jun 5, 2024 at 10:38 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > I see. x86 doesn't have scalar saturating instructions, so the scalar
> > version indeed can't be converted.
>
> > I will amend x86 testcases after the vector part of your patch is committed.
>
> Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below.
>
> .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here
>
> It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here.

x86 will emit cmove in the above case:

       movl    %edi, %eax
       xorl    %edx, %edx
       subl    %esi, %eax
       cmpl    %edi, %esi
       cmovnb  %edx, %eax

Maybe we can reuse flags from the subtraction here to avoid the compare.

Uros.
Li, Pan2 June 5, 2024, 8:52 a.m. UTC | #10
Thanks for explaining. I see, cmove is well designed for such cases.

Pan

-----Original Message-----
From: Uros Bizjak <ubizjak@gmail.com> 
Sent: Wednesday, June 5, 2024 4:46 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int

On Wed, Jun 5, 2024 at 10:38 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> > I see. x86 doesn't have scalar saturating instructions, so the scalar
> > version indeed can't be converted.
>
> > I will amend x86 testcases after the vector part of your patch is committed.
>
> Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below.
>
> .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here
>
> It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here.

x86 will emit cmove in the above case:

       movl    %edi, %eax
       xorl    %edx, %edx
       subl    %esi, %eax
       cmpl    %edi, %esi
       cmovnb  %edx, %eax

Maybe we can reuse flags from the subtraction here to avoid the compare.

Uros.
Uros Bizjak June 5, 2024, 9:11 a.m. UTC | #11
On Wed, Jun 5, 2024 at 10:52 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks for explaining. I see, cmove is well designed for such cases.

If the question is if it is worth it to convert using
__builtin_sub_overflow here if the target doesn't provide scalar
saturating optab, I think the answer is yes. For x86, the compare will
be eliminated.

Please consider this testcase:

--cut here--
unsigned int
__attribute__((noinline))
foo (unsigned int x, unsigned int y)
{
  return x > y ? x - y : 0;
}

unsigned int
__attribute__((noinline))
bar (unsigned int x, unsigned int y)
{
  unsigned int z;

  return __builtin_sub_overflow (x, y, &z) ? 0 : z;
}
--cut here--

This will compile to:

0000000000000000 <foo>:
  0:   89 f8                   mov    %edi,%eax
  2:   31 d2                   xor    %edx,%edx
  4:   29 f0                   sub    %esi,%eax
  6:   39 fe                   cmp    %edi,%esi
  8:   0f 43 c2                cmovae %edx,%eax
  b:   c3                      ret
  c:   0f 1f 40 00             nopl   0x0(%rax)

0000000000000010 <bar>:
 10:   29 f7                   sub    %esi,%edi
 12:   72 03                   jb     17 <bar+0x7>
 14:   89 f8                   mov    %edi,%eax
 16:   c3                      ret
 17:   31 c0                   xor    %eax,%eax
 19:   c3                      ret

Please note that the compare was eliminated in the later test. So, if
the target does not provide saturated optab but provides
__builtin_sub_overflow, I think it is worth emitting .SAT_SUB via
__builtin_sub_overflow (and in similar way for saturated add).

Uros.


>
> Pan
>
> -----Original Message-----
> From: Uros Bizjak <ubizjak@gmail.com>
> Sent: Wednesday, June 5, 2024 4:46 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: Richard Biener <richard.guenther@gmail.com>; gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com
> Subject: Re: [PATCH v1] Internal-fn: Support new IFN SAT_SUB for unsigned scalar int
>
> On Wed, Jun 5, 2024 at 10:38 AM Li, Pan2 <pan2.li@intel.com> wrote:
> >
> > > I see. x86 doesn't have scalar saturating instructions, so the scalar
> > > version indeed can't be converted.
> >
> > > I will amend x86 testcases after the vector part of your patch is committed.
> >
> > Thanks for the confirmation. Just curious, the .SAT_SUB for scalar has sorts of forms, like a branch version as below.
> >
> > .SAT_SUB (x, y) = x > y ? x - y : 0. // or leverage __builtin_sub_overflow here
> >
> > It is reasonable to implement the scalar .SAT_SUB for x86? Given somehow we can eliminate the branch here.
>
> x86 will emit cmove in the above case:
>
>        movl    %edi, %eax
>        xorl    %edx, %edx
>        subl    %esi, %eax
>        cmpl    %edi, %esi
>        cmovnb  %edx, %eax
>
> Maybe we can reuse flags from the subtraction here to avoid the compare.
>
> Uros.
diff mbox series

Patch

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 25badbb86e5..24539716e5b 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -276,6 +276,7 @@  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
 			      smulhrs, umulhrs, binary)
 
 DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_SUB, ECF_CONST, first, sssub, ussub, binary)
 
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 024e3350465..3e334533ff8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3086,6 +3086,20 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+/* Unsigned saturation sub, case 1 (branch with gt):
+   SAT_U_SUB = X > Y ? X - Y : 0  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (gt @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 2 (branch with ge):
+   SAT_U_SUB = X >= Y ? X - Y : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond (ge @0 @1) (minus @0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 3f2cb46aff8..bc2611abdc2 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -118,8 +118,8 @@  OPTAB_NX(sub_optab, "sub$F$a3")
 OPTAB_NX(sub_optab, "sub$Q$a3")
 OPTAB_VL(subv_optab, "subv$I$a3", MINUS, "sub", '3', gen_intv_fp_libfunc)
 OPTAB_VX(subv_optab, "sub$F$a3")
-OPTAB_NL(sssub_optab, "sssub$Q$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
-OPTAB_NL(ussub_optab, "ussub$Q$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
+OPTAB_NL(sssub_optab, "sssub$a3", SS_MINUS, "sssub", '3', gen_signed_fixed_libfunc)
+OPTAB_NL(ussub_optab, "ussub$a3", US_MINUS, "ussub", '3', gen_unsigned_fixed_libfunc)
 OPTAB_NL(smul_optab, "mul$Q$a3", MULT, "mul", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(smul_optab, "mul$P$a3")
 OPTAB_NX(smul_optab, "mul$F$a3")
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 62da1c5ee08..4717302b728 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4087,33 +4087,56 @@  arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
 }
 
 extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
+
+static void
+build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, internal_fn fn,
+				    tree lhs, tree op_0, tree op_1)
+{
+  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
+    {
+      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
+      gimple_call_set_lhs (call, lhs);
+      gsi_replace (gsi, call, /* update_eh_info */ true);
+    }
+}
 
 /*
- * Try to match saturation arith pattern(s).
- *   1. SAT_ADD (unsigned)
- *      _7 = _4 + _6;
- *      _8 = _4 > _7;
- *      _9 = (long unsigned int) _8;
- *      _10 = -_9;
- *      _12 = _7 | _10;
- *      =>
- *      _12 = .SAT_ADD (_4, _6);  */
+ * Try to match saturation unsigned add.
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ *   =>
+ *   _12 = .SAT_ADD (_4, _6);  */
+
 static void
-match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+match_unsigned_saturation_add (gimple_stmt_iterator *gsi, gassign *stmt)
 {
-  gcall *call = NULL;
+  tree ops[2];
+  tree lhs = gimple_assign_lhs (stmt);
 
+  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL))
+    build_saturation_binary_arith_call (gsi, IFN_SAT_ADD, lhs, ops[0], ops[1]);
+}
+
+/*
+ * Try to match saturation unsigned sub.
+ *   _1 = _4 >= _5;
+ *   _3 = _4 - _5;
+ *   _6 = _1 ? _3 : 0;
+ *   =>
+ *   _6 = .SAT_SUB (_4, _5);  */
+
+static void
+match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
+{
   tree ops[2];
   tree lhs = gimple_assign_lhs (stmt);
 
-  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
-      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
-					 OPTIMIZE_FOR_BOTH))
-    {
-      call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]);
-      gimple_call_set_lhs (call, lhs);
-      gsi_replace (gsi, call, true);
-    }
+  if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL))
+    build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
 }
 
 /* Recognize for unsigned x
@@ -6078,7 +6101,7 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
 	      break;
 
 	    case BIT_IOR_EXPR:
-	      match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+	      match_unsigned_saturation_add (&gsi, as_a<gassign *> (stmt));
 	      /* fall-through  */
 	    case BIT_XOR_EXPR:
 	      match_uaddc_usubc (&gsi, stmt, code);
@@ -6089,6 +6112,10 @@  math_opts_dom_walker::after_dom_children (basic_block bb)
 	      match_single_bit_test (&gsi, stmt);
 	      break;
 
+	    case COND_EXPR:
+	      match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
+	      break;
+
 	    default:;
 	    }
 	}