diff mbox series

[v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB

Message ID 20240621035252.742099-1-pan2.li@intel.com
State New
Headers show
Series [v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB | expand

Commit Message

Li, Pan2 June 21, 2024, 3:52 a.m. UTC
From: Pan Li <pan2.li@intel.com>

The zip benchmark of coremark-pro have one SAT_SUB like pattern but
truncated as below:

void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  register uint16_t *p = x;

  do {
    a = *--p;
    *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
  } while (--n);
}

It will have gimple after ifcvt pass,  it cannot hit any pattern of
SAT_SUB and then cannot vectorize to SAT_SUB.

_2 = a_11 - b_12(D);
iftmp.0_13 = (short unsigned int) _2;
_18 = a_11 >= b_12(D);
iftmp.0_5 = _18 ? iftmp.0_13 : 0;

This patch would like to do some reconcile for above pattern to match
the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
the SAT_SUB.

_2 = a_11 - b_12(D);
_18 = a_11 >= b_12(D);
_pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
iftmp.0_13 = (short unsigned int) _pattmp;
iftmp.0_5 = iftmp.0_13;

The below tests are running for this patch.
1. The rv64gcv fully regression tests.
2. The rv64gcv build with glibc.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

gcc/ChangeLog:

	* match.pd: Add new match for trunated unsigned sat_sub.
	* tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
	New external decl from match.pd.
	(tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
	to reconcile the truncated sat_sub pattern.
	(tree_if_cond_reconcile): New func impl to reconcile.
	(pass_if_conversion::execute): Try to reconcile after ifcvt.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd        |  9 +++++
 gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

Comments

Richard Biener June 21, 2024, 7 a.m. UTC | #1
On Fri, Jun 21, 2024 at 5:53 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> The zip benchmark of coremark-pro have one SAT_SUB like pattern but
> truncated as below:
>
> void test (uint16_t *x, unsigned b, unsigned n)
> {
>   unsigned a = 0;
>   register uint16_t *p = x;
>
>   do {
>     a = *--p;
>     *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
>   } while (--n);
> }
>
> It will have gimple after ifcvt pass,  it cannot hit any pattern of
> SAT_SUB and then cannot vectorize to SAT_SUB.
>
> _2 = a_11 - b_12(D);
> iftmp.0_13 = (short unsigned int) _2;
> _18 = a_11 >= b_12(D);
> iftmp.0_5 = _18 ? iftmp.0_13 : 0;
>
> This patch would like to do some reconcile for above pattern to match
> the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
> the SAT_SUB.

Hmm.  I was thinking of allowing

/* Unsigned saturation sub, case 2 (branch with ge):
   SAT_U_SUB = X >= Y ? X - Y : 0.  */
(match (unsigned_integer_sat_sub @0 @1)
 (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
      && types_match (type, @0, @1))))

to match this by changing it to

/* Unsigned saturation sub, case 2 (branch with ge):
   SAT_U_SUB = X >= Y ? X - Y : 0.  */
(match (unsigned_integer_sat_sub @0 @1)
 (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
      && types_match (type, @0, @1))))

and when using the gimple_match_* function make sure to consider
that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
we matched?

Richard.

> _2 = a_11 - b_12(D);
> _18 = a_11 >= b_12(D);
> _pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
> iftmp.0_13 = (short unsigned int) _pattmp;
> iftmp.0_5 = iftmp.0_13;
>
> The below tests are running for this patch.
> 1. The rv64gcv fully regression tests.
> 2. The rv64gcv build with glibc.
> 3. The x86 bootstrap tests.
> 4. The x86 fully regression tests.
>
> gcc/ChangeLog:
>
>         * match.pd: Add new match for trunated unsigned sat_sub.
>         * tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
>         New external decl from match.pd.
>         (tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
>         to reconcile the truncated sat_sub pattern.
>         (tree_if_cond_reconcile): New func impl to reconcile.
>         (pass_if_conversion::execute): Try to reconcile after ifcvt.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd        |  9 +++++
>  gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 92 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 3d0689c9312..9617a5f9d5e 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3210,6 +3210,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Unsigned saturation sub and then truncated, aka:
> +   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
> + */
> +(match (truncated_unsigned_integer_sat_sub @0 @1)
> + (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (@0, @1)
> +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 57992b6deca..535743130f2 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -3738,6 +3738,87 @@ bitfields_to_lower_p (class loop *loop,
>    return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
>  }
>
> +extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
> +                                                      tree (*)(tree));
> +
> +/*
> + * Try to reconcile the stmt pattern as below to math the SAT_SUB
> + * in vectorization.  If and only if the related internal_fn has
> + * been implemented already.
> + *
> + * The reconcile will insert one new stmt named 'a' in below example,
> + * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
> + * pattern is able to hit the SAT_SUB pattern in the underlying pass.
> + *
> + * 1. _2 = a_11 - b_12(D);
> + * 2. iftmp.0_13 = (short unsigned int) _2;
> + * 3. _18 = a_11 >= b_12(D);
> + * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> + * ==>
> + * 1. _2 = a_11 - b_12(D);
> + * 3. _18 = a_11 >= b_12(D);
> + * a. pattmp = _18 ? _2 : 0;                     // New insertion
> + * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
> + * b. iftmp.0_5 = iftmp.0_13;
> + *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> + */
> +static void
> +tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
> +                                                gassign *stmt)
> +{
> +  tree ops[2];
> +  tree lhs = gimple_assign_lhs (stmt);
> +  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
> +                                                    TREE_TYPE (lhs),
> +                                                    OPTIMIZE_FOR_BOTH);
> +
> +  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
> +    {
> +      tree cond = gimple_assign_rhs1 (stmt); // aka _18
> +      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
> +      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
> +      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
> +      tree raw_type = TREE_TYPE (minus);
> +      tree zero = build_zero_cst (raw_type);
> +      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
> +
> +      /* For stmt 'a' in above example  */
> +      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
> +      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
> +      update_stmt (stmt_a);
> +
> +      /* For stmt '2' in above example  */
> +      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
> +      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
> +      gimple_assign_set_rhs1 (stmt_2, tmp);
> +      update_stmt (stmt_2);
> +
> +      /* For stmt 'b' in above example  */
> +      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
> +      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
> +      update_stmt (stmt_b);
> +    }
> +}
> +
> +static void
> +tree_if_cond_reconcile (function *fun)
> +{
> +  basic_block bb;
> +  FOR_EACH_BB_FN (bb, fun)
> +    {
> +      gimple_stmt_iterator gsi;
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +       {
> +         gimple *stmt = gsi_stmt (gsi);
> +
> +         if (is_gimple_assign (stmt))
> +           {
> +             gassign *assign = dyn_cast <gassign *> (stmt);
> +             tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
> +           }
> +       }
> +    }
> +}
>
>  /* If-convert LOOP when it is legal.  For the moment this pass has no
>     profitability analysis.  Returns non-zero todo flags when something
> @@ -4063,6 +4144,8 @@ pass_if_conversion::execute (function *fun)
>         }
>      }
>
> +  tree_if_cond_reconcile (fun);
> +
>    return 0;
>  }
>
> --
> 2.34.1
>
Li, Pan2 June 21, 2024, 8:50 a.m. UTC | #2
Thanks Richard for comments.

> to match this by changing it to

> /* Unsigned saturation sub, case 2 (branch with ge):
>    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> (match (unsigned_integer_sat_sub @0 @1)
> (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
>  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>       && types_match (type, @0, @1))))

Do we need another name for this matching ? Add (convert? here may change the sematics of .SAT_SUB.
When we call gimple_unsigned_integer_sat_sub (lhs, ops, NULL), the converted value may be returned different
to the (minus @0 @1). Please correct me if my understanding is wrong.

> and when using the gimple_match_* function make sure to consider
> that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> we matched?

This may have problem for vector part I guess, require some additional change from vectorize_convert when
I try to do that in previous. Let me double check about it, and keep you posted.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Friday, June 21, 2024 3:00 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB

On Fri, Jun 21, 2024 at 5:53 AM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> The zip benchmark of coremark-pro have one SAT_SUB like pattern but
> truncated as below:
>
> void test (uint16_t *x, unsigned b, unsigned n)
> {
>   unsigned a = 0;
>   register uint16_t *p = x;
>
>   do {
>     a = *--p;
>     *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
>   } while (--n);
> }
>
> It will have gimple after ifcvt pass,  it cannot hit any pattern of
> SAT_SUB and then cannot vectorize to SAT_SUB.
>
> _2 = a_11 - b_12(D);
> iftmp.0_13 = (short unsigned int) _2;
> _18 = a_11 >= b_12(D);
> iftmp.0_5 = _18 ? iftmp.0_13 : 0;
>
> This patch would like to do some reconcile for above pattern to match
> the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
> the SAT_SUB.

Hmm.  I was thinking of allowing

/* Unsigned saturation sub, case 2 (branch with ge):
   SAT_U_SUB = X >= Y ? X - Y : 0.  */
(match (unsigned_integer_sat_sub @0 @1)
 (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
      && types_match (type, @0, @1))))

to match this by changing it to

/* Unsigned saturation sub, case 2 (branch with ge):
   SAT_U_SUB = X >= Y ? X - Y : 0.  */
(match (unsigned_integer_sat_sub @0 @1)
 (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
 (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
      && types_match (type, @0, @1))))

and when using the gimple_match_* function make sure to consider
that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
we matched?

Richard.

> _2 = a_11 - b_12(D);
> _18 = a_11 >= b_12(D);
> _pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
> iftmp.0_13 = (short unsigned int) _pattmp;
> iftmp.0_5 = iftmp.0_13;
>
> The below tests are running for this patch.
> 1. The rv64gcv fully regression tests.
> 2. The rv64gcv build with glibc.
> 3. The x86 bootstrap tests.
> 4. The x86 fully regression tests.
>
> gcc/ChangeLog:
>
>         * match.pd: Add new match for trunated unsigned sat_sub.
>         * tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
>         New external decl from match.pd.
>         (tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
>         to reconcile the truncated sat_sub pattern.
>         (tree_if_cond_reconcile): New func impl to reconcile.
>         (pass_if_conversion::execute): Try to reconcile after ifcvt.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd        |  9 +++++
>  gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 92 insertions(+)
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 3d0689c9312..9617a5f9d5e 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3210,6 +3210,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>        && types_match (type, @0, @1))))
>
> +/* Unsigned saturation sub and then truncated, aka:
> +   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
> + */
> +(match (truncated_unsigned_integer_sat_sub @0 @1)
> + (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
> + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> +      && types_match (@0, @1)
> +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
> +
>  /* x >  y  &&  x != XXX_MIN  -->  x > y
>     x >  y  &&  x == XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index 57992b6deca..535743130f2 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -3738,6 +3738,87 @@ bitfields_to_lower_p (class loop *loop,
>    return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
>  }
>
> +extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
> +                                                      tree (*)(tree));
> +
> +/*
> + * Try to reconcile the stmt pattern as below to math the SAT_SUB
> + * in vectorization.  If and only if the related internal_fn has
> + * been implemented already.
> + *
> + * The reconcile will insert one new stmt named 'a' in below example,
> + * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
> + * pattern is able to hit the SAT_SUB pattern in the underlying pass.
> + *
> + * 1. _2 = a_11 - b_12(D);
> + * 2. iftmp.0_13 = (short unsigned int) _2;
> + * 3. _18 = a_11 >= b_12(D);
> + * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> + * ==>
> + * 1. _2 = a_11 - b_12(D);
> + * 3. _18 = a_11 >= b_12(D);
> + * a. pattmp = _18 ? _2 : 0;                     // New insertion
> + * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
> + * b. iftmp.0_5 = iftmp.0_13;
> + *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> + */
> +static void
> +tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
> +                                                gassign *stmt)
> +{
> +  tree ops[2];
> +  tree lhs = gimple_assign_lhs (stmt);
> +  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
> +                                                    TREE_TYPE (lhs),
> +                                                    OPTIMIZE_FOR_BOTH);
> +
> +  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
> +    {
> +      tree cond = gimple_assign_rhs1 (stmt); // aka _18
> +      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
> +      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
> +      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
> +      tree raw_type = TREE_TYPE (minus);
> +      tree zero = build_zero_cst (raw_type);
> +      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
> +
> +      /* For stmt 'a' in above example  */
> +      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
> +      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
> +      update_stmt (stmt_a);
> +
> +      /* For stmt '2' in above example  */
> +      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
> +      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
> +      gimple_assign_set_rhs1 (stmt_2, tmp);
> +      update_stmt (stmt_2);
> +
> +      /* For stmt 'b' in above example  */
> +      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
> +      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
> +      update_stmt (stmt_b);
> +    }
> +}
> +
> +static void
> +tree_if_cond_reconcile (function *fun)
> +{
> +  basic_block bb;
> +  FOR_EACH_BB_FN (bb, fun)
> +    {
> +      gimple_stmt_iterator gsi;
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +       {
> +         gimple *stmt = gsi_stmt (gsi);
> +
> +         if (is_gimple_assign (stmt))
> +           {
> +             gassign *assign = dyn_cast <gassign *> (stmt);
> +             tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
> +           }
> +       }
> +    }
> +}
>
>  /* If-convert LOOP when it is legal.  For the moment this pass has no
>     profitability analysis.  Returns non-zero todo flags when something
> @@ -4063,6 +4144,8 @@ pass_if_conversion::execute (function *fun)
>         }
>      }
>
> +  tree_if_cond_reconcile (fun);
> +
>    return 0;
>  }
>
> --
> 2.34.1
>
Richard Biener June 21, 2024, 9:28 a.m. UTC | #3
On Fri, Jun 21, 2024 at 10:50 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard for comments.
>
> > to match this by changing it to
>
> > /* Unsigned saturation sub, case 2 (branch with ge):
> >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > (match (unsigned_integer_sat_sub @0 @1)
> > (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
> >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >       && types_match (type, @0, @1))))
>
> Do we need another name for this matching ? Add (convert? here may change the sematics of .SAT_SUB.
> When we call gimple_unsigned_integer_sat_sub (lhs, ops, NULL), the converted value may be returned different
> to the (minus @0 @1). Please correct me if my understanding is wrong.

I think gimple_unsigned_integer_sat_sub (lhs, ...) simply matches
(typeof LHS).SAT_SUB (ops[0], ops[1]) now, I don't think it's necessary to
handle the case where typef LHS and typeof ops[0] are equal specially?

> > and when using the gimple_match_* function make sure to consider
> > that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> > we matched?
>
> This may have problem for vector part I guess, require some additional change from vectorize_convert when
> I try to do that in previous. Let me double check about it, and keep you posted.

You are using gimple_unsigned_integer_sat_sub from pattern recognition, the
thing to do is simply to add a conversion stmt to the pattern sequence in case
the types differ?

But maybe I'm missing something.

Richard.

> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 21, 2024 3:00 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB
>
> On Fri, Jun 21, 2024 at 5:53 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > The zip benchmark of coremark-pro have one SAT_SUB like pattern but
> > truncated as below:
> >
> > void test (uint16_t *x, unsigned b, unsigned n)
> > {
> >   unsigned a = 0;
> >   register uint16_t *p = x;
> >
> >   do {
> >     a = *--p;
> >     *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
> >   } while (--n);
> > }
> >
> > It will have gimple after ifcvt pass,  it cannot hit any pattern of
> > SAT_SUB and then cannot vectorize to SAT_SUB.
> >
> > _2 = a_11 - b_12(D);
> > iftmp.0_13 = (short unsigned int) _2;
> > _18 = a_11 >= b_12(D);
> > iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> >
> > This patch would like to do some reconcile for above pattern to match
> > the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
> > the SAT_SUB.
>
> Hmm.  I was thinking of allowing
>
> /* Unsigned saturation sub, case 2 (branch with ge):
>    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> (match (unsigned_integer_sat_sub @0 @1)
>  (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
>  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>       && types_match (type, @0, @1))))
>
> to match this by changing it to
>
> /* Unsigned saturation sub, case 2 (branch with ge):
>    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> (match (unsigned_integer_sat_sub @0 @1)
>  (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
>  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>       && types_match (type, @0, @1))))
>
> and when using the gimple_match_* function make sure to consider
> that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> we matched?
>
> Richard.
>
> > _2 = a_11 - b_12(D);
> > _18 = a_11 >= b_12(D);
> > _pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
> > iftmp.0_13 = (short unsigned int) _pattmp;
> > iftmp.0_5 = iftmp.0_13;
> >
> > The below tests are running for this patch.
> > 1. The rv64gcv fully regression tests.
> > 2. The rv64gcv build with glibc.
> > 3. The x86 bootstrap tests.
> > 4. The x86 fully regression tests.
> >
> > gcc/ChangeLog:
> >
> >         * match.pd: Add new match for trunated unsigned sat_sub.
> >         * tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
> >         New external decl from match.pd.
> >         (tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
> >         to reconcile the truncated sat_sub pattern.
> >         (tree_if_cond_reconcile): New func impl to reconcile.
> >         (pass_if_conversion::execute): Try to reconcile after ifcvt.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/match.pd        |  9 +++++
> >  gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 92 insertions(+)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 3d0689c9312..9617a5f9d5e 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3210,6 +3210,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* Unsigned saturation sub and then truncated, aka:
> > +   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
> > + */
> > +(match (truncated_unsigned_integer_sat_sub @0 @1)
> > + (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (@0, @1)
> > +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> > index 57992b6deca..535743130f2 100644
> > --- a/gcc/tree-if-conv.cc
> > +++ b/gcc/tree-if-conv.cc
> > @@ -3738,6 +3738,87 @@ bitfields_to_lower_p (class loop *loop,
> >    return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
> >  }
> >
> > +extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
> > +                                                      tree (*)(tree));
> > +
> > +/*
> > + * Try to reconcile the stmt pattern as below to math the SAT_SUB
> > + * in vectorization.  If and only if the related internal_fn has
> > + * been implemented already.
> > + *
> > + * The reconcile will insert one new stmt named 'a' in below example,
> > + * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
> > + * pattern is able to hit the SAT_SUB pattern in the underlying pass.
> > + *
> > + * 1. _2 = a_11 - b_12(D);
> > + * 2. iftmp.0_13 = (short unsigned int) _2;
> > + * 3. _18 = a_11 >= b_12(D);
> > + * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > + * ==>
> > + * 1. _2 = a_11 - b_12(D);
> > + * 3. _18 = a_11 >= b_12(D);
> > + * a. pattmp = _18 ? _2 : 0;                     // New insertion
> > + * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
> > + * b. iftmp.0_5 = iftmp.0_13;
> > + *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > + */
> > +static void
> > +tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
> > +                                                gassign *stmt)
> > +{
> > +  tree ops[2];
> > +  tree lhs = gimple_assign_lhs (stmt);
> > +  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
> > +                                                    TREE_TYPE (lhs),
> > +                                                    OPTIMIZE_FOR_BOTH);
> > +
> > +  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
> > +    {
> > +      tree cond = gimple_assign_rhs1 (stmt); // aka _18
> > +      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
> > +      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
> > +      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
> > +      tree raw_type = TREE_TYPE (minus);
> > +      tree zero = build_zero_cst (raw_type);
> > +      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
> > +
> > +      /* For stmt 'a' in above example  */
> > +      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
> > +      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
> > +      update_stmt (stmt_a);
> > +
> > +      /* For stmt '2' in above example  */
> > +      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
> > +      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
> > +      gimple_assign_set_rhs1 (stmt_2, tmp);
> > +      update_stmt (stmt_2);
> > +
> > +      /* For stmt 'b' in above example  */
> > +      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
> > +      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
> > +      update_stmt (stmt_b);
> > +    }
> > +}
> > +
> > +static void
> > +tree_if_cond_reconcile (function *fun)
> > +{
> > +  basic_block bb;
> > +  FOR_EACH_BB_FN (bb, fun)
> > +    {
> > +      gimple_stmt_iterator gsi;
> > +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > +       {
> > +         gimple *stmt = gsi_stmt (gsi);
> > +
> > +         if (is_gimple_assign (stmt))
> > +           {
> > +             gassign *assign = dyn_cast <gassign *> (stmt);
> > +             tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
> > +           }
> > +       }
> > +    }
> > +}
> >
> >  /* If-convert LOOP when it is legal.  For the moment this pass has no
> >     profitability analysis.  Returns non-zero todo flags when something
> > @@ -4063,6 +4144,8 @@ pass_if_conversion::execute (function *fun)
> >         }
> >      }
> >
> > +  tree_if_cond_reconcile (fun);
> > +
> >    return 0;
> >  }
> >
> > --
> > 2.34.1
> >
Li, Pan2 June 21, 2024, 2:45 p.m. UTC | #4
Thanks Richard for suggestion, tried the (convert? with below gimple stmt but got a miss def ice.
To double confirm, the *type_out should be the vector type of lhs, and we only need to build
one cvt stmt from itype to otype here. Or just return the call directly and set the type_out to the v_otype?

static gimple *
vect_recog_build_binary_gimple_stmt (vec_info *vinfo, gimple *stmt,
                                     internal_fn fn, tree *type_out,
                                     tree lhs, tree op_0, tree op_1)
{
  tree itype = TREE_TYPE (op_0);
  tree otype = TREE_TYPE (lhs);
  tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
  tree v_otype = get_vectype_for_scalar_type (vinfo, otype);

  if (v_itype != NULL_TREE && v_otype != NULL_TREE
    && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
    {
      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
      tree itype_ssa = vect_recog_temp_ssa_var (itype, NULL);

      gimple_call_set_lhs (call, itype_ssa);
      gimple_call_set_nothrow (call, /* nothrow_p */ false);
      gimple_set_location (call, gimple_location (stmt));

      *type_out = v_otype;
      gimple *new_stmt = call;

      if (itype != otype)
        {
          tree otype_ssa = vect_recog_temp_ssa_var (otype, NULL);
          new_stmt = gimple_build_assign (otype_ssa, CONVERT_EXPR, itype_ssa);
        }

      return new_stmt;
    }

  return NULL;
}

-----cut the ice---

zip.test.c: In function ‘test’:
zip.test.c:4:6: error: missing definition
    4 | void test (uint16_t *x, unsigned b, unsigned n)
      |      ^~~~
for SSA_NAME: patt_40 in statement:
vect_cst__151 = [vec_duplicate_expr] patt_40;
during GIMPLE pass: vect
dump file: zip.test.c.180t.vect
zip.test.c:4:6: internal compiler error: verify_ssa failed
0x1de0860 verify_ssa(bool, bool)
        /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/tree-ssa.cc:1203
0x1919f69 execute_function_todo
        /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/passes.cc:2096
0x1918b46 do_per_function
        /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/passes.cc:1688
0x191a116 execute_todo

Pan


-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Friday, June 21, 2024 5:29 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB

On Fri, Jun 21, 2024 at 10:50 AM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard for comments.
>
> > to match this by changing it to
>
> > /* Unsigned saturation sub, case 2 (branch with ge):
> >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > (match (unsigned_integer_sat_sub @0 @1)
> > (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
> >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >       && types_match (type, @0, @1))))
>
> Do we need another name for this matching ? Add (convert? here may change the sematics of .SAT_SUB.
> When we call gimple_unsigned_integer_sat_sub (lhs, ops, NULL), the converted value may be returned different
> to the (minus @0 @1). Please correct me if my understanding is wrong.

I think gimple_unsigned_integer_sat_sub (lhs, ...) simply matches
(typeof LHS).SAT_SUB (ops[0], ops[1]) now, I don't think it's necessary to
handle the case where typef LHS and typeof ops[0] are equal specially?

> > and when using the gimple_match_* function make sure to consider
> > that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> > we matched?
>
> This may have problem for vector part I guess, require some additional change from vectorize_convert when
> I try to do that in previous. Let me double check about it, and keep you posted.

You are using gimple_unsigned_integer_sat_sub from pattern recognition, the
thing to do is simply to add a conversion stmt to the pattern sequence in case
the types differ?

But maybe I'm missing something.

Richard.

> Pan
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 21, 2024 3:00 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB
>
> On Fri, Jun 21, 2024 at 5:53 AM <pan2.li@intel.com> wrote:
> >
> > From: Pan Li <pan2.li@intel.com>
> >
> > The zip benchmark of coremark-pro have one SAT_SUB like pattern but
> > truncated as below:
> >
> > void test (uint16_t *x, unsigned b, unsigned n)
> > {
> >   unsigned a = 0;
> >   register uint16_t *p = x;
> >
> >   do {
> >     a = *--p;
> >     *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
> >   } while (--n);
> > }
> >
> > It will have gimple after ifcvt pass,  it cannot hit any pattern of
> > SAT_SUB and then cannot vectorize to SAT_SUB.
> >
> > _2 = a_11 - b_12(D);
> > iftmp.0_13 = (short unsigned int) _2;
> > _18 = a_11 >= b_12(D);
> > iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> >
> > This patch would like to do some reconcile for above pattern to match
> > the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
> > the SAT_SUB.
>
> Hmm.  I was thinking of allowing
>
> /* Unsigned saturation sub, case 2 (branch with ge):
>    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> (match (unsigned_integer_sat_sub @0 @1)
>  (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
>  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>       && types_match (type, @0, @1))))
>
> to match this by changing it to
>
> /* Unsigned saturation sub, case 2 (branch with ge):
>    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> (match (unsigned_integer_sat_sub @0 @1)
>  (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
>  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
>       && types_match (type, @0, @1))))
>
> and when using the gimple_match_* function make sure to consider
> that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> we matched?
>
> Richard.
>
> > _2 = a_11 - b_12(D);
> > _18 = a_11 >= b_12(D);
> > _pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
> > iftmp.0_13 = (short unsigned int) _pattmp;
> > iftmp.0_5 = iftmp.0_13;
> >
> > The below tests are running for this patch.
> > 1. The rv64gcv fully regression tests.
> > 2. The rv64gcv build with glibc.
> > 3. The x86 bootstrap tests.
> > 4. The x86 fully regression tests.
> >
> > gcc/ChangeLog:
> >
> >         * match.pd: Add new match for trunated unsigned sat_sub.
> >         * tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
> >         New external decl from match.pd.
> >         (tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
> >         to reconcile the truncated sat_sub pattern.
> >         (tree_if_cond_reconcile): New func impl to reconcile.
> >         (pass_if_conversion::execute): Try to reconcile after ifcvt.
> >
> > Signed-off-by: Pan Li <pan2.li@intel.com>
> > ---
> >  gcc/match.pd        |  9 +++++
> >  gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 92 insertions(+)
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 3d0689c9312..9617a5f9d5e 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -3210,6 +3210,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >        && types_match (type, @0, @1))))
> >
> > +/* Unsigned saturation sub and then truncated, aka:
> > +   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
> > + */
> > +(match (truncated_unsigned_integer_sat_sub @0 @1)
> > + (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
> > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > +      && types_match (@0, @1)
> > +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
> > +
> >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> >     x >  y  &&  x == XXX_MIN  -->  false . */
> >  (for eqne (eq ne)
> > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> > index 57992b6deca..535743130f2 100644
> > --- a/gcc/tree-if-conv.cc
> > +++ b/gcc/tree-if-conv.cc
> > @@ -3738,6 +3738,87 @@ bitfields_to_lower_p (class loop *loop,
> >    return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
> >  }
> >
> > +extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
> > +                                                      tree (*)(tree));
> > +
> > +/*
> > + * Try to reconcile the stmt pattern as below to math the SAT_SUB
> > + * in vectorization.  If and only if the related internal_fn has
> > + * been implemented already.
> > + *
> > + * The reconcile will insert one new stmt named 'a' in below example,
> > + * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
> > + * pattern is able to hit the SAT_SUB pattern in the underlying pass.
> > + *
> > + * 1. _2 = a_11 - b_12(D);
> > + * 2. iftmp.0_13 = (short unsigned int) _2;
> > + * 3. _18 = a_11 >= b_12(D);
> > + * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > + * ==>
> > + * 1. _2 = a_11 - b_12(D);
> > + * 3. _18 = a_11 >= b_12(D);
> > + * a. pattmp = _18 ? _2 : 0;                     // New insertion
> > + * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
> > + * b. iftmp.0_5 = iftmp.0_13;
> > + *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > + */
> > +static void
> > +tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
> > +                                                gassign *stmt)
> > +{
> > +  tree ops[2];
> > +  tree lhs = gimple_assign_lhs (stmt);
> > +  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
> > +                                                    TREE_TYPE (lhs),
> > +                                                    OPTIMIZE_FOR_BOTH);
> > +
> > +  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
> > +    {
> > +      tree cond = gimple_assign_rhs1 (stmt); // aka _18
> > +      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
> > +      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
> > +      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
> > +      tree raw_type = TREE_TYPE (minus);
> > +      tree zero = build_zero_cst (raw_type);
> > +      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
> > +
> > +      /* For stmt 'a' in above example  */
> > +      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
> > +      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
> > +      update_stmt (stmt_a);
> > +
> > +      /* For stmt '2' in above example  */
> > +      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
> > +      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
> > +      gimple_assign_set_rhs1 (stmt_2, tmp);
> > +      update_stmt (stmt_2);
> > +
> > +      /* For stmt 'b' in above example  */
> > +      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
> > +      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
> > +      update_stmt (stmt_b);
> > +    }
> > +}
> > +
> > +static void
> > +tree_if_cond_reconcile (function *fun)
> > +{
> > +  basic_block bb;
> > +  FOR_EACH_BB_FN (bb, fun)
> > +    {
> > +      gimple_stmt_iterator gsi;
> > +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > +       {
> > +         gimple *stmt = gsi_stmt (gsi);
> > +
> > +         if (is_gimple_assign (stmt))
> > +           {
> > +             gassign *assign = dyn_cast <gassign *> (stmt);
> > +             tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
> > +           }
> > +       }
> > +    }
> > +}
> >
> >  /* If-convert LOOP when it is legal.  For the moment this pass has no
> >     profitability analysis.  Returns non-zero todo flags when something
> > @@ -4063,6 +4144,8 @@ pass_if_conversion::execute (function *fun)
> >         }
> >      }
> >
> > +  tree_if_cond_reconcile (fun);
> > +
> >    return 0;
> >  }
> >
> > --
> > 2.34.1
> >
Richard Biener June 22, 2024, 1:19 p.m. UTC | #5
On Fri, Jun 21, 2024 at 4:45 PM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard for suggestion, tried the (convert? with below gimple stmt but got a miss def ice.
> To double confirm, the *type_out should be the vector type of lhs, and we only need to build
> one cvt stmt from itype to otype here. Or just return the call directly and set the type_out to the v_otype?
>
> static gimple *
> vect_recog_build_binary_gimple_stmt (vec_info *vinfo, gimple *stmt,
>                                      internal_fn fn, tree *type_out,
>                                      tree lhs, tree op_0, tree op_1)
> {
>   tree itype = TREE_TYPE (op_0);
>   tree otype = TREE_TYPE (lhs);
>   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
>   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
>
>   if (v_itype != NULL_TREE && v_otype != NULL_TREE
>     && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
>     {
>       gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
>       tree itype_ssa = vect_recog_temp_ssa_var (itype, NULL);
>
>       gimple_call_set_lhs (call, itype_ssa);
>       gimple_call_set_nothrow (call, /* nothrow_p */ false);
>       gimple_set_location (call, gimple_location (stmt));
>
>       *type_out = v_otype;
>       gimple *new_stmt = call;
>
>       if (itype != otype)
>         {
>           tree otype_ssa = vect_recog_temp_ssa_var (otype, NULL);
>           new_stmt = gimple_build_assign (otype_ssa, CONVERT_EXPR, itype_ssa);
>         }
>
>       return new_stmt;

You need to refactor this to add to the stmts pattern def sequence
(look for append_pattern_def_seq uses for example)

>     }
>
>   return NULL;
> }
>
> -----cut the ice---
>
> zip.test.c: In function ‘test’:
> zip.test.c:4:6: error: missing definition
>     4 | void test (uint16_t *x, unsigned b, unsigned n)
>       |      ^~~~
> for SSA_NAME: patt_40 in statement:
> vect_cst__151 = [vec_duplicate_expr] patt_40;
> during GIMPLE pass: vect
> dump file: zip.test.c.180t.vect
> zip.test.c:4:6: internal compiler error: verify_ssa failed
> 0x1de0860 verify_ssa(bool, bool)
>         /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/tree-ssa.cc:1203
> 0x1919f69 execute_function_todo
>         /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/passes.cc:2096
> 0x1918b46 do_per_function
>         /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/passes.cc:1688
> 0x191a116 execute_todo
>
> Pan
>
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 21, 2024 5:29 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB
>
> On Fri, Jun 21, 2024 at 10:50 AM Li, Pan2 <pan2.li@intel.com> wrote:
> >
> > Thanks Richard for comments.
> >
> > > to match this by changing it to
> >
> > > /* Unsigned saturation sub, case 2 (branch with ge):
> > >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > > (match (unsigned_integer_sat_sub @0 @1)
> > > (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
> > >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > >       && types_match (type, @0, @1))))
> >
> > Do we need another name for this matching ? Add (convert? here may change the sematics of .SAT_SUB.
> > When we call gimple_unsigned_integer_sat_sub (lhs, ops, NULL), the converted value may be returned different
> > to the (minus @0 @1). Please correct me if my understanding is wrong.
>
> I think gimple_unsigned_integer_sat_sub (lhs, ...) simply matches
> (typeof LHS).SAT_SUB (ops[0], ops[1]) now, I don't think it's necessary to
> handle the case where typef LHS and typeof ops[0] are equal specially?
>
> > > and when using the gimple_match_* function make sure to consider
> > > that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> > > we matched?
> >
> > This may have problem for vector part I guess, require some additional change from vectorize_convert when
> > I try to do that in previous. Let me double check about it, and keep you posted.
>
> You are using gimple_unsigned_integer_sat_sub from pattern recognition, the
> thing to do is simply to add a conversion stmt to the pattern sequence in case
> the types differ?
>
> But maybe I'm missing something.
>
> Richard.
>
> > Pan
> >
> > -----Original Message-----
> > From: Richard Biener <richard.guenther@gmail.com>
> > Sent: Friday, June 21, 2024 3:00 PM
> > To: Li, Pan2 <pan2.li@intel.com>
> > Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> > Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB
> >
> > On Fri, Jun 21, 2024 at 5:53 AM <pan2.li@intel.com> wrote:
> > >
> > > From: Pan Li <pan2.li@intel.com>
> > >
> > > The zip benchmark of coremark-pro have one SAT_SUB like pattern but
> > > truncated as below:
> > >
> > > void test (uint16_t *x, unsigned b, unsigned n)
> > > {
> > >   unsigned a = 0;
> > >   register uint16_t *p = x;
> > >
> > >   do {
> > >     a = *--p;
> > >     *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
> > >   } while (--n);
> > > }
> > >
> > > It will have gimple after ifcvt pass,  it cannot hit any pattern of
> > > SAT_SUB and then cannot vectorize to SAT_SUB.
> > >
> > > _2 = a_11 - b_12(D);
> > > iftmp.0_13 = (short unsigned int) _2;
> > > _18 = a_11 >= b_12(D);
> > > iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > >
> > > This patch would like to do some reconcile for above pattern to match
> > > the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
> > > the SAT_SUB.
> >
> > Hmm.  I was thinking of allowing
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > (match (unsigned_integer_sat_sub @0 @1)
> >  (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >       && types_match (type, @0, @1))))
> >
> > to match this by changing it to
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > (match (unsigned_integer_sat_sub @0 @1)
> >  (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
> >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >       && types_match (type, @0, @1))))
> >
> > and when using the gimple_match_* function make sure to consider
> > that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> > we matched?
> >
> > Richard.
> >
> > > _2 = a_11 - b_12(D);
> > > _18 = a_11 >= b_12(D);
> > > _pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
> > > iftmp.0_13 = (short unsigned int) _pattmp;
> > > iftmp.0_5 = iftmp.0_13;
> > >
> > > The below tests are running for this patch.
> > > 1. The rv64gcv fully regression tests.
> > > 2. The rv64gcv build with glibc.
> > > 3. The x86 bootstrap tests.
> > > 4. The x86 fully regression tests.
> > >
> > > gcc/ChangeLog:
> > >
> > >         * match.pd: Add new match for trunated unsigned sat_sub.
> > >         * tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
> > >         New external decl from match.pd.
> > >         (tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
> > >         to reconcile the truncated sat_sub pattern.
> > >         (tree_if_cond_reconcile): New func impl to reconcile.
> > >         (pass_if_conversion::execute): Try to reconcile after ifcvt.
> > >
> > > Signed-off-by: Pan Li <pan2.li@intel.com>
> > > ---
> > >  gcc/match.pd        |  9 +++++
> > >  gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 92 insertions(+)
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 3d0689c9312..9617a5f9d5e 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -3210,6 +3210,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > >        && types_match (type, @0, @1))))
> > >
> > > +/* Unsigned saturation sub and then truncated, aka:
> > > +   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
> > > + */
> > > +(match (truncated_unsigned_integer_sat_sub @0 @1)
> > > + (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
> > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > > +      && types_match (@0, @1)
> > > +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
> > > +
> > >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> > >     x >  y  &&  x == XXX_MIN  -->  false . */
> > >  (for eqne (eq ne)
> > > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> > > index 57992b6deca..535743130f2 100644
> > > --- a/gcc/tree-if-conv.cc
> > > +++ b/gcc/tree-if-conv.cc
> > > @@ -3738,6 +3738,87 @@ bitfields_to_lower_p (class loop *loop,
> > >    return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
> > >  }
> > >
> > > +extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
> > > +                                                      tree (*)(tree));
> > > +
> > > +/*
> > > + * Try to reconcile the stmt pattern as below to math the SAT_SUB
> > > + * in vectorization.  If and only if the related internal_fn has
> > > + * been implemented already.
> > > + *
> > > + * The reconcile will insert one new stmt named 'a' in below example,
> > > + * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
> > > + * pattern is able to hit the SAT_SUB pattern in the underlying pass.
> > > + *
> > > + * 1. _2 = a_11 - b_12(D);
> > > + * 2. iftmp.0_13 = (short unsigned int) _2;
> > > + * 3. _18 = a_11 >= b_12(D);
> > > + * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > > + * ==>
> > > + * 1. _2 = a_11 - b_12(D);
> > > + * 3. _18 = a_11 >= b_12(D);
> > > + * a. pattmp = _18 ? _2 : 0;                     // New insertion
> > > + * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
> > > + * b. iftmp.0_5 = iftmp.0_13;
> > > + *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > > + */
> > > +static void
> > > +tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
> > > +                                                gassign *stmt)
> > > +{
> > > +  tree ops[2];
> > > +  tree lhs = gimple_assign_lhs (stmt);
> > > +  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
> > > +                                                    TREE_TYPE (lhs),
> > > +                                                    OPTIMIZE_FOR_BOTH);
> > > +
> > > +  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
> > > +    {
> > > +      tree cond = gimple_assign_rhs1 (stmt); // aka _18
> > > +      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
> > > +      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
> > > +      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
> > > +      tree raw_type = TREE_TYPE (minus);
> > > +      tree zero = build_zero_cst (raw_type);
> > > +      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
> > > +
> > > +      /* For stmt 'a' in above example  */
> > > +      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
> > > +      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
> > > +      update_stmt (stmt_a);
> > > +
> > > +      /* For stmt '2' in above example  */
> > > +      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
> > > +      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
> > > +      gimple_assign_set_rhs1 (stmt_2, tmp);
> > > +      update_stmt (stmt_2);
> > > +
> > > +      /* For stmt 'b' in above example  */
> > > +      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
> > > +      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
> > > +      update_stmt (stmt_b);
> > > +    }
> > > +}
> > > +
> > > +static void
> > > +tree_if_cond_reconcile (function *fun)
> > > +{
> > > +  basic_block bb;
> > > +  FOR_EACH_BB_FN (bb, fun)
> > > +    {
> > > +      gimple_stmt_iterator gsi;
> > > +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > > +       {
> > > +         gimple *stmt = gsi_stmt (gsi);
> > > +
> > > +         if (is_gimple_assign (stmt))
> > > +           {
> > > +             gassign *assign = dyn_cast <gassign *> (stmt);
> > > +             tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
> > > +           }
> > > +       }
> > > +    }
> > > +}
> > >
> > >  /* If-convert LOOP when it is legal.  For the moment this pass has no
> > >     profitability analysis.  Returns non-zero todo flags when something
> > > @@ -4063,6 +4144,8 @@ pass_if_conversion::execute (function *fun)
> > >         }
> > >      }
> > >
> > > +  tree_if_cond_reconcile (fun);
> > > +
> > >    return 0;
> > >  }
> > >
> > > --
> > > 2.34.1
> > >
Li, Pan2 June 23, 2024, 12:57 p.m. UTC | #6
> You need to refactor this to add to the stmts pattern def sequence
>  (look for append_pattern_def_seq uses for example)

Thanks Richard, really save my day, will have a try in v2.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Saturday, June 22, 2024 9:19 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB

On Fri, Jun 21, 2024 at 4:45 PM Li, Pan2 <pan2.li@intel.com> wrote:
>
> Thanks Richard for suggestion, tried the (convert? with below gimple stmt but got a miss def ice.
> To double confirm, the *type_out should be the vector type of lhs, and we only need to build
> one cvt stmt from itype to otype here. Or just return the call directly and set the type_out to the v_otype?
>
> static gimple *
> vect_recog_build_binary_gimple_stmt (vec_info *vinfo, gimple *stmt,
>                                      internal_fn fn, tree *type_out,
>                                      tree lhs, tree op_0, tree op_1)
> {
>   tree itype = TREE_TYPE (op_0);
>   tree otype = TREE_TYPE (lhs);
>   tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
>   tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
>
>   if (v_itype != NULL_TREE && v_otype != NULL_TREE
>     && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
>     {
>       gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
>       tree itype_ssa = vect_recog_temp_ssa_var (itype, NULL);
>
>       gimple_call_set_lhs (call, itype_ssa);
>       gimple_call_set_nothrow (call, /* nothrow_p */ false);
>       gimple_set_location (call, gimple_location (stmt));
>
>       *type_out = v_otype;
>       gimple *new_stmt = call;
>
>       if (itype != otype)
>         {
>           tree otype_ssa = vect_recog_temp_ssa_var (otype, NULL);
>           new_stmt = gimple_build_assign (otype_ssa, CONVERT_EXPR, itype_ssa);
>         }
>
>       return new_stmt;

You need to refactor this to add to the stmts pattern def sequence
(look for append_pattern_def_seq uses for example)

>     }
>
>   return NULL;
> }
>
> -----cut the ice---
>
> zip.test.c: In function ‘test’:
> zip.test.c:4:6: error: missing definition
>     4 | void test (uint16_t *x, unsigned b, unsigned n)
>       |      ^~~~
> for SSA_NAME: patt_40 in statement:
> vect_cst__151 = [vec_duplicate_expr] patt_40;
> during GIMPLE pass: vect
> dump file: zip.test.c.180t.vect
> zip.test.c:4:6: internal compiler error: verify_ssa failed
> 0x1de0860 verify_ssa(bool, bool)
>         /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/tree-ssa.cc:1203
> 0x1919f69 execute_function_todo
>         /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/passes.cc:2096
> 0x1918b46 do_per_function
>         /home/pli/gcc/555/riscv-gnu-toolchain/gcc/__RISCV_BUILD__/../gcc/passes.cc:1688
> 0x191a116 execute_todo
>
> Pan
>
>
> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Friday, June 21, 2024 5:29 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB
>
> On Fri, Jun 21, 2024 at 10:50 AM Li, Pan2 <pan2.li@intel.com> wrote:
> >
> > Thanks Richard for comments.
> >
> > > to match this by changing it to
> >
> > > /* Unsigned saturation sub, case 2 (branch with ge):
> > >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > > (match (unsigned_integer_sat_sub @0 @1)
> > > (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
> > >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > >       && types_match (type, @0, @1))))
> >
> > Do we need another name for this matching ? Add (convert? here may change the sematics of .SAT_SUB.
> > When we call gimple_unsigned_integer_sat_sub (lhs, ops, NULL), the converted value may be returned different
> > to the (minus @0 @1). Please correct me if my understanding is wrong.
>
> I think gimple_unsigned_integer_sat_sub (lhs, ...) simply matches
> (typeof LHS).SAT_SUB (ops[0], ops[1]) now, I don't think it's necessary to
> handle the case where typef LHS and typeof ops[0] are equal specially?
>
> > > and when using the gimple_match_* function make sure to consider
> > > that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> > > we matched?
> >
> > This may have problem for vector part I guess, require some additional change from vectorize_convert when
> > I try to do that in previous. Let me double check about it, and keep you posted.
>
> You are using gimple_unsigned_integer_sat_sub from pattern recognition, the
> thing to do is simply to add a conversion stmt to the pattern sequence in case
> the types differ?
>
> But maybe I'm missing something.
>
> Richard.
>
> > Pan
> >
> > -----Original Message-----
> > From: Richard Biener <richard.guenther@gmail.com>
> > Sent: Friday, June 21, 2024 3:00 PM
> > To: Li, Pan2 <pan2.li@intel.com>
> > Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com
> > Subject: Re: [PATCH v1] Ifcvt: Add cond tree reconcile for truncated .SAT_SUB
> >
> > On Fri, Jun 21, 2024 at 5:53 AM <pan2.li@intel.com> wrote:
> > >
> > > From: Pan Li <pan2.li@intel.com>
> > >
> > > The zip benchmark of coremark-pro have one SAT_SUB like pattern but
> > > truncated as below:
> > >
> > > void test (uint16_t *x, unsigned b, unsigned n)
> > > {
> > >   unsigned a = 0;
> > >   register uint16_t *p = x;
> > >
> > >   do {
> > >     a = *--p;
> > >     *p = (uint16_t)(a >= b ? a - b : 0); // Truncate the result of SAT_SUB
> > >   } while (--n);
> > > }
> > >
> > > It will have gimple after ifcvt pass,  it cannot hit any pattern of
> > > SAT_SUB and then cannot vectorize to SAT_SUB.
> > >
> > > _2 = a_11 - b_12(D);
> > > iftmp.0_13 = (short unsigned int) _2;
> > > _18 = a_11 >= b_12(D);
> > > iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > >
> > > This patch would like to do some reconcile for above pattern to match
> > > the SAT_SUB pattern.  Then the underlying vect pass is able to vectorize
> > > the SAT_SUB.
> >
> > Hmm.  I was thinking of allowing
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > (match (unsigned_integer_sat_sub @0 @1)
> >  (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
> >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >       && types_match (type, @0, @1))))
> >
> > to match this by changing it to
> >
> > /* Unsigned saturation sub, case 2 (branch with ge):
> >    SAT_U_SUB = X >= Y ? X - Y : 0.  */
> > (match (unsigned_integer_sat_sub @0 @1)
> >  (cond^ (ge @0 @1) (convert? (minus @0 @1)) integer_zerop)
> >  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> >       && types_match (type, @0, @1))))
> >
> > and when using the gimple_match_* function make sure to consider
> > that the .SAT_SUB (@0, @1) is converted to the type of the SSA name
> > we matched?
> >
> > Richard.
> >
> > > _2 = a_11 - b_12(D);
> > > _18 = a_11 >= b_12(D);
> > > _pattmp = _18 ? _2 : 0; // .SAT_SUB pattern
> > > iftmp.0_13 = (short unsigned int) _pattmp;
> > > iftmp.0_5 = iftmp.0_13;
> > >
> > > The below tests are running for this patch.
> > > 1. The rv64gcv fully regression tests.
> > > 2. The rv64gcv build with glibc.
> > > 3. The x86 bootstrap tests.
> > > 4. The x86 fully regression tests.
> > >
> > > gcc/ChangeLog:
> > >
> > >         * match.pd: Add new match for trunated unsigned sat_sub.
> > >         * tree-if-conv.cc (gimple_truncated_unsigned_integer_sat_sub):
> > >         New external decl from match.pd.
> > >         (tree_if_cond_reconcile_unsigned_integer_sat_sub): New func impl
> > >         to reconcile the truncated sat_sub pattern.
> > >         (tree_if_cond_reconcile): New func impl to reconcile.
> > >         (pass_if_conversion::execute): Try to reconcile after ifcvt.
> > >
> > > Signed-off-by: Pan Li <pan2.li@intel.com>
> > > ---
> > >  gcc/match.pd        |  9 +++++
> > >  gcc/tree-if-conv.cc | 83 +++++++++++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 92 insertions(+)
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 3d0689c9312..9617a5f9d5e 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -3210,6 +3210,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > >        && types_match (type, @0, @1))))
> > >
> > > +/* Unsigned saturation sub and then truncated, aka:
> > > +   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
> > > + */
> > > +(match (truncated_unsigned_integer_sat_sub @0 @1)
> > > + (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
> > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> > > +      && types_match (@0, @1)
> > > +      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
> > > +
> > >  /* x >  y  &&  x != XXX_MIN  -->  x > y
> > >     x >  y  &&  x == XXX_MIN  -->  false . */
> > >  (for eqne (eq ne)
> > > diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> > > index 57992b6deca..535743130f2 100644
> > > --- a/gcc/tree-if-conv.cc
> > > +++ b/gcc/tree-if-conv.cc
> > > @@ -3738,6 +3738,87 @@ bitfields_to_lower_p (class loop *loop,
> > >    return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
> > >  }
> > >
> > > +extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
> > > +                                                      tree (*)(tree));
> > > +
> > > +/*
> > > + * Try to reconcile the stmt pattern as below to math the SAT_SUB
> > > + * in vectorization.  If and only if the related internal_fn has
> > > + * been implemented already.
> > > + *
> > > + * The reconcile will insert one new stmt named 'a' in below example,
> > > + * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
> > > + * pattern is able to hit the SAT_SUB pattern in the underlying pass.
> > > + *
> > > + * 1. _2 = a_11 - b_12(D);
> > > + * 2. iftmp.0_13 = (short unsigned int) _2;
> > > + * 3. _18 = a_11 >= b_12(D);
> > > + * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > > + * ==>
> > > + * 1. _2 = a_11 - b_12(D);
> > > + * 3. _18 = a_11 >= b_12(D);
> > > + * a. pattmp = _18 ? _2 : 0;                     // New insertion
> > > + * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
> > > + * b. iftmp.0_5 = iftmp.0_13;
> > > + *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
> > > + */
> > > +static void
> > > +tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
> > > +                                                gassign *stmt)
> > > +{
> > > +  tree ops[2];
> > > +  tree lhs = gimple_assign_lhs (stmt);
> > > +  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
> > > +                                                    TREE_TYPE (lhs),
> > > +                                                    OPTIMIZE_FOR_BOTH);
> > > +
> > > +  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
> > > +    {
> > > +      tree cond = gimple_assign_rhs1 (stmt); // aka _18
> > > +      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
> > > +      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
> > > +      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
> > > +      tree raw_type = TREE_TYPE (minus);
> > > +      tree zero = build_zero_cst (raw_type);
> > > +      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
> > > +
> > > +      /* For stmt 'a' in above example  */
> > > +      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
> > > +      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
> > > +      update_stmt (stmt_a);
> > > +
> > > +      /* For stmt '2' in above example  */
> > > +      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
> > > +      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
> > > +      gimple_assign_set_rhs1 (stmt_2, tmp);
> > > +      update_stmt (stmt_2);
> > > +
> > > +      /* For stmt 'b' in above example  */
> > > +      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
> > > +      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
> > > +      update_stmt (stmt_b);
> > > +    }
> > > +}
> > > +
> > > +static void
> > > +tree_if_cond_reconcile (function *fun)
> > > +{
> > > +  basic_block bb;
> > > +  FOR_EACH_BB_FN (bb, fun)
> > > +    {
> > > +      gimple_stmt_iterator gsi;
> > > +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > > +       {
> > > +         gimple *stmt = gsi_stmt (gsi);
> > > +
> > > +         if (is_gimple_assign (stmt))
> > > +           {
> > > +             gassign *assign = dyn_cast <gassign *> (stmt);
> > > +             tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
> > > +           }
> > > +       }
> > > +    }
> > > +}
> > >
> > >  /* If-convert LOOP when it is legal.  For the moment this pass has no
> > >     profitability analysis.  Returns non-zero todo flags when something
> > > @@ -4063,6 +4144,8 @@ pass_if_conversion::execute (function *fun)
> > >         }
> > >      }
> > >
> > > +  tree_if_cond_reconcile (fun);
> > > +
> > >    return 0;
> > >  }
> > >
> > > --
> > > 2.34.1
> > >
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 3d0689c9312..9617a5f9d5e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3210,6 +3210,15 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* Unsigned saturation sub and then truncated, aka:
+   Truncated = X >= Y ? (Other Type) (X - Y) : 0.
+ */
+(match (truncated_unsigned_integer_sat_sub @0 @1)
+ (cond (ge @0 @1) (convert (minus @0 @1)) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (@0, @1)
+      && tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))))))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 57992b6deca..535743130f2 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -3738,6 +3738,87 @@  bitfields_to_lower_p (class loop *loop,
   return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
 }
 
+extern bool gimple_truncated_unsigned_integer_sat_sub (tree, tree*,
+						       tree (*)(tree));
+
+/*
+ * Try to reconcile the stmt pattern as below to math the SAT_SUB
+ * in vectorization.  If and only if the related internal_fn has
+ * been implemented already.
+ *
+ * The reconcile will insert one new stmt named 'a' in below example,
+ * replace the stmt '4' by new added stmt 'b' as well.  Then the stmt
+ * pattern is able to hit the SAT_SUB pattern in the underlying pass.
+ *
+ * 1. _2 = a_11 - b_12(D);
+ * 2. iftmp.0_13 = (short unsigned int) _2;
+ * 3. _18 = a_11 >= b_12(D);
+ * 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
+ * ==>
+ * 1. _2 = a_11 - b_12(D);
+ * 3. _18 = a_11 >= b_12(D);
+ * a. pattmp = _18 ? _2 : 0;                     // New insertion
+ * 2. iftmp.0_13 = (short unsigned int) _pattmp; // Move before
+ * b. iftmp.0_5 = iftmp.0_13;
+ *    == Replace ==> 4. iftmp.0_5 = _18 ? iftmp.0_13 : 0;
+ */
+static void
+tree_if_cond_reconcile_unsigned_integer_sat_sub (gimple_stmt_iterator *gsi,
+						 gassign *stmt)
+{
+  tree ops[2];
+  tree lhs = gimple_assign_lhs (stmt);
+  bool supported_p = direct_internal_fn_supported_p (IFN_SAT_SUB,
+						     TREE_TYPE (lhs),
+						     OPTIMIZE_FOR_BOTH);
+
+  if (supported_p && gimple_truncated_unsigned_integer_sat_sub (lhs, ops, NULL))
+    {
+      tree cond = gimple_assign_rhs1 (stmt); // aka _18
+      tree truncated = gimple_assign_rhs2 (stmt); // aka iftmp.0_13
+      gimple *stmt_2 = SSA_NAME_DEF_STMT (truncated);
+      tree minus = gimple_assign_rhs1 (stmt_2); // aka _2
+      tree raw_type = TREE_TYPE (minus);
+      tree zero = build_zero_cst (raw_type);
+      tree tmp = make_temp_ssa_name (raw_type, NULL, "sat_sub_tmp");
+
+      /* For stmt 'a' in above example  */
+      gimple *stmt_a = gimple_build_assign (tmp, COND_EXPR, cond, minus, zero);
+      gsi_insert_before (gsi, stmt_a, GSI_SAME_STMT);
+      update_stmt (stmt_a);
+
+      /* For stmt '2' in above example  */
+      gimple_stmt_iterator stmt_2_gsi = gsi_for_stmt (stmt_2);
+      gsi_move_before (&stmt_2_gsi, gsi, GSI_SAME_STMT);
+      gimple_assign_set_rhs1 (stmt_2, tmp);
+      update_stmt (stmt_2);
+
+      /* For stmt 'b' in above example  */
+      gimple *stmt_b = gimple_build_assign (lhs, NOP_EXPR, truncated);
+      gsi_replace (gsi, stmt_b, /* update_eh_info */ true);
+      update_stmt (stmt_b);
+    }
+}
+
+static void
+tree_if_cond_reconcile (function *fun)
+{
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+
+	  if (is_gimple_assign (stmt))
+	    {
+	      gassign *assign = dyn_cast <gassign *> (stmt);
+	      tree_if_cond_reconcile_unsigned_integer_sat_sub (&gsi, assign);
+	    }
+	}
+    }
+}
 
 /* If-convert LOOP when it is legal.  For the moment this pass has no
    profitability analysis.  Returns non-zero todo flags when something
@@ -4063,6 +4144,8 @@  pass_if_conversion::execute (function *fun)
 	}
     }
 
+  tree_if_cond_reconcile (fun);
+
   return 0;
 }