diff mbox series

[v1] Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863]

Message ID 20240718122648.1606100-1-pan2.li@intel.com
State New
Headers show
Series [v1] Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863] | expand

Commit Message

Li, Pan2 July 18, 2024, 12:26 p.m. UTC
From: Pan Li <pan2.li@intel.com>

The SAT_TRUNC form 2 has below pattern matching.
From:
  _18 = MIN_EXPR <left_8, 4294967295>;
  iftmp.0_11 = (unsigned int) _18;

To:
  _18 = MIN_EXPR <left_8, 4294967295>;
  iftmp.0_11 = .SAT_TRUNC (left_8);

But if there is another use of _18, as shown below, the transform to
.SAT_TRUNC may bring no benefit.  For example:

From:
  _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
  iftmp.0_11 = (unsigned int) _18;     // op_0
  stream.avail_out = iftmp.0_11;
  left_37 = left_8 - _18;              // op_0 use

To:
  _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
  iftmp.0_11 = .SAT_TRUNC (left_8);
  stream.avail_out = iftmp.0_11;
  left_37 = left_8 - _18;              // op_0 use

Pattern recognition of .SAT_TRUNC cannot eliminate the MIN_EXPR in the case
above.  The backend (for example x86/riscv) will then emit 2-3 additional
insns for .SAT_TRUNC on top of the MIN_EXPR.  Thus, keeping the normal
truncation as is should be the better choice.

The following test suites passed with this patch:
1. The rv64gcv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.

	PR target/115863

gcc/ChangeLog:

	* match.pd: Add single_use of MIN_EXPR for .SAT_TRUNC form 2.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr115863-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/match.pd                               | 15 +++++++--
 gcc/testsuite/gcc.target/i386/pr115863-1.c | 37 ++++++++++++++++++++++
 2 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr115863-1.c

Comments

Tamar Christina July 18, 2024, 12:35 p.m. UTC | #1
> -----Original Message-----
> From: pan2.li@intel.com <pan2.li@intel.com>
> Sent: Thursday, July 18, 2024 1:27 PM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com;
> Tamar Christina <Tamar.Christina@arm.com>; jeffreyalaw@gmail.com;
> rdapp.gcc@gmail.com; hongtao.liu@intel.com; Pan Li <pan2.li@intel.com>
> Subject: [PATCH v1] Match: Only allow single use of MIN_EXPR for SAT_TRUNC
> form 2 [PR115863]
> 
> From: Pan Li <pan2.li@intel.com>
> 
> The SAT_TRUNC form 2 has below pattern matching.
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = (unsigned int) _18;
> 
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = .SAT_TRUNC (_18);
> 
> But if there is another use of _18 like below,  the transform to the
> .SAT_TRUNC may have no earnings.  For example:
> 
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = (unsigned int) _18;     // op_0
>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
> 
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = .SAT_TRUNC (_18);
>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
> 
> Pattern recog to .SAT_TRUNC cannot eliminate MIN_EXPR as above.  Then the
> backend (for example x86/riscv) will have additional 2-3 more insns
> after pattern recog besides the MIN_EXPR.  Thus,  keep the normal truncation
> as is should be the better choose.
> 
> The below testsuites are passed for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
> 
> 	PR target/115863
> 
> gcc/ChangeLog:
> 
> 	* match.pd: Add single_use of MIN_EXPR for .SAT_TRUNC form 2.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/i386/pr115863-1.c: New test.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd                               | 15 +++++++--
>  gcc/testsuite/gcc.target/i386/pr115863-1.c | 37 ++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr115863-1.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cb399b8718..d4f040b5c7b 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3252,10 +3252,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> 
>  /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
>     SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
> +/* If Op_0 def is MIN_EXPR and not single_use.  Aka below pattern:
> +
> +     _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
> +     iftmp.0_11 = (unsigned int) _18;     // op_0
> +     stream.avail_out = iftmp.0_11;
> +     left_37 = left_8 - _18;              // op_0 use
> +
> +   Transfer to .SAT_TRUNC will have MIN_EXPR still live.  Then the backend
> +   (for example x86/riscv) will have 2-3 more insns generation for .SAT_TRUNC
> +   besides the MIN_EXPR.  Thus,  keep the normal truncation as is should be
> +   the better choose.  */
>  (match (unsigned_integer_sat_trunc @0)
> - (convert (min @0 INTEGER_CST@1))
> + (convert (min@2 @0 INTEGER_CST@1))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> -      && TYPE_UNSIGNED (TREE_TYPE (@0)))
> +      && TYPE_UNSIGNED (TREE_TYPE (@0)) && single_use (@2))

You can probably use the single use flag here? so

> - (convert (min @0 INTEGER_CST@1))
> + (convert (min:s @0 INTEGER_CST@1))

?

Cheers,
Tamar

>   (with
>    {
>     unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
> diff --git a/gcc/testsuite/gcc.target/i386/pr115863-1.c
> b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> new file mode 100644
> index 00000000000..a672f62cec5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> @@ -0,0 +1,37 @@
> +/* PR target/115863 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-rtl-expand-details" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef struct z_stream_s {
> +    uint32_t     avail_out;
> +} z_stream;
> +
> +typedef z_stream *z_streamp;
> +
> +extern int deflate (z_streamp strmp);
> +
> +int compress2 (uint64_t *destLen)
> +{
> +  z_stream stream;
> +  int err;
> +  const uint32_t max = (uint32_t)(-1);
> +  uint64_t left;
> +
> +  left = *destLen;
> +
> +  stream.avail_out = 0;
> +
> +  do {
> +        if (stream.avail_out == 0) {
> +            stream.avail_out = left > (uint64_t)max ? max : (uint32_t)left;
> +            left -= stream.avail_out;
> +        }
> +        err = deflate(&stream);
> +    } while (err == 0);
> +
> +  return err;
> +}
> +
> +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
> --
> 2.34.1
Richard Biener July 18, 2024, 1:26 p.m. UTC | #2
On Thu, Jul 18, 2024 at 2:27 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> The SAT_TRUNC form 2 has below pattern matching.
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = (unsigned int) _18;
>
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = .SAT_TRUNC (_18);

.SAT_TRUNC (left_8);

> But if there is another use of _18 like below,  the transform to the
> .SAT_TRUNC may have no earnings.  For example:
>
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = (unsigned int) _18;     // op_0
>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
>
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = .SAT_TRUNC (_18);

.SAT_TRUNC (left_8);?

Otherwise the patch looks good to me.

Thanks,
Richard.

>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
>
> Pattern recog to .SAT_TRUNC cannot eliminate MIN_EXPR as above.  Then the
> backend (for example x86/riscv) will have additional 2-3 more insns
> after pattern recog besides the MIN_EXPR.  Thus,  keep the normal truncation
> as is should be the better choose.
>
> The below testsuites are passed for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
>
>         PR target/115863
>
> gcc/ChangeLog:
>
>         * match.pd: Add single_use of MIN_EXPR for .SAT_TRUNC form 2.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr115863-1.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd                               | 15 +++++++--
>  gcc/testsuite/gcc.target/i386/pr115863-1.c | 37 ++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr115863-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cb399b8718..d4f040b5c7b 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3252,10 +3252,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>
>  /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
>     SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
> +/* If Op_0 def is MIN_EXPR and not single_use.  Aka below pattern:
> +
> +     _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
> +     iftmp.0_11 = (unsigned int) _18;     // op_0
> +     stream.avail_out = iftmp.0_11;
> +     left_37 = left_8 - _18;              // op_0 use
> +
> +   Transfer to .SAT_TRUNC will have MIN_EXPR still live.  Then the backend
> +   (for example x86/riscv) will have 2-3 more insns generation for .SAT_TRUNC
> +   besides the MIN_EXPR.  Thus,  keep the normal truncation as is should be
> +   the better choose.  */
>  (match (unsigned_integer_sat_trunc @0)
> - (convert (min @0 INTEGER_CST@1))
> + (convert (min@2 @0 INTEGER_CST@1))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> -      && TYPE_UNSIGNED (TREE_TYPE (@0)))
> +      && TYPE_UNSIGNED (TREE_TYPE (@0)) && single_use (@2))
>   (with
>    {
>     unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
> diff --git a/gcc/testsuite/gcc.target/i386/pr115863-1.c b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> new file mode 100644
> index 00000000000..a672f62cec5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> @@ -0,0 +1,37 @@
> +/* PR target/115863 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-rtl-expand-details" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef struct z_stream_s {
> +    uint32_t     avail_out;
> +} z_stream;
> +
> +typedef z_stream *z_streamp;
> +
> +extern int deflate (z_streamp strmp);
> +
> +int compress2 (uint64_t *destLen)
> +{
> +  z_stream stream;
> +  int err;
> +  const uint32_t max = (uint32_t)(-1);
> +  uint64_t left;
> +
> +  left = *destLen;
> +
> +  stream.avail_out = 0;
> +
> +  do {
> +        if (stream.avail_out == 0) {
> +            stream.avail_out = left > (uint64_t)max ? max : (uint32_t)left;
> +            left -= stream.avail_out;
> +        }
> +        err = deflate(&stream);
> +    } while (err == 0);
> +
> +  return err;
> +}
> +
> +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
> --
> 2.34.1
>
Li, Pan2 July 19, 2024, 12:34 a.m. UTC | #3
Thanks Tamar for comments.

The :s flag is somehow ignored in matching, according to the gccint doc.

"The second supported flag is s which tells the code generator to fail the pattern if the
expression marked with s does have more than one use and the simplification results in an
expression with more than one operator."

I also diffed the generated code in gimple_unsigned_integer_sat_trunc; it does not contain a single_use check when the :s flag is used.

&& TYPE_UNSIGNED (TREE_TYPE (captures[0]))                                                   // the :s flag
&& TYPE_UNSIGNED (TREE_TYPE (captures[0])) && single_use (captures[1]) // explicit single_use check.

Pan

-----Original Message-----
From: Tamar Christina <Tamar.Christina@arm.com> 
Sent: Thursday, July 18, 2024 8:36 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: RE: [PATCH v1] Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863]

> -----Original Message-----
> From: pan2.li@intel.com <pan2.li@intel.com>
> Sent: Thursday, July 18, 2024 1:27 PM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@gmail.com; richard.guenther@gmail.com;
> Tamar Christina <Tamar.Christina@arm.com>; jeffreyalaw@gmail.com;
> rdapp.gcc@gmail.com; hongtao.liu@intel.com; Pan Li <pan2.li@intel.com>
> Subject: [PATCH v1] Match: Only allow single use of MIN_EXPR for SAT_TRUNC
> form 2 [PR115863]
> 
> From: Pan Li <pan2.li@intel.com>
> 
> The SAT_TRUNC form 2 has below pattern matching.
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = (unsigned int) _18;
> 
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = .SAT_TRUNC (_18);
> 
> But if there is another use of _18 like below,  the transform to the
> .SAT_TRUNC may have no earnings.  For example:
> 
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = (unsigned int) _18;     // op_0
>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
> 
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = .SAT_TRUNC (_18);
>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
> 
> Pattern recog to .SAT_TRUNC cannot eliminate MIN_EXPR as above.  Then the
> backend (for example x86/riscv) will have additional 2-3 more insns
> after pattern recog besides the MIN_EXPR.  Thus,  keep the normal truncation
> as is should be the better choose.
> 
> The below testsuites are passed for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
> 
> 	PR target/115863
> 
> gcc/ChangeLog:
> 
> 	* match.pd: Add single_use of MIN_EXPR for .SAT_TRUNC form 2.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/i386/pr115863-1.c: New test.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd                               | 15 +++++++--
>  gcc/testsuite/gcc.target/i386/pr115863-1.c | 37 ++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr115863-1.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cb399b8718..d4f040b5c7b 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3252,10 +3252,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> 
>  /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
>     SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
> +/* If Op_0 def is MIN_EXPR and not single_use.  Aka below pattern:
> +
> +     _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
> +     iftmp.0_11 = (unsigned int) _18;     // op_0
> +     stream.avail_out = iftmp.0_11;
> +     left_37 = left_8 - _18;              // op_0 use
> +
> +   Transfer to .SAT_TRUNC will have MIN_EXPR still live.  Then the backend
> +   (for example x86/riscv) will have 2-3 more insns generation for .SAT_TRUNC
> +   besides the MIN_EXPR.  Thus,  keep the normal truncation as is should be
> +   the better choose.  */
>  (match (unsigned_integer_sat_trunc @0)
> - (convert (min @0 INTEGER_CST@1))
> + (convert (min@2 @0 INTEGER_CST@1))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> -      && TYPE_UNSIGNED (TREE_TYPE (@0)))
> +      && TYPE_UNSIGNED (TREE_TYPE (@0)) && single_use (@2))

You can probably use the single use flag here? so

> - (convert (min @0 INTEGER_CST@1))
> + (convert (min:s @0 @0 INTEGER_CST@1))

?

Cheers,
Tamar

>   (with
>    {
>     unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
> diff --git a/gcc/testsuite/gcc.target/i386/pr115863-1.c
> b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> new file mode 100644
> index 00000000000..a672f62cec5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> @@ -0,0 +1,37 @@
> +/* PR target/115863 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-rtl-expand-details" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef struct z_stream_s {
> +    uint32_t     avail_out;
> +} z_stream;
> +
> +typedef z_stream *z_streamp;
> +
> +extern int deflate (z_streamp strmp);
> +
> +int compress2 (uint64_t *destLen)
> +{
> +  z_stream stream;
> +  int err;
> +  const uint32_t max = (uint32_t)(-1);
> +  uint64_t left;
> +
> +  left = *destLen;
> +
> +  stream.avail_out = 0;
> +
> +  do {
> +        if (stream.avail_out == 0) {
> +            stream.avail_out = left > (uint64_t)max ? max : (uint32_t)left;
> +            left -= stream.avail_out;
> +        }
> +        err = deflate(&stream);
> +    } while (err == 0);
> +
> +  return err;
> +}
> +
> +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
> --
> 2.34.1
Li, Pan2 July 19, 2024, 12:34 a.m. UTC | #4
> Otherwise the patch looks good to me.

Thanks Richard, will commit with the log updated.

Pan

-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com> 
Sent: Thursday, July 18, 2024 9:27 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; tamar.christina@arm.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Liu, Hongtao <hongtao.liu@intel.com>
Subject: Re: [PATCH v1] Match: Only allow single use of MIN_EXPR for SAT_TRUNC form 2 [PR115863]

On Thu, Jul 18, 2024 at 2:27 PM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> The SAT_TRUNC form 2 has below pattern matching.
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = (unsigned int) _18;
>
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>;
>   iftmp.0_11 = .SAT_TRUNC (_18);

.SAT_TRUNC (left_8);

> But if there is another use of _18 like below,  the transform to the
> .SAT_TRUNC may have no earnings.  For example:
>
> From:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = (unsigned int) _18;     // op_0
>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
>
> To:
>   _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
>   iftmp.0_11 = .SAT_TRUNC (_18);

.SAT_TRUNC (left_8);?

Otherwise the patch looks good to me.

Thanks,
Richard.

>   stream.avail_out = iftmp.0_11;
>   left_37 = left_8 - _18;              // op_0 use
>
> Pattern recog to .SAT_TRUNC cannot eliminate MIN_EXPR as above.  Then the
> backend (for example x86/riscv) will have additional 2-3 more insns
> after pattern recog besides the MIN_EXPR.  Thus,  keep the normal truncation
> as is should be the better choose.
>
> The below testsuites are passed for this patch:
> 1. The rv64gcv fully regression tests.
> 2. The x86 bootstrap tests.
> 3. The x86 fully regression tests.
>
>         PR target/115863
>
> gcc/ChangeLog:
>
>         * match.pd: Add single_use of MIN_EXPR for .SAT_TRUNC form 2.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr115863-1.c: New test.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/match.pd                               | 15 +++++++--
>  gcc/testsuite/gcc.target/i386/pr115863-1.c | 37 ++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr115863-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5cb399b8718..d4f040b5c7b 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3252,10 +3252,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>
>  /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
>     SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
> +/* If Op_0 def is MIN_EXPR and not single_use.  Aka below pattern:
> +
> +     _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
> +     iftmp.0_11 = (unsigned int) _18;     // op_0
> +     stream.avail_out = iftmp.0_11;
> +     left_37 = left_8 - _18;              // op_0 use
> +
> +   Transfer to .SAT_TRUNC will have MIN_EXPR still live.  Then the backend
> +   (for example x86/riscv) will have 2-3 more insns generation for .SAT_TRUNC
> +   besides the MIN_EXPR.  Thus,  keep the normal truncation as is should be
> +   the better choose.  */
>  (match (unsigned_integer_sat_trunc @0)
> - (convert (min @0 INTEGER_CST@1))
> + (convert (min@2 @0 INTEGER_CST@1))
>   (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
> -      && TYPE_UNSIGNED (TREE_TYPE (@0)))
> +      && TYPE_UNSIGNED (TREE_TYPE (@0)) && single_use (@2))
>   (with
>    {
>     unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
> diff --git a/gcc/testsuite/gcc.target/i386/pr115863-1.c b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> new file mode 100644
> index 00000000000..a672f62cec5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr115863-1.c
> @@ -0,0 +1,37 @@
> +/* PR target/115863 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-rtl-expand-details" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef struct z_stream_s {
> +    uint32_t     avail_out;
> +} z_stream;
> +
> +typedef z_stream *z_streamp;
> +
> +extern int deflate (z_streamp strmp);
> +
> +int compress2 (uint64_t *destLen)
> +{
> +  z_stream stream;
> +  int err;
> +  const uint32_t max = (uint32_t)(-1);
> +  uint64_t left;
> +
> +  left = *destLen;
> +
> +  stream.avail_out = 0;
> +
> +  do {
> +        if (stream.avail_out == 0) {
> +            stream.avail_out = left > (uint64_t)max ? max : (uint32_t)left;
> +            left -= stream.avail_out;
> +        }
> +        err = deflate(&stream);
> +    } while (err == 0);
> +
> +  return err;
> +}
> +
> +/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */
> --
> 2.34.1
>
diff mbox series

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 5cb399b8718..d4f040b5c7b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3252,10 +3252,21 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Unsigned saturation truncate, case 2, sizeof (WT) > sizeof (NT).
    SAT_U_TRUNC = (NT)(MIN_EXPR (X, 255)).  */
+/* If Op_0 def is MIN_EXPR and not single_use.  Aka below pattern:
+
+     _18 = MIN_EXPR <left_8, 4294967295>; // op_0 def
+     iftmp.0_11 = (unsigned int) _18;     // op_0
+     stream.avail_out = iftmp.0_11;
+     left_37 = left_8 - _18;              // op_0 use
+
+   Transfer to .SAT_TRUNC will have MIN_EXPR still live.  Then the backend
+   (for example x86/riscv) will have 2-3 more insns generation for .SAT_TRUNC
+   besides the MIN_EXPR.  Thus,  keep the normal truncation as is should be
+   the better choose.  */
 (match (unsigned_integer_sat_trunc @0)
- (convert (min @0 INTEGER_CST@1))
+ (convert (min@2 @0 INTEGER_CST@1))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
-      && TYPE_UNSIGNED (TREE_TYPE (@0)))
+      && TYPE_UNSIGNED (TREE_TYPE (@0)) && single_use (@2))
  (with
   {
    unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0));
diff --git a/gcc/testsuite/gcc.target/i386/pr115863-1.c b/gcc/testsuite/gcc.target/i386/pr115863-1.c
new file mode 100644
index 00000000000..a672f62cec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115863-1.c
@@ -0,0 +1,37 @@ 
+/* PR target/115863 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-rtl-expand-details" } */
+
+#include <stdint-gcc.h>
+
+typedef struct z_stream_s {
+    uint32_t     avail_out;
+} z_stream;
+
+typedef z_stream *z_streamp;
+
+extern int deflate (z_streamp strmp);
+
+int compress2 (uint64_t *destLen)
+{
+  z_stream stream;
+  int err;
+  const uint32_t max = (uint32_t)(-1);
+  uint64_t left;
+
+  left = *destLen;
+
+  stream.avail_out = 0;
+
+  do {
+        if (stream.avail_out == 0) {
+            stream.avail_out = left > (uint64_t)max ? max : (uint32_t)left;
+            left -= stream.avail_out;
+        }
+        err = deflate(&stream);
+    } while (err == 0);
+
+  return err;
+}
+
+/* { dg-final { scan-rtl-dump-not ".SAT_TRUNC " "expand" } } */