Message ID | 20241029082521.3638409-1-pan2.li@intel.com |
---|---|
State | New |
Headers | show |
Series | [1/5] Match: Simplify branch form 4 of unsigned SAT_ADD into branchless | expand |
On Tue, Oct 29, 2024 at 9:27 AM <pan2.li@intel.com> wrote: > > From: Pan Li <pan2.li@intel.com> > > There are sorts of forms for the unsigned SAT_ADD. Some of them are > complicated while others are cheap. This patch would like to simplify > the complicated form into the cheap ones. For example as below: > > From the form 4 (branch): > SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > > To (branchless): > SAT_U_ADD = (X + Y) | - ((X + Y) < X). > > #define T uint8_t > > T sat_add_u_1 (T x, T y) > { > return (T)(x + y) < x ? -1 : (x + y); > } > > Before this patch: > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 2 │ { > 3 │ uint8_t D.2809; > 4 │ > 5 │ _1 = x + y; > 6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>; > 7 │ <D.2810>: > 8 │ D.2809 = x + y; > 9 │ goto <D.2812>; > 10 │ <D.2811>: > 11 │ D.2809 = 255; > 12 │ <D.2812>: > 13 │ return D.2809; > 14 │ } > > After this patch: > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 2 │ { > 3 │ uint8_t D.2809; > 4 │ > 5 │ _1 = x + y; > 6 │ _2 = x + y; > 7 │ _3 = x > _2; > 8 │ _4 = (unsigned char) _3; > 9 │ _5 = -_4; > 10 │ D.2809 = _1 | _5; > 11 │ return D.2809; > 12 │ } > > The below test suites are passed for this patch. > * The rv64gcv fully regression test. > * The x86 bootstrap test. > * The x86 fully regression test. > > gcc/ChangeLog: > > * match.pd: Remove unsigned branch form 4 for SAT_ADD, and > add simplify to branchless instead. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c: New test. You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ I wonder if the simplification is already applied by the frontend and thus .original shows the simplified form or only .gimple? Did you check the simplification applies when writing as if (x + y < x) return -1; else return x + y; ? If it doesn't then removing the match is likely premature. I think phiopt should be able to perform the matching here (it does the COND_EXPR building). Thanks, Richard. > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/match.pd | 11 +++++++---- > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c | 15 +++++++++++++++ > 5 files changed, 67 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 809c717bc86..4d1143b6ec3 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3154,10 +3154,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > && types_match (type, @0, @1)) > (bit_ior @2 (negate (convert (lt @2 @0)))))) > > -/* Unsigned saturation add, case 4 (branch with lt): > - SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ > -(match (unsigned_integer_sat_add @0 @1) > - (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) > +/* Simplify SAT_U_ADD to the cheap form > + From: SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > + To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ > +(simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)) > + (bit_ior @2 (negate (convert (lt @2 @0)))))) > > /* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): > SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > new file mode 100644 > index 00000000000..6e327f58d46 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint16_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > new file mode 100644 > index 00000000000..17b09396b69 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint32_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > new file mode 100644 > index 00000000000..8db9b427a65 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint64_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > new file mode 100644 > index 00000000000..4e86ef9a5fd > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint8_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > -- > 2.43.0 >
Thanks Richard for comments. > You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ Sure, will move all test files to there. > I wonder if the simplification is already applied by the frontend and thus > .original shows the simplified form or only .gimple? Yes, you are right, the .original shows the simplified form. Take below code as example: 6 │ #define T uint8_t 7 │ 8 │ T sat_add_u_1 (T x, T y) 9 │ { 10 │ return (T)(x + y) < x ? -1 : (x + y); 11 │ } We have .original similar as below: 6 │ { 7 │ return (unsigned char) x + (unsigned char) y | -(uint8_t) ((unsigned char) x + (unsigned char) y < (unsigned char) x); 8 │ } > Did you check the simplification applies when writing as > > if (x + y < x) > return -1; > else > return x + y; > ? If it doesn't then removing the match is likely premature. I think phiopt > should be able to perform the matching here (it does the COND_EXPR > building). The form above doesn't hit the simplification after .gimple but the .phiopt2 performs the simplify and then we have below gimple after .phiopt2. 10 │ COND_EXPR in block 2 and PHI in block 4 converted to straightline code. 11 │ Merging blocks 2 and 4 12 │ fix_loop_structure: fixing up loops for function 13 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) 14 │ { 15 │ unsigned char _1; 16 │ _Bool _6; 17 │ unsigned char _7; 18 │ unsigned char _8; 19 │ unsigned char _9; 20 │ 21 │ <bb 2> [local count: 1073741824]: 22 │ _1 = x_3(D) + y_4(D); 23 │ _6 = _1 < x_3(D); 24 │ _7 = (unsigned char) _6; 25 │ _8 = -_7; 26 │ _9 = _1 | _8; 27 │ return _9; 28 │ 29 │ } BTW, given sorts of forms will be simplified to the "cheap" one, do we have some best practice to avoid the "cheap" form duplication? Currently for each simplification we have one copy of the "cheap" form. The "switch" in match.pd looks like not applicable here. I also have a try similar as below but it will generate invalid tree for some cases. (simplify (cond (gt @0 (plus:c@2 @0 @1)) integer_minus_onep @2) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)) - (bit_ior @2 (negate (convert (lt @2 @0)))))) + { build_cheap_unsigned_int_sat_add (@0, @1, type); })) +tree +build_cheap_unsigned_int_sat_add (tree op_0, tree op_1, tree type) +{ + /* (bit_ior @2 (negate (convert (lt @2 @0)))))) */ + return build2 (BIT_IOR_EXPR, type, + build2 (PLUS_EXPR, type, op_0, op_1), + build1 (NEGATE_EXPR, type, + build1 (NOP_EXPR, type, + build2 (LT_EXPR, boolean_type_node, + build2 (PLUS_EXPR, type, op_0, op_1), + op_0)))); +} Pan -----Original Message----- From: Richard Biener <richard.guenther@gmail.com> Sent: Tuesday, October 29, 2024 9:06 PM To: Li, Pan2 <pan2.li@intel.com> Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Andrew Pinski <quic_apinski@quicinc.com> Subject: Re: [PATCH 1/5] Match: Simplify branch form 4 of unsigned SAT_ADD into branchless On Tue, Oct 29, 2024 at 9:27 AM <pan2.li@intel.com> wrote: > > From: Pan Li <pan2.li@intel.com> > > There are sorts of forms for the unsigned SAT_ADD. Some of them are > complicated while others are cheap. This patch would like to simplify > the complicated form into the cheap ones. For example as below: > > From the form 4 (branch): > SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > > To (branchless): > SAT_U_ADD = (X + Y) | - ((X + Y) < X). > > #define T uint8_t > > T sat_add_u_1 (T x, T y) > { > return (T)(x + y) < x ? -1 : (x + y); > } > > Before this patch: > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 2 │ { > 3 │ uint8_t D.2809; > 4 │ > 5 │ _1 = x + y; > 6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>; > 7 │ <D.2810>: > 8 │ D.2809 = x + y; > 9 │ goto <D.2812>; > 10 │ <D.2811>: > 11 │ D.2809 = 255; > 12 │ <D.2812>: > 13 │ return D.2809; > 14 │ } > > After this patch: > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 2 │ { > 3 │ uint8_t D.2809; > 4 │ > 5 │ _1 = x + y; > 6 │ _2 = x + y; > 7 │ _3 = x > _2; > 8 │ _4 = (unsigned char) _3; > 9 │ _5 = -_4; > 10 │ D.2809 = _1 | _5; > 11 │ return D.2809; > 12 │ } > > The below test suites are passed for this patch. > * The rv64gcv fully regression test. > * The x86 bootstrap test. > * The x86 fully regression test. > > gcc/ChangeLog: > > * match.pd: Remove unsigned branch form 4 for SAT_ADD, and > add simplify to branchless instead. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c: New test. > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c: New test. You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ I wonder if the simplification is already applied by the frontend and thus .original shows the simplified form or only .gimple? Did you check the simplification applies when writing as if (x + y < x) return -1; else return x + y; ? If it doesn't then removing the match is likely premature. I think phiopt should be able to perform the matching here (it does the COND_EXPR building). Thanks, Richard. > Signed-off-by: Pan Li <pan2.li@intel.com> > --- > gcc/match.pd | 11 +++++++---- > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c | 15 +++++++++++++++ > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c | 15 +++++++++++++++ > 5 files changed, 67 insertions(+), 4 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 809c717bc86..4d1143b6ec3 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3154,10 +3154,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > && types_match (type, @0, @1)) > (bit_ior @2 (negate (convert (lt @2 @0)))))) > > -/* Unsigned saturation add, case 4 (branch with lt): > - SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ > -(match (unsigned_integer_sat_add @0 @1) > - (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) > +/* Simplify SAT_U_ADD to the cheap form > + From: SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > + To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ > +(simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2) > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > + && types_match (type, @0, @1)) > + (bit_ior @2 (negate (convert (lt @2 @0)))))) > > /* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): > SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > new file mode 100644 > index 00000000000..6e327f58d46 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint16_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > new file mode 100644 > index 00000000000..17b09396b69 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint32_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > new file mode 100644 > index 00000000000..8db9b427a65 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint64_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > new file mode 100644 > index 00000000000..4e86ef9a5fd > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > + > +#include <stdint.h> > + > +#define T uint8_t > + > +T sat_add_u_1 (T x, T y) > +{ > + return (T)(x + y) < x ? -1 : (x + y); > +} > + > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > -- > 2.43.0 >
On Wed, Oct 30, 2024 at 3:09 AM Li, Pan2 <pan2.li@intel.com> wrote: > > Thanks Richard for comments. > > > You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ > > Sure, will move all test files to there. > > > I wonder if the simplification is already applied by the frontend and thus > > .original shows the simplified form or only .gimple? > > Yes, you are right, the .original shows the simplified form. Take below code as example: > > 6 │ #define T uint8_t > 7 │ > 8 │ T sat_add_u_1 (T x, T y) > 9 │ { > 10 │ return (T)(x + y) < x ? -1 : (x + y); > 11 │ } > > We have .original similar as below: > > 6 │ { > 7 │ return (unsigned char) x + (unsigned char) y | -(uint8_t) ((unsigned char) x + (unsigned char) y < (unsigned char) x); > 8 │ } > > > Did you check the simplification applies when writing as > > > > if (x + y < x) > > return -1; > > else > > return x + y; > > ? If it doesn't then removing the match is likely premature. I think phiopt > > should be able to perform the matching here (it does the COND_EXPR > > building). > > The form above doesn't hit the simplification after .gimple but the .phiopt2 performs > the simplify and then we have below gimple after .phiopt2. > > 10 │ COND_EXPR in block 2 and PHI in block 4 converted to straightline code. > 11 │ Merging blocks 2 and 4 > 12 │ fix_loop_structure: fixing up loops for function > 13 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 14 │ { > 15 │ unsigned char _1; > 16 │ _Bool _6; > 17 │ unsigned char _7; > 18 │ unsigned char _8; > 19 │ unsigned char _9; > 20 │ > 21 │ <bb 2> [local count: 1073741824]: > 22 │ _1 = x_3(D) + y_4(D); > 23 │ _6 = _1 < x_3(D); > 24 │ _7 = (unsigned char) _6; > 25 │ _8 = -_7; > 26 │ _9 = _1 | _8; > 27 │ return _9; > 28 │ > 29 │ } > > BTW, given sorts of forms will be simplified to the "cheap" one, do we have some best > practice to avoid the "cheap" form duplication? Currently for each simplification we have > one copy of the "cheap" form. The "switch" in match.pd looks like not applicable here. > I also have a try similar as below but it will generate invalid tree for some cases. > > (simplify (cond (gt @0 (plus:c@2 @0 @1)) integer_minus_onep @2) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)) > - (bit_ior @2 (negate (convert (lt @2 @0)))))) > + { build_cheap_unsigned_int_sat_add (@0, @1, type); })) That won't work for GIMPLE. I was originally thinking we either canonicalize all forms to a single "simple cheap" form and only match that to .SAT_* later or we use generate the "simple cheap" form at the point we'd introduce .SAT_* as fallback in a programmatic way (do code-gen from the pass and not via match.pd simplify patterns). The latter variant is more complicated as we'd need to duplicate things in the vectorizer and late. The former variant indeed needs to duplicate the simplify result expression for each matched source (but you could use preprocessor macros if it doesn't get too ugly). Richard. > > +tree > +build_cheap_unsigned_int_sat_add (tree op_0, tree op_1, tree type) > +{ > + /* (bit_ior @2 (negate (convert (lt @2 @0)))))) */ > + return build2 (BIT_IOR_EXPR, type, > + build2 (PLUS_EXPR, type, op_0, op_1), > + build1 (NEGATE_EXPR, type, > + build1 (NOP_EXPR, type, > + build2 (LT_EXPR, boolean_type_node, > + build2 (PLUS_EXPR, type, op_0, op_1), > + op_0)))); > +} > > Pan > > -----Original Message----- > From: Richard Biener <richard.guenther@gmail.com> > Sent: Tuesday, October 29, 2024 9:06 PM > To: Li, Pan2 <pan2.li@intel.com> > Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Andrew Pinski <quic_apinski@quicinc.com> > Subject: Re: [PATCH 1/5] Match: Simplify branch form 4 of unsigned SAT_ADD into branchless > > On Tue, Oct 29, 2024 at 9:27 AM <pan2.li@intel.com> wrote: > > > > From: Pan Li <pan2.li@intel.com> > > > > There are sorts of forms for the unsigned SAT_ADD. Some of them are > > complicated while others are cheap. This patch would like to simplify > > the complicated form into the cheap ones. For example as below: > > > > From the form 4 (branch): > > SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > > > > To (branchless): > > SAT_U_ADD = (X + Y) | - ((X + Y) < X). > > > > #define T uint8_t > > > > T sat_add_u_1 (T x, T y) > > { > > return (T)(x + y) < x ? -1 : (x + y); > > } > > > > Before this patch: > > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > > 2 │ { > > 3 │ uint8_t D.2809; > > 4 │ > > 5 │ _1 = x + y; > > 6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>; > > 7 │ <D.2810>: > > 8 │ D.2809 = x + y; > > 9 │ goto <D.2812>; > > 10 │ <D.2811>: > > 11 │ D.2809 = 255; > > 12 │ <D.2812>: > > 13 │ return D.2809; > > 14 │ } > > > > After this patch: > > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > > 2 │ { > > 3 │ uint8_t D.2809; > > 4 │ > > 5 │ _1 = x + y; > > 6 │ _2 = x + y; > > 7 │ _3 = x > _2; > > 8 │ _4 = (unsigned char) _3; > > 9 │ _5 = -_4; > > 10 │ D.2809 = _1 | _5; > > 11 │ return D.2809; > > 12 │ } > > > > The below test suites are passed for this patch. > > * The rv64gcv fully regression test. > > * The x86 bootstrap test. > > * The x86 fully regression test. > > > > gcc/ChangeLog: > > > > * match.pd: Remove unsigned branch form 4 for SAT_ADD, and > > add simplify to branchless instead. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c: New test. > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c: New test. > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c: New test. > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c: New test. > > You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ > I wonder if the simplification is already applied by the frontend and thus > .original shows the simplified form or only .gimple? > > Did you check the simplification applies when writing as > > if (x + y < x) > return -1; > else > return x + y; > > ? If it doesn't then removing the match is likely premature. I think phiopt > should be able to perform the matching here (it does the COND_EXPR > building). > > Thanks, > Richard. > > > Signed-off-by: Pan Li <pan2.li@intel.com> > > --- > > gcc/match.pd | 11 +++++++---- > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c | 15 +++++++++++++++ > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c | 15 +++++++++++++++ > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c | 15 +++++++++++++++ > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c | 15 +++++++++++++++ > > 5 files changed, 67 insertions(+), 4 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 809c717bc86..4d1143b6ec3 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -3154,10 +3154,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > && types_match (type, @0, @1)) > > (bit_ior @2 (negate (convert (lt @2 @0)))))) > > > > -/* Unsigned saturation add, case 4 (branch with lt): > > - SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ > > -(match (unsigned_integer_sat_add @0 @1) > > - (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) > > +/* Simplify SAT_U_ADD to the cheap form > > + From: SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > > + To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ > > +(simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)) > > + (bit_ior @2 (negate (convert (lt @2 @0)))))) > > > > /* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): > > SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > > new file mode 100644 > > index 00000000000..6e327f58d46 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint16_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > > new file mode 100644 > > index 00000000000..17b09396b69 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint32_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > > new file mode 100644 > > index 00000000000..8db9b427a65 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint64_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > new file mode 100644 > > index 00000000000..4e86ef9a5fd > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint8_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > -- > > 2.43.0 > >
Thanks Richard for comments. > The > former variant indeed needs to duplicate the simplify result expression for each > matched source (but you could use preprocessor macros if it doesn't > get too ugly). Got it, will have a try similar to #define cheap_sat_u_add (op_0, op_1) ... in v2. Pan -----Original Message----- From: Richard Biener <richard.guenther@gmail.com> Sent: Wednesday, October 30, 2024 5:23 PM To: Li, Pan2 <pan2.li@intel.com> Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Andrew Pinski <quic_apinski@quicinc.com> Subject: Re: [PATCH 1/5] Match: Simplify branch form 4 of unsigned SAT_ADD into branchless On Wed, Oct 30, 2024 at 3:09 AM Li, Pan2 <pan2.li@intel.com> wrote: > > Thanks Richard for comments. > > > You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ > > Sure, will move all test files to there. > > > I wonder if the simplification is already applied by the frontend and thus > > .original shows the simplified form or only .gimple? > > Yes, you are right, the .original shows the simplified form. Take below code as example: > > 6 │ #define T uint8_t > 7 │ > 8 │ T sat_add_u_1 (T x, T y) > 9 │ { > 10 │ return (T)(x + y) < x ? -1 : (x + y); > 11 │ } > > We have .original similar as below: > > 6 │ { > 7 │ return (unsigned char) x + (unsigned char) y | -(uint8_t) ((unsigned char) x + (unsigned char) y < (unsigned char) x); > 8 │ } > > > Did you check the simplification applies when writing as > > > > if (x + y < x) > > return -1; > > else > > return x + y; > > ? If it doesn't then removing the match is likely premature. I think phiopt > > should be able to perform the matching here (it does the COND_EXPR > > building). > > The form above doesn't hit the simplification after .gimple but the .phiopt2 performs > the simplify and then we have below gimple after .phiopt2. > > 10 │ COND_EXPR in block 2 and PHI in block 4 converted to straightline code. > 11 │ Merging blocks 2 and 4 > 12 │ fix_loop_structure: fixing up loops for function > 13 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > 14 │ { > 15 │ unsigned char _1; > 16 │ _Bool _6; > 17 │ unsigned char _7; > 18 │ unsigned char _8; > 19 │ unsigned char _9; > 20 │ > 21 │ <bb 2> [local count: 1073741824]: > 22 │ _1 = x_3(D) + y_4(D); > 23 │ _6 = _1 < x_3(D); > 24 │ _7 = (unsigned char) _6; > 25 │ _8 = -_7; > 26 │ _9 = _1 | _8; > 27 │ return _9; > 28 │ > 29 │ } > > BTW, given sorts of forms will be simplified to the "cheap" one, do we have some best > practice to avoid the "cheap" form duplication? Currently for each simplification we have > one copy of the "cheap" form. The "switch" in match.pd looks like not applicable here. > I also have a try similar as below but it will generate invalid tree for some cases. > > (simplify (cond (gt @0 (plus:c@2 @0 @1)) integer_minus_onep @2) > (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > && types_match (type, @0, @1)) > - (bit_ior @2 (negate (convert (lt @2 @0)))))) > + { build_cheap_unsigned_int_sat_add (@0, @1, type); })) That won't work for GIMPLE. I was originally thinking we either canonicalize all forms to a single "simple cheap" form and only match that to .SAT_* later or we use generate the "simple cheap" form at the point we'd introduce .SAT_* as fallback in a programmatic way (do code-gen from the pass and not via match.pd simplify patterns). The latter variant is more complicated as we'd need to duplicate things in the vectorizer and late. The former variant indeed needs to duplicate the simplify result expression for each matched source (but you could use preprocessor macros if it doesn't get too ugly). Richard. > > +tree > +build_cheap_unsigned_int_sat_add (tree op_0, tree op_1, tree type) > +{ > + /* (bit_ior @2 (negate (convert (lt @2 @0)))))) */ > + return build2 (BIT_IOR_EXPR, type, > + build2 (PLUS_EXPR, type, op_0, op_1), > + build1 (NEGATE_EXPR, type, > + build1 (NOP_EXPR, type, > + build2 (LT_EXPR, boolean_type_node, > + build2 (PLUS_EXPR, type, op_0, op_1), > + op_0)))); > +} > > Pan > > -----Original Message----- > From: Richard Biener <richard.guenther@gmail.com> > Sent: Tuesday, October 29, 2024 9:06 PM > To: Li, Pan2 <pan2.li@intel.com> > Cc: gcc-patches@gcc.gnu.org; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; rdapp.gcc@gmail.com; Andrew Pinski <quic_apinski@quicinc.com> > Subject: Re: [PATCH 1/5] Match: Simplify branch form 4 of unsigned SAT_ADD into branchless > > On Tue, Oct 29, 2024 at 9:27 AM <pan2.li@intel.com> wrote: > > > > From: Pan Li <pan2.li@intel.com> > > > > There are sorts of forms for the unsigned SAT_ADD. Some of them are > > complicated while others are cheap. This patch would like to simplify > > the complicated form into the cheap ones. For example as below: > > > > From the form 4 (branch): > > SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > > > > To (branchless): > > SAT_U_ADD = (X + Y) | - ((X + Y) < X). > > > > #define T uint8_t > > > > T sat_add_u_1 (T x, T y) > > { > > return (T)(x + y) < x ? -1 : (x + y); > > } > > > > Before this patch: > > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > > 2 │ { > > 3 │ uint8_t D.2809; > > 4 │ > > 5 │ _1 = x + y; > > 6 │ if (x <= _1) goto <D.2810>; else goto <D.2811>; > > 7 │ <D.2810>: > > 8 │ D.2809 = x + y; > > 9 │ goto <D.2812>; > > 10 │ <D.2811>: > > 11 │ D.2809 = 255; > > 12 │ <D.2812>: > > 13 │ return D.2809; > > 14 │ } > > > > After this patch: > > 1 │ uint8_t sat_add_u_1 (uint8_t x, uint8_t y) > > 2 │ { > > 3 │ uint8_t D.2809; > > 4 │ > > 5 │ _1 = x + y; > > 6 │ _2 = x + y; > > 7 │ _3 = x > _2; > > 8 │ _4 = (unsigned char) _3; > > 9 │ _5 = -_4; > > 10 │ D.2809 = _1 | _5; > > 11 │ return D.2809; > > 12 │ } > > > > The below test suites are passed for this patch. > > * The rv64gcv fully regression test. > > * The x86 bootstrap test. > > * The x86 fully regression test. > > > > gcc/ChangeLog: > > > > * match.pd: Remove unsigned branch form 4 for SAT_ADD, and > > add simplify to branchless instead. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c: New test. > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c: New test. > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c: New test. > > * gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c: New test. > > You are testing GENERIC folding, so gcc.dg/ is a better location, not tree-ssa/ > I wonder if the simplification is already applied by the frontend and thus > .original shows the simplified form or only .gimple? > > Did you check the simplification applies when writing as > > if (x + y < x) > return -1; > else > return x + y; > > ? If it doesn't then removing the match is likely premature. I think phiopt > should be able to perform the matching here (it does the COND_EXPR > building). > > Thanks, > Richard. > > > Signed-off-by: Pan Li <pan2.li@intel.com> > > --- > > gcc/match.pd | 11 +++++++---- > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c | 15 +++++++++++++++ > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c | 15 +++++++++++++++ > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c | 15 +++++++++++++++ > > .../gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c | 15 +++++++++++++++ > > 5 files changed, 67 insertions(+), 4 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 809c717bc86..4d1143b6ec3 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -3154,10 +3154,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > && types_match (type, @0, @1)) > > (bit_ior @2 (negate (convert (lt @2 @0)))))) > > > > -/* Unsigned saturation add, case 4 (branch with lt): > > - SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ > > -(match (unsigned_integer_sat_add @0 @1) > > - (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) > > +/* Simplify SAT_U_ADD to the cheap form > > + From: SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). > > + To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ > > +(simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2) > > + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) > > + && types_match (type, @0, @1)) > > + (bit_ior @2 (negate (convert (lt @2 @0)))))) > > > > /* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): > > SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > > new file mode 100644 > > index 00000000000..6e327f58d46 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint16_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > > new file mode 100644 > > index 00000000000..17b09396b69 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint32_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > > new file mode 100644 > > index 00000000000..8db9b427a65 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint64_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > new file mode 100644 > > index 00000000000..4e86ef9a5fd > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ > > + > > +#include <stdint.h> > > + > > +#define T uint8_t > > + > > +T sat_add_u_1 (T x, T y) > > +{ > > + return (T)(x + y) < x ? -1 : (x + y); > > +} > > + > > +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ > > +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ > > -- > > 2.43.0 > >
diff --git a/gcc/match.pd b/gcc/match.pd index 809c717bc86..4d1143b6ec3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3154,10 +3154,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && types_match (type, @0, @1)) (bit_ior @2 (negate (convert (lt @2 @0)))))) -/* Unsigned saturation add, case 4 (branch with lt): - SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). */ -(match (unsigned_integer_sat_add @0 @1) - (cond^ (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2)) +/* Simplify SAT_U_ADD to the cheap form + From: SAT_U_ADD = (X + Y) < x ? -1 : (X + Y). + To: SAT_U_ADD = (X + Y) | - ((X + Y) < X). */ +(simplify (cond (lt (plus:c@2 @0 @1) @0) integer_minus_onep @2) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)) + (bit_ior @2 (negate (convert (lt @2 @0)))))) /* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW): SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c new file mode 100644 index 00000000000..6e327f58d46 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u16.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint16_t + +T sat_add_u_1 (T x, T y) +{ + return (T)(x + y) < x ? -1 : (x + y); +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c new file mode 100644 index 00000000000..17b09396b69 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u32.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint32_t + +T sat_add_u_1 (T x, T y) +{ + return (T)(x + y) < x ? -1 : (x + y); +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c new file mode 100644 index 00000000000..8db9b427a65 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u64.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint64_t + +T sat_add_u_1 (T x, T y) +{ + return (T)(x + y) < x ? -1 : (x + y); +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c new file mode 100644 index 00000000000..4e86ef9a5fd --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sat_u_add-simplify-2-u8.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-gimple-details" } */ + +#include <stdint.h> + +#define T uint8_t + +T sat_add_u_1 (T x, T y) +{ + return (T)(x + y) < x ? -1 : (x + y); +} + +/* { dg-final { scan-tree-dump-not " if " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " else " "gimple" } } */ +/* { dg-final { scan-tree-dump-not " goto " "gimple" } } */