Message ID | h48y32hyl48.fsf_-_@genoa.aus.stglabs.ibm.com |
---|---|
State | New |
Headers | show |
Series | [V4] A jump threading opportunity for condition branch | expand |
On 6/3/19 11:28 PM, Jiufu Guo wrote: > > Hi, > > This patch implements a new opportunity of jump threading for PR77820. > In this optimization, conditional jumps are merged with unconditional > jump. And then moving CMP result to GPR is eliminated. > > This version is based on the proposal of Richard, Jeff and Andrew on > previous versions, and refined to incorporate comments, such as accept > the path with single_succ_p (e->src). > Thanks for the reviews! > > Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help > from Jeff) with no regressions (two cases are improved and updated > to keep original test coverage) and new testcases are added. > Is this ok for trunk? > > Example of this opportunity looks like below: > > <P0> > p0 = a CMP b > goto <X>; > > <P1> > p1 = c CMP d > goto <X>; > > <X> > # phi = PHI <p0 (P0), p1 (P1)> > if (phi != 0) goto <Y>; else goto <Z>; > > Could be transformed to: > > <P0> > p0 = a CMP b > if (p0 != 0) goto <Y>; else goto <Z>; > > <P1> > p1 = c CMP d > if (p1 != 0) goto <Y>; else goto <Z>; > > > This optimization eliminates: > 1. saving CMP result: p0 = a CMP b. > 2. additional CMP on branch: if (phi != 0). > 3. converting CMP result if there is phi = (INT) p0 if there is. > > Thanks! > Jiufu Guo > > [gcc] > 2019-06-04 Jiufu Guo <guojiufu@linux.ibm.com> > Lijia He <helijia@linux.ibm.com> > > PR tree-optimization/77820 > * tree-ssa-threadedge.c > (edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New > function. > (thread_across_edge): Add call to > edge_forwards_cmp_to_conditional_jump_through_empty_bb_p. > > [gcc/testsuite] > 2019-06-04 Jiufu Guo <guojiufu@linux.ibm.com> > Lijia He <helijia@linux.ibm.com> > > PR tree-optimization/77820 > * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase. > * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase. > * gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase. > * gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase. > * gcc.dg/tree-ssa/split-path-6.c: Update testcase. > * gcc.target/sh/pr51244-20.c: Update testcase. Yes, this is OK for the trunk. I'll commit it momentarily. Jeff > > > --- > gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 ++++++++ > gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 +++++++++ > gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 ++++++++++++++ > gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c | 2 +- > gcc/testsuite/gcc.target/sh/pr51244-20.c | 2 +- > gcc/tree-ssa-threadedge.c | 70 +++++++++++++++++++++++- > 7 files changed, 187 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c > new file mode 100644 > index 0000000..5227c87 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c > @@ -0,0 +1,30 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ > + > +void g (int); > +void g1 (int); > + > +void > +f (long a, long b, long c, long d, long x) > +{ > + _Bool t; > + if (x) > + { > + g (a + 1); > + t = a < b; > + c = d + x; > + } > + else > + { > + g (b + 1); > + a = c + d; > + t = c > d; > + } > + > + if (t) > + g1 (c); > + > + g (a); > +} > + > +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c > new file mode 100644 > index 0000000..eaf89bb > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c > @@ -0,0 +1,23 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ > + > +void g (void); > +void g1 (void); > + > +void > +f (long a, long b, long c, long d, int x) > +{ > + _Bool t; > + if (x) > + t = c < d; > + else > + t = a < b; > + > + if (t) > + { > + g1 (); > + g (); > + } > +} > + > +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c > new file mode 100644 > index 0000000..d5a1e0b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c > @@ -0,0 +1,25 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ > + > +void g (void); > +void g1 (void); > + > +void > +f (long a, long b, long c, long d, int x) > +{ > + int t; > + if (x) > + t = a < b; > + else if (d == x) > + t = c < b; > + else > + t = d > c; > + > + if (t) > + { > + g1 (); > + g (); > + } > +} > + > +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c > new file mode 100644 > index 0000000..53acabc > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c > @@ -0,0 +1,40 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ > + > +void g (int); > +void g1 (int); > + > +void > +f (long a, long b, long c, long d, int x) > +{ > + int t; > + _Bool l1 = 0, l2 = 0; > + if (x) > + { > + g (a); > + c = a + b; > + t = a < b; > + l1 = 1; > + } > + else > + { > + g1 (b); > + t = c > d; > + d = c + b; > + l2 = 1; > + } > + > + if (t) > + { > + if (l1 | l2) > + g1 (c); > + } > + else > + { > + g (d); > + g1 (a + b); > + } > + g (c + d); > +} > + > +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c > index e9b4f26..fb171cd 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */ > +/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -fno-tree-dominator-opts -fno-tree-vrp -w" } */ > > struct __sFILE > { > diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c > index c342163..be265cd 100644 > --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c > +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c > @@ -1,7 +1,7 @@ > /* Check that the SH specific sh_treg_combine RTL optimization pass works as > expected. */ > /* { dg-do compile } */ > -/* { dg-options "-O2" } */ > +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */ > > /* { dg-final { scan-assembler-not "not\t" } } */ > /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */ > diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c > index c3ea2d6..785227d 100644 > --- a/gcc/tree-ssa-threadedge.c > +++ b/gcc/tree-ssa-threadedge.c > @@ -1157,6 +1157,68 @@ thread_through_normal_block (edge e, > return 0; > } > > +/* There are basic blocks look like: > + <P0> > + p0 = a CMP b ; or p0 = (INT) (a CMP b) > + goto <X>; > + > + <P1> > + p1 = c CMP d > + goto <X>; > + > + <X> > + # phi = PHI <p0 (P0), p1 (P1)> > + if (phi != 0) goto <Y>; else goto <Z>; > + > + Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD > + And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK > + > + Return true if E is (P0,X) or (P1,X) */ > + > +bool > +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e) > +{ > + /* See if there is only one stmt which is gcond. */ > + gcond *gs; > + if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest)))) > + return false; > + > + /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block. */ > + tree cond = gimple_cond_lhs (gs); > + enum tree_code code = gimple_cond_code (gs); > + tree rhs = gimple_cond_rhs (gs); > + if (TREE_CODE (cond) != SSA_NAME > + || (code != NE_EXPR && code != EQ_EXPR) > + || (!integer_onep (rhs) && !integer_zerop (rhs))) > + return false; > + gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond)); > + if (phi == NULL || gimple_bb (phi) != e->dest) > + return false; > + > + /* Check if phi's incoming value is CMP. */ > + gassign *def; > + tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); > + if (TREE_CODE (value) != SSA_NAME > + || !has_single_use (value) > + || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value)))) > + return false; > + > + /* Or if it is (INT) (a CMP b). */ > + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))) > + { > + value = gimple_assign_rhs1 (def); > + if (TREE_CODE (value) != SSA_NAME > + || !has_single_use (value) > + || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value)))) > + return false; > + } > + > + if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison) > + return false; > + > + return true; > +} > + > /* We are exiting E->src, see if E->dest ends with a conditional > jump which has a known value when reached via E. > > @@ -1317,10 +1379,12 @@ thread_across_edge (gcond *dummy_cond, > > /* If we were able to thread through a successor of E->dest, then > record the jump threading opportunity. */ > - if (found) > + if (found > + || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e)) > { > - propagate_threaded_block_debug_into (path->last ()->e->dest, > - taken_edge->dest); > + if (taken_edge->dest != path->last ()->e->dest) > + propagate_threaded_block_debug_into (path->last ()->e->dest, > + taken_edge->dest); > register_jump_thread (path); > } > else >
Jeff Law <law@redhat.com> writes: > On 6/3/19 11:28 PM, Jiufu Guo wrote: >> >> Hi, >> >> This patch implements a new opportunity of jump threading for PR77820. >> In this optimization, conditional jumps are merged with unconditional >> jump. And then moving CMP result to GPR is eliminated. >> >> This version is based on the proposal of Richard, Jeff and Andrew on >> previous versions, and refined to incorporate comments, such as accept >> the path with single_succ_p (e->src). >> Thanks for the reviews! >> >> Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help >> from Jeff) with no regressions (two cases are improved and updated >> to keep original test coverage) and new testcases are added. >> Is this ok for trunk? >> >> Example of this opportunity looks like below: >> >> <P0> >> p0 = a CMP b >> goto <X>; >> >> <P1> >> p1 = c CMP d >> goto <X>; >> >> <X> >> # phi = PHI <p0 (P0), p1 (P1)> >> if (phi != 0) goto <Y>; else goto <Z>; >> >> Could be transformed to: >> >> <P0> >> p0 = a CMP b >> if (p0 != 0) goto <Y>; else goto <Z>; >> >> <P1> >> p1 = c CMP d >> if (p1 != 0) goto <Y>; else goto <Z>; >> >> >> This optimization eliminates: >> 1. saving CMP result: p0 = a CMP b. >> 2. additional CMP on branch: if (phi != 0). >> 3. converting CMP result if there is phi = (INT) p0 if there is. >> >> Thanks! >> Jiufu Guo >> >> [gcc] >> 2019-06-04 Jiufu Guo <guojiufu@linux.ibm.com> >> Lijia He <helijia@linux.ibm.com> >> >> PR tree-optimization/77820 >> * tree-ssa-threadedge.c >> (edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New >> function. >> (thread_across_edge): Add call to >> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p. >> >> [gcc/testsuite] >> 2019-06-04 Jiufu Guo <guojiufu@linux.ibm.com> >> Lijia He <helijia@linux.ibm.com> >> >> PR tree-optimization/77820 >> * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase. >> * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase. >> * gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase. >> * gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase. >> * gcc.dg/tree-ssa/split-path-6.c: Update testcase. >> * gcc.target/sh/pr51244-20.c: Update testcase. > Yes, this is OK for the trunk. I'll commit it momentarily. Thanks Jeff! I got svn access, I could commit next time. > > Jeff > > > > >> >> >> --- >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c | 30 ++++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c | 23 ++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c | 25 +++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c | 40 ++++++++++++++ >> gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c | 2 +- >> gcc/testsuite/gcc.target/sh/pr51244-20.c | 2 +- >> gcc/tree-ssa-threadedge.c | 70 +++++++++++++++++++++++- >> 7 files changed, 187 insertions(+), 5 deletions(-) >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >> new file mode 100644 >> index 0000000..5227c87 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c >> @@ -0,0 +1,30 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (int); >> +void g1 (int); >> + >> +void >> +f (long a, long b, long c, long d, long x) >> +{ >> + _Bool t; >> + if (x) >> + { >> + g (a + 1); >> + t = a < b; >> + c = d + x; >> + } >> + else >> + { >> + g (b + 1); >> + a = c + d; >> + t = c > d; >> + } >> + >> + if (t) >> + g1 (c); >> + >> + g (a); >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >> new file mode 100644 >> index 0000000..eaf89bb >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c >> @@ -0,0 +1,23 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (void); >> +void g1 (void); >> + >> +void >> +f (long a, long b, long c, long d, int x) >> +{ >> + _Bool t; >> + if (x) >> + t = c < d; >> + else >> + t = a < b; >> + >> + if (t) >> + { >> + g1 (); >> + g (); >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >> new file mode 100644 >> index 0000000..d5a1e0b >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c >> @@ -0,0 +1,25 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (void); >> +void g1 (void); >> + >> +void >> +f (long a, long b, long c, long d, int x) >> +{ >> + int t; >> + if (x) >> + t = a < b; >> + else if (d == x) >> + t = c < b; >> + else >> + t = d > c; >> + >> + if (t) >> + { >> + g1 (); >> + g (); >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >> new file mode 100644 >> index 0000000..53acabc >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c >> @@ -0,0 +1,40 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ >> + >> +void g (int); >> +void g1 (int); >> + >> +void >> +f (long a, long b, long c, long d, int x) >> +{ >> + int t; >> + _Bool l1 = 0, l2 = 0; >> + if (x) >> + { >> + g (a); >> + c = a + b; >> + t = a < b; >> + l1 = 1; >> + } >> + else >> + { >> + g1 (b); >> + t = c > d; >> + d = c + b; >> + l2 = 1; >> + } >> + >> + if (t) >> + { >> + if (l1 | l2) >> + g1 (c); >> + } >> + else >> + { >> + g (d); >> + g1 (a + b); >> + } >> + g (c + d); >> +} >> + >> +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >> index e9b4f26..fb171cd 100644 >> --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c >> @@ -1,5 +1,5 @@ >> /* { dg-do compile } */ >> -/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */ >> +/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -fno-tree-dominator-opts -fno-tree-vrp -w" } */ >> >> struct __sFILE >> { >> diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c >> index c342163..be265cd 100644 >> --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c >> +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c >> @@ -1,7 +1,7 @@ >> /* Check that the SH specific sh_treg_combine RTL optimization pass works as >> expected. */ >> /* { dg-do compile } */ >> -/* { dg-options "-O2" } */ >> +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */ >> >> /* { dg-final { scan-assembler-not "not\t" } } */ >> /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */ >> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c >> index c3ea2d6..785227d 100644 >> --- a/gcc/tree-ssa-threadedge.c >> +++ b/gcc/tree-ssa-threadedge.c >> @@ -1157,6 +1157,68 @@ thread_through_normal_block (edge e, >> return 0; >> } >> >> +/* There are basic blocks look like: >> + <P0> >> + p0 = a CMP b ; or p0 = (INT) (a CMP b) >> + goto <X>; >> + >> + <P1> >> + p1 = c CMP d >> + goto <X>; >> + >> + <X> >> + # phi = PHI <p0 (P0), p1 (P1)> >> + if (phi != 0) goto <Y>; else goto <Z>; >> + >> + Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD >> + And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK >> + >> + Return true if E is (P0,X) or (P1,X) */ >> + >> +bool >> +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e) >> +{ >> + /* See if there is only one stmt which is gcond. */ >> + gcond *gs; >> + if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest)))) >> + return false; >> + >> + /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block. */ >> + tree cond = gimple_cond_lhs (gs); >> + enum tree_code code = gimple_cond_code (gs); >> + tree rhs = gimple_cond_rhs (gs); >> + if (TREE_CODE (cond) != SSA_NAME >> + || (code != NE_EXPR && code != EQ_EXPR) >> + || (!integer_onep (rhs) && !integer_zerop (rhs))) >> + return false; >> + gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond)); >> + if (phi == NULL || gimple_bb (phi) != e->dest) >> + return false; >> + >> + /* Check if phi's incoming value is CMP. */ >> + gassign *def; >> + tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); >> + if (TREE_CODE (value) != SSA_NAME >> + || !has_single_use (value) >> + || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value)))) >> + return false; >> + >> + /* Or if it is (INT) (a CMP b). */ >> + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))) >> + { >> + value = gimple_assign_rhs1 (def); >> + if (TREE_CODE (value) != SSA_NAME >> + || !has_single_use (value) >> + || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value)))) >> + return false; >> + } >> + >> + if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison) >> + return false; >> + >> + return true; >> +} >> + >> /* We are exiting E->src, see if E->dest ends with a conditional >> jump which has a known value when reached via E. >> >> @@ -1317,10 +1379,12 @@ thread_across_edge (gcond *dummy_cond, >> >> /* If we were able to thread through a successor of E->dest, then >> record the jump threading opportunity. */ >> - if (found) >> + if (found >> + || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e)) >> { >> - propagate_threaded_block_debug_into (path->last ()->e->dest, >> - taken_edge->dest); >> + if (taken_edge->dest != path->last ()->e->dest) >> + propagate_threaded_block_debug_into (path->last ()->e->dest, >> + taken_edge->dest); >> register_jump_thread (path); >> } >> else >>
On 6/14/19 6:51 AM, Jiufu Guo wrote: > Jeff Law <law@redhat.com> writes: > >> On 6/3/19 11:28 PM, Jiufu Guo wrote: >>> >>> Hi, >>> >>> This patch implements a new opportunity of jump threading for PR77820. >>> In this optimization, conditional jumps are merged with unconditional >>> jump. And then moving CMP result to GPR is eliminated. >>> >>> This version is based on the proposal of Richard, Jeff and Andrew on >>> previous versions, and refined to incorporate comments, such as accept >>> the path with single_succ_p (e->src). >>> Thanks for the reviews! >>> >>> Bootstrapped and tested on powerpc64le, powerpc64 and sh (with help >>> from Jeff) with no regressions (two cases are improved and updated >>> to keep original test coverage) and new testcases are added. >>> Is this ok for trunk? >>> >>> Example of this opportunity looks like below: >>> >>> <P0> >>> p0 = a CMP b >>> goto <X>; >>> >>> <P1> >>> p1 = c CMP d >>> goto <X>; >>> >>> <X> >>> # phi = PHI <p0 (P0), p1 (P1)> >>> if (phi != 0) goto <Y>; else goto <Z>; >>> >>> Could be transformed to: >>> >>> <P0> >>> p0 = a CMP b >>> if (p0 != 0) goto <Y>; else goto <Z>; >>> >>> <P1> >>> p1 = c CMP d >>> if (p1 != 0) goto <Y>; else goto <Z>; >>> >>> >>> This optimization eliminates: >>> 1. saving CMP result: p0 = a CMP b. >>> 2. additional CMP on branch: if (phi != 0). >>> 3. converting CMP result if there is phi = (INT) p0 if there is. >>> >>> Thanks! >>> Jiufu Guo >>> >>> [gcc] >>> 2019-06-04 Jiufu Guo <guojiufu@linux.ibm.com> >>> Lijia He <helijia@linux.ibm.com> >>> >>> PR tree-optimization/77820 >>> * tree-ssa-threadedge.c >>> (edge_forwards_cmp_to_conditional_jump_through_empty_bb_p): New >>> function. >>> (thread_across_edge): Add call to >>> edge_forwards_cmp_to_conditional_jump_through_empty_bb_p. >>> >>> [gcc/testsuite] >>> 2019-06-04 Jiufu Guo <guojiufu@linux.ibm.com> >>> Lijia He <helijia@linux.ibm.com> >>> >>> PR tree-optimization/77820 >>> * gcc.dg/tree-ssa/phi_on_compare-1.c: New testcase. >>> * gcc.dg/tree-ssa/phi_on_compare-2.c: New testcase. >>> * gcc.dg/tree-ssa/phi_on_compare-3.c: New testcase. >>> * gcc.dg/tree-ssa/phi_on_compare-4.c: New testcase. >>> * gcc.dg/tree-ssa/split-path-6.c: Update testcase. >>> * gcc.target/sh/pr51244-20.c: Update testcase. >> Yes, this is OK for the trunk. I'll commit it momentarily. > Thanks Jeff! I got svn access, I could commit next time. Yea, Segher pointed that out to me in IRC. Looking forward to your next patchkit :-) jeff
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c new file mode 100644 index 0000000..5227c87 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ + +void g (int); +void g1 (int); + +void +f (long a, long b, long c, long d, long x) +{ + _Bool t; + if (x) + { + g (a + 1); + t = a < b; + c = d + x; + } + else + { + g (b + 1); + a = c + d; + t = c > d; + } + + if (t) + g1 (c); + + g (a); +} + +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c new file mode 100644 index 0000000..eaf89bb --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-2.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ + +void g (void); +void g1 (void); + +void +f (long a, long b, long c, long d, int x) +{ + _Bool t; + if (x) + t = c < d; + else + t = a < b; + + if (t) + { + g1 (); + g (); + } +} + +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c new file mode 100644 index 0000000..d5a1e0b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-3.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ + +void g (void); +void g1 (void); + +void +f (long a, long b, long c, long d, int x) +{ + int t; + if (x) + t = a < b; + else if (d == x) + t = c < b; + else + t = d > c; + + if (t) + { + g1 (); + g (); + } +} + +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c new file mode 100644 index 0000000..53acabc --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi_on_compare-4.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-vrp1" } */ + +void g (int); +void g1 (int); + +void +f (long a, long b, long c, long d, int x) +{ + int t; + _Bool l1 = 0, l2 = 0; + if (x) + { + g (a); + c = a + b; + t = a < b; + l1 = 1; + } + else + { + g1 (b); + t = c > d; + d = c + b; + l2 = 1; + } + + if (t) + { + if (l1 | l2) + g1 (c); + } + else + { + g (d); + g1 (a + b); + } + g (c + d); +} + +/* { dg-final { scan-tree-dump-times "Removing basic block" 1 "vrp1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c index e9b4f26..fb171cd 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */ +/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -fno-tree-dominator-opts -fno-tree-vrp -w" } */ struct __sFILE { diff --git a/gcc/testsuite/gcc.target/sh/pr51244-20.c b/gcc/testsuite/gcc.target/sh/pr51244-20.c index c342163..be265cd 100644 --- a/gcc/testsuite/gcc.target/sh/pr51244-20.c +++ b/gcc/testsuite/gcc.target/sh/pr51244-20.c @@ -1,7 +1,7 @@ /* Check that the SH specific sh_treg_combine RTL optimization pass works as expected. */ /* { dg-do compile } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-vrp" } */ /* { dg-final { scan-assembler-not "not\t" } } */ /* { dg-final { scan-assembler-times "cmp/eq" 2 } } */ diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c index c3ea2d6..785227d 100644 --- a/gcc/tree-ssa-threadedge.c +++ b/gcc/tree-ssa-threadedge.c @@ -1157,6 +1157,68 @@ thread_through_normal_block (edge e, return 0; } +/* There are basic blocks look like: + <P0> + p0 = a CMP b ; or p0 = (INT) (a CMP b) + goto <X>; + + <P1> + p1 = c CMP d + goto <X>; + + <X> + # phi = PHI <p0 (P0), p1 (P1)> + if (phi != 0) goto <Y>; else goto <Z>; + + Then, edge (P0,X) or (P1,X) could be marked as EDGE_START_JUMP_THREAD + And edge (X,Y), (X,Z) is EDGE_COPY_SRC_JOINER_BLOCK + + Return true if E is (P0,X) or (P1,X) */ + +bool +edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (edge e) +{ + /* See if there is only one stmt which is gcond. */ + gcond *gs; + if (!(gs = safe_dyn_cast<gcond *> (last_and_only_stmt (e->dest)))) + return false; + + /* See if gcond's cond is "(phi !=/== 0/1)" in the basic block. */ + tree cond = gimple_cond_lhs (gs); + enum tree_code code = gimple_cond_code (gs); + tree rhs = gimple_cond_rhs (gs); + if (TREE_CODE (cond) != SSA_NAME + || (code != NE_EXPR && code != EQ_EXPR) + || (!integer_onep (rhs) && !integer_zerop (rhs))) + return false; + gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (cond)); + if (phi == NULL || gimple_bb (phi) != e->dest) + return false; + + /* Check if phi's incoming value is CMP. */ + gassign *def; + tree value = PHI_ARG_DEF_FROM_EDGE (phi, e); + if (TREE_CODE (value) != SSA_NAME + || !has_single_use (value) + || !(def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (value)))) + return false; + + /* Or if it is (INT) (a CMP b). */ + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))) + { + value = gimple_assign_rhs1 (def); + if (TREE_CODE (value) != SSA_NAME + || !has_single_use (value) + || !(def = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (value)))) + return false; + } + + if (TREE_CODE_CLASS (gimple_assign_rhs_code (def)) != tcc_comparison) + return false; + + return true; +} + /* We are exiting E->src, see if E->dest ends with a conditional jump which has a known value when reached via E. @@ -1317,10 +1379,12 @@ thread_across_edge (gcond *dummy_cond, /* If we were able to thread through a successor of E->dest, then record the jump threading opportunity. */ - if (found) + if (found + || edge_forwards_cmp_to_conditional_jump_through_empty_bb_p (e)) { - propagate_threaded_block_debug_into (path->last ()->e->dest, - taken_edge->dest); + if (taken_edge->dest != path->last ()->e->dest) + propagate_threaded_block_debug_into (path->last ()->e->dest, + taken_edge->dest); register_jump_thread (path); } else