diff mbox

Allow FSM to thread single block cases too

Message ID 561CF734.7090802@redhat.com
State New
Headers show

Commit Message

Jeff Law Oct. 13, 2015, 12:21 p.m. UTC
One of the cases that was missing in the FSM support is threading when 
the path is a single block.  ie, a control statement's output can be 
statically determined just by looking at PHIs in the control statement's 
block for one or incoming edges.

This is necessary to fix a regression if I turn off the old jump 
threader's backedge support.  Just as important, Jan has in the past 
asked about a trivial jump threader to be run during early 
optimizations.  Limiting the FSM bits to this case would likely satisfy 
that need in the future.

Bootstrapped and regression tested on x86_64-linux-gnu.  Installed on 
the trunk.

Jeff
commit a53bb29a1dffd329aa6235b88b0c2a830aa5a59e
Author: Jeff Law <law@redhat.com>
Date:   Tue Oct 13 06:19:20 2015 -0600

    [PATCH] Allow FSM to thread single block cases too
    
    	* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
    	Allow single block jump threading paths.
    
    	* gcc.dg/tree-ssa/ssa-thread-13.c: New test.

Comments

Richard Biener Oct. 13, 2015, 12:52 p.m. UTC | #1
On Tue, Oct 13, 2015 at 2:21 PM, Jeff Law <law@redhat.com> wrote:
>
> One of the cases that was missing in the FSM support is threading when the
> path is a single block.  ie, a control statement's output can be statically
> determined just by looking at PHIs in the control statement's block for one
> or incoming edges.
>
> This is necessary to fix a regression if I turn off the old jump threader's
> backedge support.  Just as important, Jan has in the past asked about a
> trivial jump threader to be run during early optimizations.  Limiting the
> FSM bits to this case would likely satisfy that need in the future.

I think he asked for trivial forward threads though due to repeated tests.
I hacked FRE to do this (I think), but maybe some trivial cleanup opportunities
are still left here.  Honza?

Richard.

> Bootstrapped and regression tested on x86_64-linux-gnu.  Installed on the
> trunk.
>
> Jeff
>
> commit a53bb29a1dffd329aa6235b88b0c2a830aa5a59e
> Author: Jeff Law <law@redhat.com>
> Date:   Tue Oct 13 06:19:20 2015 -0600
>
>     [PATCH] Allow FSM to thread single block cases too
>
>         * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
>         Allow single block jump threading paths.
>
>         * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index d71bcd2..caab533 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,8 @@
> +2015-10-13  Jeff Law  <law@redhat.com>
> +
> +       * tree-ssa-threadbackward.c
> (fsm_find_control_statement_thread_paths):
> +       Allow single block jump threading paths.
> +
>  2015-10-13  Tom de Vries  <tom@codesourcery.com>
>
>         PR tree-optimization/67476
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 4a08f0f..acf6df5 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,7 @@
> +2015-10-13  Jeff Law  <law@redhat.com>
> +
> +       * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
> +
>  2015-10-12  Jeff Law  <law@redhat.com>
>
>         * gcc.dg/tree-ssa/ssa-thread-12.c: New test.
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> new file mode 100644
> index 0000000..5051d11
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
> @@ -0,0 +1,70 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
> +/* { dg-final { scan-tree-dump "FSM" "vrp1" } } */
> +
> +typedef struct rtx_def *rtx;
> +typedef const struct rtx_def *const_rtx;
> +enum rtx_code
> +{
> +  UNKNOWN, VALUE, DEBUG_EXPR, EXPR_LIST, INSN_LIST, SEQUENCE, ADDRESS,
> +    DEBUG_INSN, INSN, JUMP_INSN, CALL_INSN, BARRIER, CODE_LABEL, NOTE,
> +    COND_EXEC, PARALLEL, ASM_INPUT, ASM_OPERANDS, UNSPEC, UNSPEC_VOLATILE,
> +    ADDR_VEC, ADDR_DIFF_VEC, PREFETCH, SET, USE, CLOBBER, CALL, RETURN,
> +    EH_RETURN, TRAP_IF, CONST_INT, CONST_FIXED, CONST_DOUBLE, CONST_VECTOR,
> +    CONST_STRING, CONST, PC, REG, SCRATCH, SUBREG, STRICT_LOW_PART, CONCAT,
> +    CONCATN, MEM, LABEL_REF, SYMBOL_REF, CC0, IF_THEN_ELSE, COMPARE, PLUS,
> +    MINUS, NEG, MULT, SS_MULT, US_MULT, DIV, SS_DIV, US_DIV, MOD, UDIV,
> UMOD,
> +    AND, IOR, XOR, NOT, ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, ROTATERT, SMIN,
> +    SMAX, UMIN, UMAX, PRE_DEC, PRE_INC, POST_DEC, POST_INC, PRE_MODIFY,
> +    POST_MODIFY, NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU, UNORDERED,
> +    ORDERED, UNEQ, UNGE, UNGT, UNLE, UNLT, LTGT, SIGN_EXTEND, ZERO_EXTEND,
> +    TRUNCATE, FLOAT_EXTEND, FLOAT_TRUNCATE, FLOAT, FIX, UNSIGNED_FLOAT,
> +    UNSIGNED_FIX, FRACT_CONVERT, UNSIGNED_FRACT_CONVERT, SAT_FRACT,
> +    UNSIGNED_SAT_FRACT, ABS, SQRT, BSWAP, FFS, CLZ, CTZ, POPCOUNT, PARITY,
> +    SIGN_EXTRACT, ZERO_EXTRACT, HIGH, LO_SUM, VEC_MERGE, VEC_SELECT,
> +    VEC_CONCAT, VEC_DUPLICATE, SS_PLUS, US_PLUS, SS_MINUS, SS_NEG, US_NEG,
> +    SS_ABS, SS_ASHIFT, US_ASHIFT, US_MINUS, SS_TRUNCATE, US_TRUNCATE, FMA,
> +    VAR_LOCATION, DEBUG_IMPLICIT_PTR, ENTRY_VALUE, LAST_AND_UNUSED_RTX_CODE
> +};
> +union rtunion_def
> +{
> +  rtx rt_rtx;
> +};
> +typedef union rtunion_def rtunion;
> +struct rtx_def
> +{
> +  __extension__ enum rtx_code code:16;
> +  union u
> +  {
> +    rtunion fld[1];
> +  }
> +  u;
> +};
> +
> +unsigned int rtx_cost (rtx, enum rtx_code, unsigned char);
> +rtx single_set_2 (const_rtx, rtx);
> +
> +unsigned
> +seq_cost (const_rtx seq, unsigned char speed)
> +{
> +  unsigned cost = 0;
> +  rtx set;
> +  for (; seq; seq = (((seq)->u.fld[2]).rt_rtx))
> +    {
> +      set =
> +       (((((enum rtx_code) (seq)->code) == INSN)
> +         || (((enum rtx_code) (seq)->code) == DEBUG_INSN)
> +         || (((enum rtx_code) (seq)->code) == JUMP_INSN)
> +         || (((enum rtx_code) (seq)->code) ==
> +             CALL_INSN)) ? (((enum rtx_code) ((((seq)->u.fld[4]).rt_rtx))->
> +                             code) ==
> +                            SET ? (((seq)->u.fld[4]).
> +                                   rt_rtx) : single_set_2 (seq,
> +                                                           (((seq)->u.
> +                                                             fld[4]).
> +                                                            rt_rtx))) :
> (rtx)
> +        0);
> +      if (set)
> +       cost += rtx_cost (set, SET, speed);
> +    }
> +}
> diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
> index 5be6ee4..9128094 100644
> --- a/gcc/tree-ssa-threadbackward.c
> +++ b/gcc/tree-ssa-threadbackward.c
> @@ -211,10 +211,6 @@ fsm_find_control_statement_thread_paths (tree name,
>         continue;
>
>        int path_length = path->length ();
> -      /* A path with less than 2 basic blocks should not be jump-threaded.
> */
> -      if (path_length < 2)
> -       continue;
> -
>        if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
>         {
>           if (dump_file && (dump_flags & TDF_DETAILS))
>
Richard Biener Oct. 14, 2015, 10:16 a.m. UTC | #2
On Tue, Oct 13, 2015 at 2:52 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Tue, Oct 13, 2015 at 2:21 PM, Jeff Law <law@redhat.com> wrote:
>>
>> One of the cases that was missing in the FSM support is threading when the
>> path is a single block.  ie, a control statement's output can be statically
>> determined just by looking at PHIs in the control statement's block for one
>> or incoming edges.
>>
>> This is necessary to fix a regression if I turn off the old jump threader's
>> backedge support.  Just as important, Jan has in the past asked about a
>> trivial jump threader to be run during early optimizations.  Limiting the
>> FSM bits to this case would likely satisfy that need in the future.
>
> I think he asked for trivial forward threads though due to repeated tests.
> I hacked FRE to do this (I think), but maybe some trivial cleanup opportunities
> are still left here.  Honza?

This or other related patches in the range r228731:228774 has caused a quite
big jump in SPEC CPU 2000 binary sizes (notably 176.gcc - so maybe affecting
bootstrap as well, at -O3).  Are you sure this doesn't re-introduce DOM
effectively peeling all loops once?

Richard.

> Richard.
>
>> Bootstrapped and regression tested on x86_64-linux-gnu.  Installed on the
>> trunk.
>>
>> Jeff
>>
>> commit a53bb29a1dffd329aa6235b88b0c2a830aa5a59e
>> Author: Jeff Law <law@redhat.com>
>> Date:   Tue Oct 13 06:19:20 2015 -0600
>>
>>     [PATCH] Allow FSM to thread single block cases too
>>
>>         * tree-ssa-threadbackward.c
>> (fsm_find_control_statement_thread_paths):
>>         Allow single block jump threading paths.
>>
>>         * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
>>
>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>> index d71bcd2..caab533 100644
>> --- a/gcc/ChangeLog
>> +++ b/gcc/ChangeLog
>> @@ -1,3 +1,8 @@
>> +2015-10-13  Jeff Law  <law@redhat.com>
>> +
>> +       * tree-ssa-threadbackward.c
>> (fsm_find_control_statement_thread_paths):
>> +       Allow single block jump threading paths.
>> +
>>  2015-10-13  Tom de Vries  <tom@codesourcery.com>
>>
>>         PR tree-optimization/67476
>> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
>> index 4a08f0f..acf6df5 100644
>> --- a/gcc/testsuite/ChangeLog
>> +++ b/gcc/testsuite/ChangeLog
>> @@ -1,3 +1,7 @@
>> +2015-10-13  Jeff Law  <law@redhat.com>
>> +
>> +       * gcc.dg/tree-ssa/ssa-thread-13.c: New test.
>> +
>>  2015-10-12  Jeff Law  <law@redhat.com>
>>
>>         * gcc.dg/tree-ssa/ssa-thread-12.c: New test.
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
>> new file mode 100644
>> index 0000000..5051d11
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
>> @@ -0,0 +1,70 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
>> +/* { dg-final { scan-tree-dump "FSM" "vrp1" } } */
>> +
>> +typedef struct rtx_def *rtx;
>> +typedef const struct rtx_def *const_rtx;
>> +enum rtx_code
>> +{
>> +  UNKNOWN, VALUE, DEBUG_EXPR, EXPR_LIST, INSN_LIST, SEQUENCE, ADDRESS,
>> +    DEBUG_INSN, INSN, JUMP_INSN, CALL_INSN, BARRIER, CODE_LABEL, NOTE,
>> +    COND_EXEC, PARALLEL, ASM_INPUT, ASM_OPERANDS, UNSPEC, UNSPEC_VOLATILE,
>> +    ADDR_VEC, ADDR_DIFF_VEC, PREFETCH, SET, USE, CLOBBER, CALL, RETURN,
>> +    EH_RETURN, TRAP_IF, CONST_INT, CONST_FIXED, CONST_DOUBLE, CONST_VECTOR,
>> +    CONST_STRING, CONST, PC, REG, SCRATCH, SUBREG, STRICT_LOW_PART, CONCAT,
>> +    CONCATN, MEM, LABEL_REF, SYMBOL_REF, CC0, IF_THEN_ELSE, COMPARE, PLUS,
>> +    MINUS, NEG, MULT, SS_MULT, US_MULT, DIV, SS_DIV, US_DIV, MOD, UDIV,
>> UMOD,
>> +    AND, IOR, XOR, NOT, ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, ROTATERT, SMIN,
>> +    SMAX, UMIN, UMAX, PRE_DEC, PRE_INC, POST_DEC, POST_INC, PRE_MODIFY,
>> +    POST_MODIFY, NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU, UNORDERED,
>> +    ORDERED, UNEQ, UNGE, UNGT, UNLE, UNLT, LTGT, SIGN_EXTEND, ZERO_EXTEND,
>> +    TRUNCATE, FLOAT_EXTEND, FLOAT_TRUNCATE, FLOAT, FIX, UNSIGNED_FLOAT,
>> +    UNSIGNED_FIX, FRACT_CONVERT, UNSIGNED_FRACT_CONVERT, SAT_FRACT,
>> +    UNSIGNED_SAT_FRACT, ABS, SQRT, BSWAP, FFS, CLZ, CTZ, POPCOUNT, PARITY,
>> +    SIGN_EXTRACT, ZERO_EXTRACT, HIGH, LO_SUM, VEC_MERGE, VEC_SELECT,
>> +    VEC_CONCAT, VEC_DUPLICATE, SS_PLUS, US_PLUS, SS_MINUS, SS_NEG, US_NEG,
>> +    SS_ABS, SS_ASHIFT, US_ASHIFT, US_MINUS, SS_TRUNCATE, US_TRUNCATE, FMA,
>> +    VAR_LOCATION, DEBUG_IMPLICIT_PTR, ENTRY_VALUE, LAST_AND_UNUSED_RTX_CODE
>> +};
>> +union rtunion_def
>> +{
>> +  rtx rt_rtx;
>> +};
>> +typedef union rtunion_def rtunion;
>> +struct rtx_def
>> +{
>> +  __extension__ enum rtx_code code:16;
>> +  union u
>> +  {
>> +    rtunion fld[1];
>> +  }
>> +  u;
>> +};
>> +
>> +unsigned int rtx_cost (rtx, enum rtx_code, unsigned char);
>> +rtx single_set_2 (const_rtx, rtx);
>> +
>> +unsigned
>> +seq_cost (const_rtx seq, unsigned char speed)
>> +{
>> +  unsigned cost = 0;
>> +  rtx set;
>> +  for (; seq; seq = (((seq)->u.fld[2]).rt_rtx))
>> +    {
>> +      set =
>> +       (((((enum rtx_code) (seq)->code) == INSN)
>> +         || (((enum rtx_code) (seq)->code) == DEBUG_INSN)
>> +         || (((enum rtx_code) (seq)->code) == JUMP_INSN)
>> +         || (((enum rtx_code) (seq)->code) ==
>> +             CALL_INSN)) ? (((enum rtx_code) ((((seq)->u.fld[4]).rt_rtx))->
>> +                             code) ==
>> +                            SET ? (((seq)->u.fld[4]).
>> +                                   rt_rtx) : single_set_2 (seq,
>> +                                                           (((seq)->u.
>> +                                                             fld[4]).
>> +                                                            rt_rtx))) :
>> (rtx)
>> +        0);
>> +      if (set)
>> +       cost += rtx_cost (set, SET, speed);
>> +    }
>> +}
>> diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
>> index 5be6ee4..9128094 100644
>> --- a/gcc/tree-ssa-threadbackward.c
>> +++ b/gcc/tree-ssa-threadbackward.c
>> @@ -211,10 +211,6 @@ fsm_find_control_statement_thread_paths (tree name,
>>         continue;
>>
>>        int path_length = path->length ();
>> -      /* A path with less than 2 basic blocks should not be jump-threaded.
>> */
>> -      if (path_length < 2)
>> -       continue;
>> -
>>        if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
>>         {
>>           if (dump_file && (dump_flags & TDF_DETAILS))
>>
Jeff Law Oct. 14, 2015, 12:42 p.m. UTC | #3
On 10/14/2015 04:16 AM, Richard Biener wrote:
> On Tue, Oct 13, 2015 at 2:52 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Tue, Oct 13, 2015 at 2:21 PM, Jeff Law <law@redhat.com> wrote:
>>>
>>> One of the cases that was missing in the FSM support is threading when the
>>> path is a single block.  ie, a control statement's output can be statically
>>> determined just by looking at PHIs in the control statement's block for one
>>> or incoming edges.
>>>
>>> This is necessary to fix a regression if I turn off the old jump threader's
>>> backedge support.  Just as important, Jan has in the past asked about a
>>> trivial jump threader to be run during early optimizations.  Limiting the
>>> FSM bits to this case would likely satisfy that need in the future.
>>
>> I think he asked for trivial forward threads though due to repeated tests.
>> I hacked FRE to do this (I think), but maybe some trivial cleanup opportunities
>> are still left here.  Honza?
>
> This or other related patches in the range r228731:228774 has caused a quite
> big jump in SPEC CPU 2000 binary sizes (notably 176.gcc - so maybe affecting
> bootstrap as well, at -O3).  Are you sure this doesn't re-introduce DOM
> effectively peeling all loops once?
It's possible.  I've actually got a patch in overnight testing that 
introduces some of the heuristics to avoid mucking up loops to the FSM bits.

jeff
Richard Biener Oct. 14, 2015, 12:46 p.m. UTC | #4
On Wed, Oct 14, 2015 at 2:42 PM, Jeff Law <law@redhat.com> wrote:
> On 10/14/2015 04:16 AM, Richard Biener wrote:
>>
>> On Tue, Oct 13, 2015 at 2:52 PM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>>
>>> On Tue, Oct 13, 2015 at 2:21 PM, Jeff Law <law@redhat.com> wrote:
>>>>
>>>>
>>>> One of the cases that was missing in the FSM support is threading when
>>>> the
>>>> path is a single block.  ie, a control statement's output can be
>>>> statically
>>>> determined just by looking at PHIs in the control statement's block for
>>>> one
>>>> or incoming edges.
>>>>
>>>> This is necessary to fix a regression if I turn off the old jump
>>>> threader's
>>>> backedge support.  Just as important, Jan has in the past asked about a
>>>> trivial jump threader to be run during early optimizations.  Limiting
>>>> the
>>>> FSM bits to this case would likely satisfy that need in the future.
>>>
>>>
>>> I think he asked for trivial forward threads though due to repeated
>>> tests.
>>> I hacked FRE to do this (I think), but maybe some trivial cleanup
>>> opportunities
>>> are still left here.  Honza?
>>
>>
>> This or other related patches in the range r228731:228774 has caused a
>> quite
>> big jump in SPEC CPU 2000 binary sizes (notably 176.gcc - so maybe
>> affecting
>> bootstrap as well, at -O3).  Are you sure this doesn't re-introduce DOM
>> effectively peeling all loops once?
>
> It's possible.  I've actually got a patch in overnight testing that
> introduces some of the heuristics to avoid mucking up loops to the FSM bits.

Like never threading a loop exit test to the loop header (but only to the exit).
At least if it is the only exit in the loop (but maybe better for all exits).

Richard.

> jeff
>
Jeff Law Oct. 14, 2015, 12:55 p.m. UTC | #5
On 10/14/2015 06:46 AM, Richard Biener wrote:
>>> This or other related patches in the range r228731:228774 has caused a
>>> quite
>>> big jump in SPEC CPU 2000 binary sizes (notably 176.gcc - so maybe
>>> affecting
>>> bootstrap as well, at -O3).  Are you sure this doesn't re-introduce DOM
>>> effectively peeling all loops once?
>>
>> It's possible.  I've actually got a patch in overnight testing that
>> introduces some of the heuristics to avoid mucking up loops to the FSM bits.
>
> Like never threading a loop exit test to the loop header (but only to the exit).
> At least if it is the only exit in the loop (but maybe better for all exits).
Right.  The FSM bits are totally missing the restrictions on threading 
through the header or latch that we added to the old style threader many 
years ago.

That was OK as the FSM bits weren't used all that much and primarily 
were concerned with removing the multi-way branch.  With the move 
towards generalizing that code, we need the restrictions.   I expect to 
commit the restrictions today after some minor adjustments.

jeff
Jan Hubicka Oct. 14, 2015, 3:43 p.m. UTC | #6
> >>> I think he asked for trivial forward threads though due to repeated
> >>> tests.
> >>> I hacked FRE to do this (I think), but maybe some trivial cleanup
> >>> opportunities
> >>> are still left here.  Honza?

Well, unthreaded jumps quite confuse profile prediction and create profiles
that we can't fix later. An of course they count in time (and size sometimes)
estimates.

From cases I commonly see it is the usual lazyness of repeated tests comming
from early inlining/macro expansion and also C++ love to introduce

  if (ptr != NULL)
    ptr2 = &ptr->foo;
  else
    ptr2 = NULL

for instances of multiple inheritance. usually ptr is known to be non-NULL.
And also cases where if is uses to check individual cases without having proper
esles.

Honza
> >>
> >>
> >> This or other related patches in the range r228731:228774 has caused a
> >> quite
> >> big jump in SPEC CPU 2000 binary sizes (notably 176.gcc - so maybe
> >> affecting
> >> bootstrap as well, at -O3).  Are you sure this doesn't re-introduce DOM
> >> effectively peeling all loops once?
> >
> > It's possible.  I've actually got a patch in overnight testing that
> > introduces some of the heuristics to avoid mucking up loops to the FSM bits.
> 
> Like never threading a loop exit test to the loop header (but only to the exit).
> At least if it is the only exit in the loop (but maybe better for all exits).
> 
> Richard.
> 
> > jeff
> >
Jeff Law Oct. 14, 2015, 3:53 p.m. UTC | #7
On 10/14/2015 09:43 AM, Jan Hubicka wrote:
>>>>> I think he asked for trivial forward threads though due to repeated
>>>>> tests.
>>>>> I hacked FRE to do this (I think), but maybe some trivial cleanup
>>>>> opportunities
>>>>> are still left here.  Honza?
>
> Well, unthreaded jumps quite confuse profile prediction and create profiles
> that we can't fix later. An of course they count in time (and size sometimes)
> estimates.
>
>  From cases I commonly see it is the usual lazyness of repeated tests comming
> from early inlining/macro expansion and also C++ love to introduce
>
>    if (ptr != NULL)
>      ptr2 = &ptr->foo;
>    else
>      ptr2 = NULL
>
> for instances of multiple inheritance. usually ptr is known to be non-NULL.
> And also cases where if is uses to check individual cases without having proper
> esles.
Yea.  I still  see a variety of trivial jump threads lying around early 
in the pipeline.

The nice thing about the backwards walking stuff in this context is we 
can control how hard it looks for jump threads much better.

The difficult thing is it's not currently prepared to find the implicit 
sets from conditionals.  Re-using the ASSERT_EXPR mechanisms from vrp 
may be the solution.  I haven't tried that yet, but it's in the back of 
my mind for solving that class of problems cleanly.



jeff
Richard Biener Oct. 15, 2015, 8:28 a.m. UTC | #8
On Wed, Oct 14, 2015 at 5:53 PM, Jeff Law <law@redhat.com> wrote:
> On 10/14/2015 09:43 AM, Jan Hubicka wrote:
>>>>>>
>>>>>> I think he asked for trivial forward threads though due to repeated
>>>>>> tests.
>>>>>> I hacked FRE to do this (I think), but maybe some trivial cleanup
>>>>>> opportunities
>>>>>> are still left here.  Honza?
>>
>>
>> Well, unthreaded jumps quite confuse profile prediction and create
>> profiles
>> that we can't fix later. An of course they count in time (and size
>> sometimes)
>> estimates.
>>
>>  From cases I commonly see it is the usual lazyness of repeated tests
>> comming
>> from early inlining/macro expansion and also C++ love to introduce
>>
>>    if (ptr != NULL)
>>      ptr2 = &ptr->foo;
>>    else
>>      ptr2 = NULL
>>
>> for instances of multiple inheritance. usually ptr is known to be
>> non-NULL.
>> And also cases where if is uses to check individual cases without having
>> proper
>> esles.
>
> Yea.  I still  see a variety of trivial jump threads lying around early in
> the pipeline.
>
> The nice thing about the backwards walking stuff in this context is we can
> control how hard it looks for jump threads much better.
>
> The difficult thing is it's not currently prepared to find the implicit sets
> from conditionals.  Re-using the ASSERT_EXPR mechanisms from vrp may be the
> solution.  I haven't tried that yet, but it's in the back of my mind for
> solving that class of problems cleanly.

Now that FRE performs a DOM walk when value-numbering and considers
temporary equivalences (well, ok, it does not, it only records known true/false
predicates) one could implement forward jump-threading quite easily in FRE
I think (the value-numbering already will work like if we have
threaded the jump,
we just won't do the actual threading / path duplication).

Do you have any actual testcases I can have a look at?

I suppose running DOM after FRE1 and looking for "extra" optimizations
done on the testsuite would do the trick but is also likely to just uncover
"complex" cases.

Richard.

>
>
> jeff
>
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d71bcd2..caab533 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@ 
+2015-10-13  Jeff Law  <law@redhat.com>
+
+	* tree-ssa-threadbackward.c (fsm_find_control_statement_thread_paths):
+	Allow single block jump threading paths.
+
 2015-10-13  Tom de Vries  <tom@codesourcery.com>
 
 	PR tree-optimization/67476
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4a08f0f..acf6df5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@ 
+2015-10-13  Jeff Law  <law@redhat.com>
+
+	* gcc.dg/tree-ssa/ssa-thread-13.c: New test.
+
 2015-10-12  Jeff Law  <law@redhat.com>
 
 	* gcc.dg/tree-ssa/ssa-thread-12.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
new file mode 100644
index 0000000..5051d11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-13.c
@@ -0,0 +1,70 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details" } */
+/* { dg-final { scan-tree-dump "FSM" "vrp1" } } */
+
+typedef struct rtx_def *rtx;
+typedef const struct rtx_def *const_rtx;
+enum rtx_code
+{
+  UNKNOWN, VALUE, DEBUG_EXPR, EXPR_LIST, INSN_LIST, SEQUENCE, ADDRESS,
+    DEBUG_INSN, INSN, JUMP_INSN, CALL_INSN, BARRIER, CODE_LABEL, NOTE,
+    COND_EXEC, PARALLEL, ASM_INPUT, ASM_OPERANDS, UNSPEC, UNSPEC_VOLATILE,
+    ADDR_VEC, ADDR_DIFF_VEC, PREFETCH, SET, USE, CLOBBER, CALL, RETURN,
+    EH_RETURN, TRAP_IF, CONST_INT, CONST_FIXED, CONST_DOUBLE, CONST_VECTOR,
+    CONST_STRING, CONST, PC, REG, SCRATCH, SUBREG, STRICT_LOW_PART, CONCAT,
+    CONCATN, MEM, LABEL_REF, SYMBOL_REF, CC0, IF_THEN_ELSE, COMPARE, PLUS,
+    MINUS, NEG, MULT, SS_MULT, US_MULT, DIV, SS_DIV, US_DIV, MOD, UDIV, UMOD,
+    AND, IOR, XOR, NOT, ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, ROTATERT, SMIN,
+    SMAX, UMIN, UMAX, PRE_DEC, PRE_INC, POST_DEC, POST_INC, PRE_MODIFY,
+    POST_MODIFY, NE, EQ, GE, GT, LE, LT, GEU, GTU, LEU, LTU, UNORDERED,
+    ORDERED, UNEQ, UNGE, UNGT, UNLE, UNLT, LTGT, SIGN_EXTEND, ZERO_EXTEND,
+    TRUNCATE, FLOAT_EXTEND, FLOAT_TRUNCATE, FLOAT, FIX, UNSIGNED_FLOAT,
+    UNSIGNED_FIX, FRACT_CONVERT, UNSIGNED_FRACT_CONVERT, SAT_FRACT,
+    UNSIGNED_SAT_FRACT, ABS, SQRT, BSWAP, FFS, CLZ, CTZ, POPCOUNT, PARITY,
+    SIGN_EXTRACT, ZERO_EXTRACT, HIGH, LO_SUM, VEC_MERGE, VEC_SELECT,
+    VEC_CONCAT, VEC_DUPLICATE, SS_PLUS, US_PLUS, SS_MINUS, SS_NEG, US_NEG,
+    SS_ABS, SS_ASHIFT, US_ASHIFT, US_MINUS, SS_TRUNCATE, US_TRUNCATE, FMA,
+    VAR_LOCATION, DEBUG_IMPLICIT_PTR, ENTRY_VALUE, LAST_AND_UNUSED_RTX_CODE
+};
+union rtunion_def
+{
+  rtx rt_rtx;
+};
+typedef union rtunion_def rtunion;
+struct rtx_def
+{
+  __extension__ enum rtx_code code:16;
+  union u
+  {
+    rtunion fld[1];
+  }
+  u;
+};
+
+unsigned int rtx_cost (rtx, enum rtx_code, unsigned char);
+rtx single_set_2 (const_rtx, rtx);
+
+unsigned
+seq_cost (const_rtx seq, unsigned char speed)
+{
+  unsigned cost = 0;
+  rtx set;
+  for (; seq; seq = (((seq)->u.fld[2]).rt_rtx))
+    {
+      set =
+	(((((enum rtx_code) (seq)->code) == INSN)
+	  || (((enum rtx_code) (seq)->code) == DEBUG_INSN)
+	  || (((enum rtx_code) (seq)->code) == JUMP_INSN)
+	  || (((enum rtx_code) (seq)->code) ==
+	      CALL_INSN)) ? (((enum rtx_code) ((((seq)->u.fld[4]).rt_rtx))->
+			      code) ==
+			     SET ? (((seq)->u.fld[4]).
+				    rt_rtx) : single_set_2 (seq,
+							    (((seq)->u.
+							      fld[4]).
+							     rt_rtx))) : (rtx)
+	 0);
+      if (set)
+	cost += rtx_cost (set, SET, speed);
+    }
+}
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 5be6ee4..9128094 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -211,10 +211,6 @@  fsm_find_control_statement_thread_paths (tree name,
 	continue;
 
       int path_length = path->length ();
-      /* A path with less than 2 basic blocks should not be jump-threaded.  */
-      if (path_length < 2)
-	continue;
-
       if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))