diff mbox

Fix RTL DSE compile time hog (PR rtl-optimization/48141)

Message ID 20110316115252.GI30899@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek March 16, 2011, 11:52 a.m. UTC
On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote:
> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote:
> >so how much time does this save?
> >
> >I agree that this is a useful simplification, but it seems unlikely to
> >be that important in real code.
> >it seems like the 5000 store test would in general provide a better
> >safety valve.
> 
> I think having both is a good idea.

Here is the second patch, ok for both to trunk if this one passes
bootstrap/regtest?

With just this patch alone on the testcase with the default
--param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds
in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds.
With the earlier patch in addition to this one the time in DSE1+DSE2
is 0.3 seconds no matter what the parameter is.

2011-03-16  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/48141
	* params.def (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES): New.
	* dse.c: Include params.h.
	(active_local_stores_len): New variable.
	(add_wild_read, dse_step1): Clear it when setting active_local_stores
	to NULL.
	(record_store, check_mem_read_rtx): Decrease it when removing
	from the chain.
	(scan_insn): Likewise.  Increase it when adding to chain, if it
	reaches PARAM_MAX_DSE_ACTIVE_LOCAL_STORES limit, set to 1 and
	set active_local_stores to NULL before the addition.
	* Makefile.in (dse.o): Depend on $(PARAMS_H).



	Jakub

Comments

Richard Biener March 16, 2011, 11:59 a.m. UTC | #1
On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote:
>> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote:
>> >so how much time does this save?
>> >
>> >I agree that this is a useful simplification, but it seems unlikely to
>> >be that important in real code.
>> >it seems like the 5000 store test would in general provide a better
>> >safety valve.
>>
>> I think having both is a good idea.
>
> Here is the second patch, ok for both to trunk if this one passes
> bootstrap/regtest?
>
> With just this patch alone on the testcase with the default
> --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds
> in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds.
> With the earlier patch in addition to this one the time in DSE1+DSE2
> is 0.3 seconds no matter what the parameter is.

That suggests we can avoid the new param?  (btw, we should start
thinking of emitting a diagnostic if we run into such artificial limits)

The earlier patch is ok for all branches.

Thanks,
Richard.

> 2011-03-16  Jakub Jelinek  <jakub@redhat.com>
>
>        PR rtl-optimization/48141
>        * params.def (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES): New.
>        * dse.c: Include params.h.
>        (active_local_stores_len): New variable.
>        (add_wild_read, dse_step1): Clear it when setting active_local_stores
>        to NULL.
>        (record_store, check_mem_read_rtx): Decrease it when removing
>        from the chain.
>        (scan_insn): Likewise.  Increase it when adding to chain, if it
>        reaches PARAM_MAX_DSE_ACTIVE_LOCAL_STORES limit, set to 1 and
>        set active_local_stores to NULL before the addition.
>        * Makefile.in (dse.o): Depend on $(PARAMS_H).
>
> --- gcc/params.def.jj   2011-02-15 15:42:27.000000000 +0100
> +++ gcc/params.def      2011-03-16 12:20:16.000000000 +0100
> @@ -698,6 +698,12 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS,
>         "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass",
>         100, 0, 0)
>
> +/* This is the maximum number of active local stores RTL DSE will consider.  */
> +DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES,
> +         "max-dse-active-local-stores",
> +         "Maximum number of active local stores in RTL dead store elimination",
> +         5000, 0, 0)
> +
>  /* Prefetching and cache-optimizations related parameters.  Default values are
>    usually set by machine description.  */
>
> --- gcc/dse.c.jj        2011-02-15 15:42:26.000000000 +0100
> +++ gcc/dse.c   2011-03-16 12:34:18.000000000 +0100
> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.
>  #include "optabs.h"
>  #include "dbgcnt.h"
>  #include "target.h"
> +#include "params.h"
>
>  /* This file contains three techniques for performing Dead Store
>    Elimination (dse).
> @@ -387,6 +388,7 @@ static alloc_pool insn_info_pool;
>  /* The linked list of stores that are under consideration in this
>    basic block.  */
>  static insn_info_t active_local_stores;
> +static int active_local_stores_len;
>
>  struct bb_info
>  {
> @@ -947,6 +949,7 @@ add_wild_read (bb_info_t bb_info)
>     }
>   insn_info->wild_read = true;
>   active_local_stores = NULL;
> +  active_local_stores_len = 0;
>  }
>
>
> @@ -1538,6 +1541,7 @@ record_store (rtx body, bb_info_t bb_inf
>        {
>          insn_info_t insn_to_delete = ptr;
>
> +         active_local_stores_len--;
>          if (last)
>            last->next_local_store = ptr->next_local_store;
>          else
> @@ -2074,6 +2078,7 @@ check_mem_read_rtx (rtx *loc, void *data
>              if (dump_file)
>                dump_insn_info ("removing from active", i_ptr);
>
> +             active_local_stores_len--;
>              if (last)
>                last->next_local_store = i_ptr->next_local_store;
>              else
> @@ -2163,6 +2168,7 @@ check_mem_read_rtx (rtx *loc, void *data
>              if (dump_file)
>                dump_insn_info ("removing from active", i_ptr);
>
> +             active_local_stores_len--;
>              if (last)
>                last->next_local_store = i_ptr->next_local_store;
>              else
> @@ -2222,6 +2228,7 @@ check_mem_read_rtx (rtx *loc, void *data
>              if (dump_file)
>                dump_insn_info ("removing from active", i_ptr);
>
> +             active_local_stores_len--;
>              if (last)
>                last->next_local_store = i_ptr->next_local_store;
>              else
> @@ -2426,6 +2433,7 @@ scan_insn (bb_info_t bb_info, rtx insn)
>                  if (dump_file)
>                    dump_insn_info ("removing from active", i_ptr);
>
> +                 active_local_stores_len--;
>                  if (last)
>                    last->next_local_store = i_ptr->next_local_store;
>                  else
> @@ -2453,6 +2461,12 @@ scan_insn (bb_info_t bb_info, rtx insn)
>                    fprintf (dump_file, "handling memset as BLKmode store\n");
>                  if (mems_found == 1)
>                    {
> +                     if (active_local_stores_len++
> +                         >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
> +                       {
> +                         active_local_stores_len = 1;
> +                         active_local_stores = NULL;
> +                       }
>                      insn_info->next_local_store = active_local_stores;
>                      active_local_stores = insn_info;
>                    }
> @@ -2496,6 +2510,12 @@ scan_insn (bb_info_t bb_info, rtx insn)
>      it as cannot delete.  This simplifies the processing later.  */
>   if (mems_found == 1)
>     {
> +      if (active_local_stores_len++
> +         >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
> +       {
> +         active_local_stores_len = 1;
> +         active_local_stores = NULL;
> +       }
>       insn_info->next_local_store = active_local_stores;
>       active_local_stores = insn_info;
>     }
> @@ -2534,6 +2554,7 @@ remove_useless_values (cselib_val *base)
>
>       if (del)
>        {
> +         active_local_stores_len--;
>          if (last)
>            last->next_local_store = insn_info->next_local_store;
>          else
> @@ -2584,6 +2605,7 @@ dse_step1 (void)
>            = create_alloc_pool ("cse_store_info_pool",
>                                 sizeof (struct store_info), 100);
>          active_local_stores = NULL;
> +         active_local_stores_len = 0;
>          cselib_clear_table ();
>
>          /* Scan the insns.  */
> --- gcc/Makefile.in.jj  2011-02-02 16:30:50.000000000 +0100
> +++ gcc/Makefile.in     2011-03-16 12:26:12.000000000 +0100
> @@ -3070,7 +3070,7 @@ dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co
>    $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \
>    $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \
>    $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \
> -   $(BITMAP_H)
> +   $(BITMAP_H) $(PARAMS_H)
>  fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
>    $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \
>    output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H) \
>
>
>        Jakub
>
Paolo Bonzini March 16, 2011, 12:19 p.m. UTC | #2
On 03/16/2011 12:59 PM, Richard Guenther wrote:
> On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek<jakub@redhat.com>  wrote:
>> On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote:
>>> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote:
>>>> so how much time does this save?
>>>>
>>>> I agree that this is a useful simplification, but it seems unlikely to
>>>> be that important in real code.
>>>> it seems like the 5000 store test would in general provide a better
>>>> safety valve.
>>>
>>> I think having both is a good idea.
>>
>> Here is the second patch, ok for both to trunk if this one passes
>> bootstrap/regtest?
>>
>> With just this patch alone on the testcase with the default
>> --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds
>> in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds.
>> With the earlier patch in addition to this one the time in DSE1+DSE2
>> is 0.3 seconds no matter what the parameter is.
>
> That suggests we can avoid the new param?  (btw, we should start
> thinking of emitting a diagnostic if we run into such artificial limits)

Jakub's behavior is expected, since the number of active local stores 
should be 1 with the earlier patch in that testcase.  However, similar 
artificial testcases can probably be constructed that still trigger the 
ill behavior.

But I agree that the first patch is enough for now, and in general for 
release branches, especially without a testcase for which DSE is really 
the top hog.

Thanks,

Paolo
Jakub Jelinek March 16, 2011, 1:09 p.m. UTC | #3
On Wed, Mar 16, 2011 at 01:19:30PM +0100, Paolo Bonzini wrote:
> Jakub's behavior is expected, since the number of active local
> stores should be 1 with the earlier patch in that testcase.
> However, similar artificial testcases can probably be constructed
> that still trigger the ill behavior.

Note the original testcase wasn't so artificial, some customer was actually
using it to create very large functions, presumably to test linker behavior
or whatever.  In the original there were even more stores in each function,
many of those functions and several sources with them, so it took many hours
in DSE.

Here is an artificial testcase which shows that just the first patch isn't
enough:

/* PR rtl-optimization/48141 */
/* { dg-do compile } */
/* { dg-options "-O -fno-tree-fre" } */

#define A(n) volatile int n = 0;
#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9)
#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9)
#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9)
#define E(n) D(n##0) D(n##1) D(n##2) D(n##3) D(n##4) D(n##5) D(n##6) D(n##7) D(n##8) D(n##9)
#define F(n) E(n##0) E(n##1) E(n##2) E(n##3) E(n##4) E(n##5) E(n##6) E(n##7) E(n##8) E(n##9)

int
foo (void)
{
  F(i)
  return 0;
}

with both patches in -O -fno-tree-fre --param=max-dse-active-local-stores=10
this takes 16.68 seconds to compile, out of which 0.46s is spent in
DSE1+DSE2.
With -O -fno-tree-fre --param=max-dse-active-local-stores=10000000 it
compiles in in 165.80s, DSE1+DSE2 time is 149.55s.  -fno-tree-fre is
here because that pass (in particular alias stmt walking from fre) is
terribly expensive as well on this testcase.

	Jakub
Kenneth Zadeck March 16, 2011, 1:27 p.m. UTC | #4
i think that i sympathize with richard's "do we really need another 
parm?" whine.  I would have just nailed it in without the parm.   I 
think that it is unreasonable to expect that a program with basic blocks 
this big needs the full range of optimizations performed.

It is perfectly reasonable to have the quality of the optimization 
degrade as a function of size.

If you want to tie the trigger to something, then leave out the parm and 
tie it -O3.


On 03/16/2011 07:59 AM, Richard Guenther wrote:
> On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek<jakub@redhat.com>  wrote:
>> On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote:
>>> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote:
>>>> so how much time does this save?
>>>>
>>>> I agree that this is a useful simplification, but it seems unlikely to
>>>> be that important in real code.
>>>> it seems like the 5000 store test would in general provide a better
>>>> safety valve.
>>> I think having both is a good idea.
>> Here is the second patch, ok for both to trunk if this one passes
>> bootstrap/regtest?
>>
>> With just this patch alone on the testcase with the default
>> --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds
>> in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds.
>> With the earlier patch in addition to this one the time in DSE1+DSE2
>> is 0.3 seconds no matter what the parameter is.
> That suggests we can avoid the new param?  (btw, we should start
> thinking of emitting a diagnostic if we run into such artificial limits)
>
> The earlier patch is ok for all branches.
>
> Thanks,
> Richard.
>
>> 2011-03-16  Jakub Jelinek<jakub@redhat.com>
>>
>>         PR rtl-optimization/48141
>>         * params.def (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES): New.
>>         * dse.c: Include params.h.
>>         (active_local_stores_len): New variable.
>>         (add_wild_read, dse_step1): Clear it when setting active_local_stores
>>         to NULL.
>>         (record_store, check_mem_read_rtx): Decrease it when removing
>>         from the chain.
>>         (scan_insn): Likewise.  Increase it when adding to chain, if it
>>         reaches PARAM_MAX_DSE_ACTIVE_LOCAL_STORES limit, set to 1 and
>>         set active_local_stores to NULL before the addition.
>>         * Makefile.in (dse.o): Depend on $(PARAMS_H).
>>
>> --- gcc/params.def.jj   2011-02-15 15:42:27.000000000 +0100
>> +++ gcc/params.def      2011-03-16 12:20:16.000000000 +0100
>> @@ -698,6 +698,12 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS,
>>          "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass",
>>          100, 0, 0)
>>
>> +/* This is the maximum number of active local stores RTL DSE will consider.  */
>> +DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES,
>> +         "max-dse-active-local-stores",
>> +         "Maximum number of active local stores in RTL dead store elimination",
>> +         5000, 0, 0)
>> +
>>   /* Prefetching and cache-optimizations related parameters.  Default values are
>>     usually set by machine description.  */
>>
>> --- gcc/dse.c.jj        2011-02-15 15:42:26.000000000 +0100
>> +++ gcc/dse.c   2011-03-16 12:34:18.000000000 +0100
>> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.
>>   #include "optabs.h"
>>   #include "dbgcnt.h"
>>   #include "target.h"
>> +#include "params.h"
>>
>>   /* This file contains three techniques for performing Dead Store
>>     Elimination (dse).
>> @@ -387,6 +388,7 @@ static alloc_pool insn_info_pool;
>>   /* The linked list of stores that are under consideration in this
>>     basic block.  */
>>   static insn_info_t active_local_stores;
>> +static int active_local_stores_len;
>>
>>   struct bb_info
>>   {
>> @@ -947,6 +949,7 @@ add_wild_read (bb_info_t bb_info)
>>      }
>>    insn_info->wild_read = true;
>>    active_local_stores = NULL;
>> +  active_local_stores_len = 0;
>>   }
>>
>>
>> @@ -1538,6 +1541,7 @@ record_store (rtx body, bb_info_t bb_inf
>>         {
>>           insn_info_t insn_to_delete = ptr;
>>
>> +         active_local_stores_len--;
>>           if (last)
>>             last->next_local_store = ptr->next_local_store;
>>           else
>> @@ -2074,6 +2078,7 @@ check_mem_read_rtx (rtx *loc, void *data
>>               if (dump_file)
>>                 dump_insn_info ("removing from active", i_ptr);
>>
>> +             active_local_stores_len--;
>>               if (last)
>>                 last->next_local_store = i_ptr->next_local_store;
>>               else
>> @@ -2163,6 +2168,7 @@ check_mem_read_rtx (rtx *loc, void *data
>>               if (dump_file)
>>                 dump_insn_info ("removing from active", i_ptr);
>>
>> +             active_local_stores_len--;
>>               if (last)
>>                 last->next_local_store = i_ptr->next_local_store;
>>               else
>> @@ -2222,6 +2228,7 @@ check_mem_read_rtx (rtx *loc, void *data
>>               if (dump_file)
>>                 dump_insn_info ("removing from active", i_ptr);
>>
>> +             active_local_stores_len--;
>>               if (last)
>>                 last->next_local_store = i_ptr->next_local_store;
>>               else
>> @@ -2426,6 +2433,7 @@ scan_insn (bb_info_t bb_info, rtx insn)
>>                   if (dump_file)
>>                     dump_insn_info ("removing from active", i_ptr);
>>
>> +                 active_local_stores_len--;
>>                   if (last)
>>                     last->next_local_store = i_ptr->next_local_store;
>>                   else
>> @@ -2453,6 +2461,12 @@ scan_insn (bb_info_t bb_info, rtx insn)
>>                     fprintf (dump_file, "handling memset as BLKmode store\n");
>>                   if (mems_found == 1)
>>                     {
>> +                     if (active_local_stores_len++
>> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
>> +                       {
>> +                         active_local_stores_len = 1;
>> +                         active_local_stores = NULL;
>> +                       }
>>                       insn_info->next_local_store = active_local_stores;
>>                       active_local_stores = insn_info;
>>                     }
>> @@ -2496,6 +2510,12 @@ scan_insn (bb_info_t bb_info, rtx insn)
>>       it as cannot delete.  This simplifies the processing later.  */
>>    if (mems_found == 1)
>>      {
>> +      if (active_local_stores_len++
>> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
>> +       {
>> +         active_local_stores_len = 1;
>> +         active_local_stores = NULL;
>> +       }
>>        insn_info->next_local_store = active_local_stores;
>>        active_local_stores = insn_info;
>>      }
>> @@ -2534,6 +2554,7 @@ remove_useless_values (cselib_val *base)
>>
>>        if (del)
>>         {
>> +         active_local_stores_len--;
>>           if (last)
>>             last->next_local_store = insn_info->next_local_store;
>>           else
>> @@ -2584,6 +2605,7 @@ dse_step1 (void)
>>             = create_alloc_pool ("cse_store_info_pool",
>>                                  sizeof (struct store_info), 100);
>>           active_local_stores = NULL;
>> +         active_local_stores_len = 0;
>>           cselib_clear_table ();
>>
>>           /* Scan the insns.  */
>> --- gcc/Makefile.in.jj  2011-02-02 16:30:50.000000000 +0100
>> +++ gcc/Makefile.in     2011-03-16 12:26:12.000000000 +0100
>> @@ -3070,7 +3070,7 @@ dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co
>>     $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \
>>     $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \
>>     $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \
>> -   $(BITMAP_H)
>> +   $(BITMAP_H) $(PARAMS_H)
>>   fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
>>     $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \
>>     output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H) \
>>
>>
>>         Jakub
>>
Richard Biener March 16, 2011, 1:30 p.m. UTC | #5
On Wed, Mar 16, 2011 at 2:27 PM, Kenneth Zadeck
<zadeck@naturalbridge.com> wrote:
> i think that i sympathize with richard's "do we really need another parm?"
> whine.  I would have just nailed it in without the parm.   I think that it
> is unreasonable to expect that a program with basic blocks this big needs
> the full range of optimizations performed.
>
> It is perfectly reasonable to have the quality of the optimization degrade
> as a function of size.
>
> If you want to tie the trigger to something, then leave out the parm and tie
> it -O3.

I think having --param limits for this kind of stuff is exactly the right
thing to do.  We should try to improve both documentation (list
those params in a separate section) and diagnostics.

Richard.

>
> On 03/16/2011 07:59 AM, Richard Guenther wrote:
>>
>> On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek<jakub@redhat.com>  wrote:
>>>
>>> On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote:
>>>>
>>>> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote:
>>>>>
>>>>> so how much time does this save?
>>>>>
>>>>> I agree that this is a useful simplification, but it seems unlikely to
>>>>> be that important in real code.
>>>>> it seems like the 5000 store test would in general provide a better
>>>>> safety valve.
>>>>
>>>> I think having both is a good idea.
>>>
>>> Here is the second patch, ok for both to trunk if this one passes
>>> bootstrap/regtest?
>>>
>>> With just this patch alone on the testcase with the default
>>> --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds
>>> in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds.
>>> With the earlier patch in addition to this one the time in DSE1+DSE2
>>> is 0.3 seconds no matter what the parameter is.
>>
>> That suggests we can avoid the new param?  (btw, we should start
>> thinking of emitting a diagnostic if we run into such artificial limits)
>>
>> The earlier patch is ok for all branches.
>>
>> Thanks,
>> Richard.
>>
>>> 2011-03-16  Jakub Jelinek<jakub@redhat.com>
>>>
>>>        PR rtl-optimization/48141
>>>        * params.def (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES): New.
>>>        * dse.c: Include params.h.
>>>        (active_local_stores_len): New variable.
>>>        (add_wild_read, dse_step1): Clear it when setting
>>> active_local_stores
>>>        to NULL.
>>>        (record_store, check_mem_read_rtx): Decrease it when removing
>>>        from the chain.
>>>        (scan_insn): Likewise.  Increase it when adding to chain, if it
>>>        reaches PARAM_MAX_DSE_ACTIVE_LOCAL_STORES limit, set to 1 and
>>>        set active_local_stores to NULL before the addition.
>>>        * Makefile.in (dse.o): Depend on $(PARAMS_H).
>>>
>>> --- gcc/params.def.jj   2011-02-15 15:42:27.000000000 +0100
>>> +++ gcc/params.def      2011-03-16 12:20:16.000000000 +0100
>>> @@ -698,6 +698,12 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS,
>>>         "The maximum number of instructions ready to be issued to be
>>> considered by the scheduler during the first scheduling pass",
>>>         100, 0, 0)
>>>
>>> +/* This is the maximum number of active local stores RTL DSE will
>>> consider.  */
>>> +DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES,
>>> +         "max-dse-active-local-stores",
>>> +         "Maximum number of active local stores in RTL dead store
>>> elimination",
>>> +         5000, 0, 0)
>>> +
>>>  /* Prefetching and cache-optimizations related parameters.  Default
>>> values are
>>>    usually set by machine description.  */
>>>
>>> --- gcc/dse.c.jj        2011-02-15 15:42:26.000000000 +0100
>>> +++ gcc/dse.c   2011-03-16 12:34:18.000000000 +0100
>>> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.
>>>  #include "optabs.h"
>>>  #include "dbgcnt.h"
>>>  #include "target.h"
>>> +#include "params.h"
>>>
>>>  /* This file contains three techniques for performing Dead Store
>>>    Elimination (dse).
>>> @@ -387,6 +388,7 @@ static alloc_pool insn_info_pool;
>>>  /* The linked list of stores that are under consideration in this
>>>    basic block.  */
>>>  static insn_info_t active_local_stores;
>>> +static int active_local_stores_len;
>>>
>>>  struct bb_info
>>>  {
>>> @@ -947,6 +949,7 @@ add_wild_read (bb_info_t bb_info)
>>>     }
>>>   insn_info->wild_read = true;
>>>   active_local_stores = NULL;
>>> +  active_local_stores_len = 0;
>>>  }
>>>
>>>
>>> @@ -1538,6 +1541,7 @@ record_store (rtx body, bb_info_t bb_inf
>>>        {
>>>          insn_info_t insn_to_delete = ptr;
>>>
>>> +         active_local_stores_len--;
>>>          if (last)
>>>            last->next_local_store = ptr->next_local_store;
>>>          else
>>> @@ -2074,6 +2078,7 @@ check_mem_read_rtx (rtx *loc, void *data
>>>              if (dump_file)
>>>                dump_insn_info ("removing from active", i_ptr);
>>>
>>> +             active_local_stores_len--;
>>>              if (last)
>>>                last->next_local_store = i_ptr->next_local_store;
>>>              else
>>> @@ -2163,6 +2168,7 @@ check_mem_read_rtx (rtx *loc, void *data
>>>              if (dump_file)
>>>                dump_insn_info ("removing from active", i_ptr);
>>>
>>> +             active_local_stores_len--;
>>>              if (last)
>>>                last->next_local_store = i_ptr->next_local_store;
>>>              else
>>> @@ -2222,6 +2228,7 @@ check_mem_read_rtx (rtx *loc, void *data
>>>              if (dump_file)
>>>                dump_insn_info ("removing from active", i_ptr);
>>>
>>> +             active_local_stores_len--;
>>>              if (last)
>>>                last->next_local_store = i_ptr->next_local_store;
>>>              else
>>> @@ -2426,6 +2433,7 @@ scan_insn (bb_info_t bb_info, rtx insn)
>>>                  if (dump_file)
>>>                    dump_insn_info ("removing from active", i_ptr);
>>>
>>> +                 active_local_stores_len--;
>>>                  if (last)
>>>                    last->next_local_store = i_ptr->next_local_store;
>>>                  else
>>> @@ -2453,6 +2461,12 @@ scan_insn (bb_info_t bb_info, rtx insn)
>>>                    fprintf (dump_file, "handling memset as BLKmode
>>> store\n");
>>>                  if (mems_found == 1)
>>>                    {
>>> +                     if (active_local_stores_len++
>>> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
>>> +                       {
>>> +                         active_local_stores_len = 1;
>>> +                         active_local_stores = NULL;
>>> +                       }
>>>                      insn_info->next_local_store = active_local_stores;
>>>                      active_local_stores = insn_info;
>>>                    }
>>> @@ -2496,6 +2510,12 @@ scan_insn (bb_info_t bb_info, rtx insn)
>>>      it as cannot delete.  This simplifies the processing later.  */
>>>   if (mems_found == 1)
>>>     {
>>> +      if (active_local_stores_len++
>>> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
>>> +       {
>>> +         active_local_stores_len = 1;
>>> +         active_local_stores = NULL;
>>> +       }
>>>       insn_info->next_local_store = active_local_stores;
>>>       active_local_stores = insn_info;
>>>     }
>>> @@ -2534,6 +2554,7 @@ remove_useless_values (cselib_val *base)
>>>
>>>       if (del)
>>>        {
>>> +         active_local_stores_len--;
>>>          if (last)
>>>            last->next_local_store = insn_info->next_local_store;
>>>          else
>>> @@ -2584,6 +2605,7 @@ dse_step1 (void)
>>>            = create_alloc_pool ("cse_store_info_pool",
>>>                                 sizeof (struct store_info), 100);
>>>          active_local_stores = NULL;
>>> +         active_local_stores_len = 0;
>>>          cselib_clear_table ();
>>>
>>>          /* Scan the insns.  */
>>> --- gcc/Makefile.in.jj  2011-02-02 16:30:50.000000000 +0100
>>> +++ gcc/Makefile.in     2011-03-16 12:26:12.000000000 +0100
>>> @@ -3070,7 +3070,7 @@ dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co
>>>    $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h
>>> \
>>>    $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \
>>>    $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \
>>> -   $(BITMAP_H)
>>> +   $(BITMAP_H) $(PARAMS_H)
>>>  fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H)
>>> \
>>>    $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H)
>>> $(BASIC_BLOCK_H) \
>>>    output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H)
>>> \
>>>
>>>
>>>        Jakub
>>>
>
diff mbox

Patch

--- gcc/params.def.jj	2011-02-15 15:42:27.000000000 +0100
+++ gcc/params.def	2011-03-16 12:20:16.000000000 +0100
@@ -698,6 +698,12 @@  DEFPARAM(PARAM_MAX_SCHED_READY_INSNS,
 	 "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass",
 	 100, 0, 0)
 
+/* This is the maximum number of active local stores RTL DSE will consider.  */
+DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES,
+	  "max-dse-active-local-stores",
+	  "Maximum number of active local stores in RTL dead store elimination",
+	  5000, 0, 0)
+
 /* Prefetching and cache-optimizations related parameters.  Default values are
    usually set by machine description.  */
 
--- gcc/dse.c.jj	2011-02-15 15:42:26.000000000 +0100
+++ gcc/dse.c	2011-03-16 12:34:18.000000000 +0100
@@ -47,6 +47,7 @@  along with GCC; see the file COPYING3.  
 #include "optabs.h"
 #include "dbgcnt.h"
 #include "target.h"
+#include "params.h"
 
 /* This file contains three techniques for performing Dead Store
    Elimination (dse).
@@ -387,6 +388,7 @@  static alloc_pool insn_info_pool;
 /* The linked list of stores that are under consideration in this
    basic block.  */
 static insn_info_t active_local_stores;
+static int active_local_stores_len;
 
 struct bb_info
 {
@@ -947,6 +949,7 @@  add_wild_read (bb_info_t bb_info)
     }
   insn_info->wild_read = true;
   active_local_stores = NULL;
+  active_local_stores_len = 0;
 }
 
 
@@ -1538,6 +1541,7 @@  record_store (rtx body, bb_info_t bb_inf
 	{
 	  insn_info_t insn_to_delete = ptr;
 
+	  active_local_stores_len--;
 	  if (last)
 	    last->next_local_store = ptr->next_local_store;
 	  else
@@ -2074,6 +2078,7 @@  check_mem_read_rtx (rtx *loc, void *data
 	      if (dump_file)
 		dump_insn_info ("removing from active", i_ptr);
 
+	      active_local_stores_len--;
 	      if (last)
 		last->next_local_store = i_ptr->next_local_store;
 	      else
@@ -2163,6 +2168,7 @@  check_mem_read_rtx (rtx *loc, void *data
 	      if (dump_file)
 		dump_insn_info ("removing from active", i_ptr);
 
+	      active_local_stores_len--;
 	      if (last)
 		last->next_local_store = i_ptr->next_local_store;
 	      else
@@ -2222,6 +2228,7 @@  check_mem_read_rtx (rtx *loc, void *data
 	      if (dump_file)
 		dump_insn_info ("removing from active", i_ptr);
 
+	      active_local_stores_len--;
 	      if (last)
 		last->next_local_store = i_ptr->next_local_store;
 	      else
@@ -2426,6 +2433,7 @@  scan_insn (bb_info_t bb_info, rtx insn)
 		  if (dump_file)
 		    dump_insn_info ("removing from active", i_ptr);
 
+		  active_local_stores_len--;
 		  if (last)
 		    last->next_local_store = i_ptr->next_local_store;
 		  else
@@ -2453,6 +2461,12 @@  scan_insn (bb_info_t bb_info, rtx insn)
 		    fprintf (dump_file, "handling memset as BLKmode store\n");
 		  if (mems_found == 1)
 		    {
+		      if (active_local_stores_len++
+			  >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
+			{
+			  active_local_stores_len = 1;
+			  active_local_stores = NULL;
+			}
 		      insn_info->next_local_store = active_local_stores;
 		      active_local_stores = insn_info;
 		    }
@@ -2496,6 +2510,12 @@  scan_insn (bb_info_t bb_info, rtx insn)
      it as cannot delete.  This simplifies the processing later.  */
   if (mems_found == 1)
     {
+      if (active_local_stores_len++
+	  >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
+	{
+	  active_local_stores_len = 1;
+	  active_local_stores = NULL;
+	}
       insn_info->next_local_store = active_local_stores;
       active_local_stores = insn_info;
     }
@@ -2534,6 +2554,7 @@  remove_useless_values (cselib_val *base)
 
       if (del)
 	{
+	  active_local_stores_len--;
 	  if (last)
 	    last->next_local_store = insn_info->next_local_store;
 	  else
@@ -2584,6 +2605,7 @@  dse_step1 (void)
 	    = create_alloc_pool ("cse_store_info_pool",
 				 sizeof (struct store_info), 100);
 	  active_local_stores = NULL;
+	  active_local_stores_len = 0;
 	  cselib_clear_table ();
 
 	  /* Scan the insns.  */
--- gcc/Makefile.in.jj	2011-02-02 16:30:50.000000000 +0100
+++ gcc/Makefile.in	2011-03-16 12:26:12.000000000 +0100
@@ -3070,7 +3070,7 @@  dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co
    $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \
    $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \
    $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \
-   $(BITMAP_H)
+   $(BITMAP_H) $(PARAMS_H)
 fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \
    output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H) \