Message ID | 20110316115252.GI30899@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek <jakub@redhat.com> wrote: > On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote: >> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote: >> >so how much time does this save? >> > >> >I agree that this is a useful simplification, but it seems unlikely to >> >be that important in real code. >> >it seems like the 5000 store test would in general provide a better >> >safety valve. >> >> I think having both is a good idea. > > Here is the second patch, ok for both to trunk if this one passes > bootstrap/regtest? > > With just this patch alone on the testcase with the default > --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds > in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds. > With the earlier patch in addition to this one the time in DSE1+DSE2 > is 0.3 seconds no matter what the parameter is. That suggests we can avoid the new param? (btw, we should start thinking of emitting a diagnostic if we run into such artificial limits) The earlier patch is ok for all branches. Thanks, Richard. > 2011-03-16 Jakub Jelinek <jakub@redhat.com> > > PR rtl-optimization/48141 > * params.def (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES): New. > * dse.c: Include params.h. > (active_local_stores_len): New variable. > (add_wild_read, dse_step1): Clear it when setting active_local_stores > to NULL. > (record_store, check_mem_read_rtx): Decrease it when removing > from the chain. > (scan_insn): Likewise. Increase it when adding to chain, if it > reaches PARAM_MAX_DSE_ACTIVE_LOCAL_STORES limit, set to 1 and > set active_local_stores to NULL before the addition. > * Makefile.in (dse.o): Depend on $(PARAMS_H). > > --- gcc/params.def.jj 2011-02-15 15:42:27.000000000 +0100 > +++ gcc/params.def 2011-03-16 12:20:16.000000000 +0100 > @@ -698,6 +698,12 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS, > "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass", > 100, 0, 0) > > +/* This is the maximum number of active local stores RTL DSE will consider. */ > +DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES, > + "max-dse-active-local-stores", > + "Maximum number of active local stores in RTL dead store elimination", > + 5000, 0, 0) > + > /* Prefetching and cache-optimizations related parameters. Default values are > usually set by machine description. */ > > --- gcc/dse.c.jj 2011-02-15 15:42:26.000000000 +0100 > +++ gcc/dse.c 2011-03-16 12:34:18.000000000 +0100 > @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. > #include "optabs.h" > #include "dbgcnt.h" > #include "target.h" > +#include "params.h" > > /* This file contains three techniques for performing Dead Store > Elimination (dse). > @@ -387,6 +388,7 @@ static alloc_pool insn_info_pool; > /* The linked list of stores that are under consideration in this > basic block. */ > static insn_info_t active_local_stores; > +static int active_local_stores_len; > > struct bb_info > { > @@ -947,6 +949,7 @@ add_wild_read (bb_info_t bb_info) > } > insn_info->wild_read = true; > active_local_stores = NULL; > + active_local_stores_len = 0; > } > > > @@ -1538,6 +1541,7 @@ record_store (rtx body, bb_info_t bb_inf > { > insn_info_t insn_to_delete = ptr; > > + active_local_stores_len--; > if (last) > last->next_local_store = ptr->next_local_store; > else > @@ -2074,6 +2078,7 @@ check_mem_read_rtx (rtx *loc, void *data > if (dump_file) > dump_insn_info ("removing from active", i_ptr); > > + active_local_stores_len--; > if (last) > last->next_local_store = i_ptr->next_local_store; > else > @@ -2163,6 +2168,7 @@ check_mem_read_rtx (rtx *loc, void *data > if (dump_file) > dump_insn_info ("removing from active", i_ptr); > > + active_local_stores_len--; > if (last) > last->next_local_store = i_ptr->next_local_store; > else > @@ -2222,6 +2228,7 @@ check_mem_read_rtx (rtx *loc, void *data > if (dump_file) > dump_insn_info ("removing from active", i_ptr); > > + active_local_stores_len--; > if (last) > last->next_local_store = i_ptr->next_local_store; > else > @@ -2426,6 +2433,7 @@ scan_insn (bb_info_t bb_info, rtx insn) > if (dump_file) > dump_insn_info ("removing from active", i_ptr); > > + active_local_stores_len--; > if (last) > last->next_local_store = i_ptr->next_local_store; > else > @@ -2453,6 +2461,12 @@ scan_insn (bb_info_t bb_info, rtx insn) > fprintf (dump_file, "handling memset as BLKmode store\n"); > if (mems_found == 1) > { > + if (active_local_stores_len++ > + >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) > + { > + active_local_stores_len = 1; > + active_local_stores = NULL; > + } > insn_info->next_local_store = active_local_stores; > active_local_stores = insn_info; > } > @@ -2496,6 +2510,12 @@ scan_insn (bb_info_t bb_info, rtx insn) > it as cannot delete. This simplifies the processing later. */ > if (mems_found == 1) > { > + if (active_local_stores_len++ > + >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) > + { > + active_local_stores_len = 1; > + active_local_stores = NULL; > + } > insn_info->next_local_store = active_local_stores; > active_local_stores = insn_info; > } > @@ -2534,6 +2554,7 @@ remove_useless_values (cselib_val *base) > > if (del) > { > + active_local_stores_len--; > if (last) > last->next_local_store = insn_info->next_local_store; > else > @@ -2584,6 +2605,7 @@ dse_step1 (void) > = create_alloc_pool ("cse_store_info_pool", > sizeof (struct store_info), 100); > active_local_stores = NULL; > + active_local_stores_len = 0; > cselib_clear_table (); > > /* Scan the insns. */ > --- gcc/Makefile.in.jj 2011-02-02 16:30:50.000000000 +0100 > +++ gcc/Makefile.in 2011-03-16 12:26:12.000000000 +0100 > @@ -3070,7 +3070,7 @@ dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co > $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \ > $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \ > $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \ > - $(BITMAP_H) > + $(BITMAP_H) $(PARAMS_H) > fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ > $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \ > output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H) \ > > > Jakub >
On 03/16/2011 12:59 PM, Richard Guenther wrote: > On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek<jakub@redhat.com> wrote: >> On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote: >>> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote: >>>> so how much time does this save? >>>> >>>> I agree that this is a useful simplification, but it seems unlikely to >>>> be that important in real code. >>>> it seems like the 5000 store test would in general provide a better >>>> safety valve. >>> >>> I think having both is a good idea. >> >> Here is the second patch, ok for both to trunk if this one passes >> bootstrap/regtest? >> >> With just this patch alone on the testcase with the default >> --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds >> in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds. >> With the earlier patch in addition to this one the time in DSE1+DSE2 >> is 0.3 seconds no matter what the parameter is. > > That suggests we can avoid the new param? (btw, we should start > thinking of emitting a diagnostic if we run into such artificial limits) Jakub's behavior is expected, since the number of active local stores should be 1 with the earlier patch in that testcase. However, similar artificial testcases can probably be constructed that still trigger the ill behavior. But I agree that the first patch is enough for now, and in general for release branches, especially without a testcase for which DSE is really the top hog. Thanks, Paolo
On Wed, Mar 16, 2011 at 01:19:30PM +0100, Paolo Bonzini wrote: > Jakub's behavior is expected, since the number of active local > stores should be 1 with the earlier patch in that testcase. > However, similar artificial testcases can probably be constructed > that still trigger the ill behavior. Note the original testcase wasn't so artificial, some customer was actually using it to create very large functions, presumably to test linker behavior or whatever. In the original there were even more stores in each function, many of those functions and several sources with them, so it took many hours in DSE. Here is an artificial testcase which shows that just the first patch isn't enough: /* PR rtl-optimization/48141 */ /* { dg-do compile } */ /* { dg-options "-O -fno-tree-fre" } */ #define A(n) volatile int n = 0; #define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9) #define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9) #define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9) #define E(n) D(n##0) D(n##1) D(n##2) D(n##3) D(n##4) D(n##5) D(n##6) D(n##7) D(n##8) D(n##9) #define F(n) E(n##0) E(n##1) E(n##2) E(n##3) E(n##4) E(n##5) E(n##6) E(n##7) E(n##8) E(n##9) int foo (void) { F(i) return 0; } with both patches in -O -fno-tree-fre --param=max-dse-active-local-stores=10 this takes 16.68 seconds to compile, out of which 0.46s is spent in DSE1+DSE2. With -O -fno-tree-fre --param=max-dse-active-local-stores=10000000 it compiles in in 165.80s, DSE1+DSE2 time is 149.55s. -fno-tree-fre is here because that pass (in particular alias stmt walking from fre) is terribly expensive as well on this testcase. Jakub
i think that i sympathize with richard's "do we really need another parm?" whine. I would have just nailed it in without the parm. I think that it is unreasonable to expect that a program with basic blocks this big needs the full range of optimizations performed. It is perfectly reasonable to have the quality of the optimization degrade as a function of size. If you want to tie the trigger to something, then leave out the parm and tie it -O3. On 03/16/2011 07:59 AM, Richard Guenther wrote: > On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek<jakub@redhat.com> wrote: >> On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote: >>> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote: >>>> so how much time does this save? >>>> >>>> I agree that this is a useful simplification, but it seems unlikely to >>>> be that important in real code. >>>> it seems like the 5000 store test would in general provide a better >>>> safety valve. >>> I think having both is a good idea. >> Here is the second patch, ok for both to trunk if this one passes >> bootstrap/regtest? >> >> With just this patch alone on the testcase with the default >> --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds >> in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds. >> With the earlier patch in addition to this one the time in DSE1+DSE2 >> is 0.3 seconds no matter what the parameter is. > That suggests we can avoid the new param? (btw, we should start > thinking of emitting a diagnostic if we run into such artificial limits) > > The earlier patch is ok for all branches. > > Thanks, > Richard. > >> 2011-03-16 Jakub Jelinek<jakub@redhat.com> >> >> PR rtl-optimization/48141 >> * params.def (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES): New. >> * dse.c: Include params.h. >> (active_local_stores_len): New variable. >> (add_wild_read, dse_step1): Clear it when setting active_local_stores >> to NULL. >> (record_store, check_mem_read_rtx): Decrease it when removing >> from the chain. >> (scan_insn): Likewise. Increase it when adding to chain, if it >> reaches PARAM_MAX_DSE_ACTIVE_LOCAL_STORES limit, set to 1 and >> set active_local_stores to NULL before the addition. >> * Makefile.in (dse.o): Depend on $(PARAMS_H). >> >> --- gcc/params.def.jj 2011-02-15 15:42:27.000000000 +0100 >> +++ gcc/params.def 2011-03-16 12:20:16.000000000 +0100 >> @@ -698,6 +698,12 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS, >> "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass", >> 100, 0, 0) >> >> +/* This is the maximum number of active local stores RTL DSE will consider. */ >> +DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES, >> + "max-dse-active-local-stores", >> + "Maximum number of active local stores in RTL dead store elimination", >> + 5000, 0, 0) >> + >> /* Prefetching and cache-optimizations related parameters. Default values are >> usually set by machine description. */ >> >> --- gcc/dse.c.jj 2011-02-15 15:42:26.000000000 +0100 >> +++ gcc/dse.c 2011-03-16 12:34:18.000000000 +0100 >> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. >> #include "optabs.h" >> #include "dbgcnt.h" >> #include "target.h" >> +#include "params.h" >> >> /* This file contains three techniques for performing Dead Store >> Elimination (dse). >> @@ -387,6 +388,7 @@ static alloc_pool insn_info_pool; >> /* The linked list of stores that are under consideration in this >> basic block. */ >> static insn_info_t active_local_stores; >> +static int active_local_stores_len; >> >> struct bb_info >> { >> @@ -947,6 +949,7 @@ add_wild_read (bb_info_t bb_info) >> } >> insn_info->wild_read = true; >> active_local_stores = NULL; >> + active_local_stores_len = 0; >> } >> >> >> @@ -1538,6 +1541,7 @@ record_store (rtx body, bb_info_t bb_inf >> { >> insn_info_t insn_to_delete = ptr; >> >> + active_local_stores_len--; >> if (last) >> last->next_local_store = ptr->next_local_store; >> else >> @@ -2074,6 +2078,7 @@ check_mem_read_rtx (rtx *loc, void *data >> if (dump_file) >> dump_insn_info ("removing from active", i_ptr); >> >> + active_local_stores_len--; >> if (last) >> last->next_local_store = i_ptr->next_local_store; >> else >> @@ -2163,6 +2168,7 @@ check_mem_read_rtx (rtx *loc, void *data >> if (dump_file) >> dump_insn_info ("removing from active", i_ptr); >> >> + active_local_stores_len--; >> if (last) >> last->next_local_store = i_ptr->next_local_store; >> else >> @@ -2222,6 +2228,7 @@ check_mem_read_rtx (rtx *loc, void *data >> if (dump_file) >> dump_insn_info ("removing from active", i_ptr); >> >> + active_local_stores_len--; >> if (last) >> last->next_local_store = i_ptr->next_local_store; >> else >> @@ -2426,6 +2433,7 @@ scan_insn (bb_info_t bb_info, rtx insn) >> if (dump_file) >> dump_insn_info ("removing from active", i_ptr); >> >> + active_local_stores_len--; >> if (last) >> last->next_local_store = i_ptr->next_local_store; >> else >> @@ -2453,6 +2461,12 @@ scan_insn (bb_info_t bb_info, rtx insn) >> fprintf (dump_file, "handling memset as BLKmode store\n"); >> if (mems_found == 1) >> { >> + if (active_local_stores_len++ >> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) >> + { >> + active_local_stores_len = 1; >> + active_local_stores = NULL; >> + } >> insn_info->next_local_store = active_local_stores; >> active_local_stores = insn_info; >> } >> @@ -2496,6 +2510,12 @@ scan_insn (bb_info_t bb_info, rtx insn) >> it as cannot delete. This simplifies the processing later. */ >> if (mems_found == 1) >> { >> + if (active_local_stores_len++ >> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) >> + { >> + active_local_stores_len = 1; >> + active_local_stores = NULL; >> + } >> insn_info->next_local_store = active_local_stores; >> active_local_stores = insn_info; >> } >> @@ -2534,6 +2554,7 @@ remove_useless_values (cselib_val *base) >> >> if (del) >> { >> + active_local_stores_len--; >> if (last) >> last->next_local_store = insn_info->next_local_store; >> else >> @@ -2584,6 +2605,7 @@ dse_step1 (void) >> = create_alloc_pool ("cse_store_info_pool", >> sizeof (struct store_info), 100); >> active_local_stores = NULL; >> + active_local_stores_len = 0; >> cselib_clear_table (); >> >> /* Scan the insns. */ >> --- gcc/Makefile.in.jj 2011-02-02 16:30:50.000000000 +0100 >> +++ gcc/Makefile.in 2011-03-16 12:26:12.000000000 +0100 >> @@ -3070,7 +3070,7 @@ dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co >> $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \ >> $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \ >> $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \ >> - $(BITMAP_H) >> + $(BITMAP_H) $(PARAMS_H) >> fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ >> $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \ >> output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H) \ >> >> >> Jakub >>
On Wed, Mar 16, 2011 at 2:27 PM, Kenneth Zadeck <zadeck@naturalbridge.com> wrote: > i think that i sympathize with richard's "do we really need another parm?" > whine. I would have just nailed it in without the parm. I think that it > is unreasonable to expect that a program with basic blocks this big needs > the full range of optimizations performed. > > It is perfectly reasonable to have the quality of the optimization degrade > as a function of size. > > If you want to tie the trigger to something, then leave out the parm and tie > it -O3. I think having --param limits for this kind of stuff is exactly the right thing to do. We should try to improve both documentation (list those params in a separate section) and diagnostics. Richard. > > On 03/16/2011 07:59 AM, Richard Guenther wrote: >> >> On Wed, Mar 16, 2011 at 12:52 PM, Jakub Jelinek<jakub@redhat.com> wrote: >>> >>> On Wed, Mar 16, 2011 at 09:46:20AM +0100, Paolo Bonzini wrote: >>>> >>>> On 03/16/2011 12:12 AM, Kenneth Zadeck wrote: >>>>> >>>>> so how much time does this save? >>>>> >>>>> I agree that this is a useful simplification, but it seems unlikely to >>>>> be that important in real code. >>>>> it seems like the 5000 store test would in general provide a better >>>>> safety valve. >>>> >>>> I think having both is a good idea. >>> >>> Here is the second patch, ok for both to trunk if this one passes >>> bootstrap/regtest? >>> >>> With just this patch alone on the testcase with the default >>> --param=max-dse-active-local-stores=5000 cc1 spends 18.9 seconds >>> in DSE1+DSE2, with =1000 just 4 seconds and with =10 0.4 seconds. >>> With the earlier patch in addition to this one the time in DSE1+DSE2 >>> is 0.3 seconds no matter what the parameter is. >> >> That suggests we can avoid the new param? (btw, we should start >> thinking of emitting a diagnostic if we run into such artificial limits) >> >> The earlier patch is ok for all branches. >> >> Thanks, >> Richard. >> >>> 2011-03-16 Jakub Jelinek<jakub@redhat.com> >>> >>> PR rtl-optimization/48141 >>> * params.def (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES): New. >>> * dse.c: Include params.h. >>> (active_local_stores_len): New variable. >>> (add_wild_read, dse_step1): Clear it when setting >>> active_local_stores >>> to NULL. >>> (record_store, check_mem_read_rtx): Decrease it when removing >>> from the chain. >>> (scan_insn): Likewise. Increase it when adding to chain, if it >>> reaches PARAM_MAX_DSE_ACTIVE_LOCAL_STORES limit, set to 1 and >>> set active_local_stores to NULL before the addition. >>> * Makefile.in (dse.o): Depend on $(PARAMS_H). >>> >>> --- gcc/params.def.jj 2011-02-15 15:42:27.000000000 +0100 >>> +++ gcc/params.def 2011-03-16 12:20:16.000000000 +0100 >>> @@ -698,6 +698,12 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS, >>> "The maximum number of instructions ready to be issued to be >>> considered by the scheduler during the first scheduling pass", >>> 100, 0, 0) >>> >>> +/* This is the maximum number of active local stores RTL DSE will >>> consider. */ >>> +DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES, >>> + "max-dse-active-local-stores", >>> + "Maximum number of active local stores in RTL dead store >>> elimination", >>> + 5000, 0, 0) >>> + >>> /* Prefetching and cache-optimizations related parameters. Default >>> values are >>> usually set by machine description. */ >>> >>> --- gcc/dse.c.jj 2011-02-15 15:42:26.000000000 +0100 >>> +++ gcc/dse.c 2011-03-16 12:34:18.000000000 +0100 >>> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. >>> #include "optabs.h" >>> #include "dbgcnt.h" >>> #include "target.h" >>> +#include "params.h" >>> >>> /* This file contains three techniques for performing Dead Store >>> Elimination (dse). >>> @@ -387,6 +388,7 @@ static alloc_pool insn_info_pool; >>> /* The linked list of stores that are under consideration in this >>> basic block. */ >>> static insn_info_t active_local_stores; >>> +static int active_local_stores_len; >>> >>> struct bb_info >>> { >>> @@ -947,6 +949,7 @@ add_wild_read (bb_info_t bb_info) >>> } >>> insn_info->wild_read = true; >>> active_local_stores = NULL; >>> + active_local_stores_len = 0; >>> } >>> >>> >>> @@ -1538,6 +1541,7 @@ record_store (rtx body, bb_info_t bb_inf >>> { >>> insn_info_t insn_to_delete = ptr; >>> >>> + active_local_stores_len--; >>> if (last) >>> last->next_local_store = ptr->next_local_store; >>> else >>> @@ -2074,6 +2078,7 @@ check_mem_read_rtx (rtx *loc, void *data >>> if (dump_file) >>> dump_insn_info ("removing from active", i_ptr); >>> >>> + active_local_stores_len--; >>> if (last) >>> last->next_local_store = i_ptr->next_local_store; >>> else >>> @@ -2163,6 +2168,7 @@ check_mem_read_rtx (rtx *loc, void *data >>> if (dump_file) >>> dump_insn_info ("removing from active", i_ptr); >>> >>> + active_local_stores_len--; >>> if (last) >>> last->next_local_store = i_ptr->next_local_store; >>> else >>> @@ -2222,6 +2228,7 @@ check_mem_read_rtx (rtx *loc, void *data >>> if (dump_file) >>> dump_insn_info ("removing from active", i_ptr); >>> >>> + active_local_stores_len--; >>> if (last) >>> last->next_local_store = i_ptr->next_local_store; >>> else >>> @@ -2426,6 +2433,7 @@ scan_insn (bb_info_t bb_info, rtx insn) >>> if (dump_file) >>> dump_insn_info ("removing from active", i_ptr); >>> >>> + active_local_stores_len--; >>> if (last) >>> last->next_local_store = i_ptr->next_local_store; >>> else >>> @@ -2453,6 +2461,12 @@ scan_insn (bb_info_t bb_info, rtx insn) >>> fprintf (dump_file, "handling memset as BLKmode >>> store\n"); >>> if (mems_found == 1) >>> { >>> + if (active_local_stores_len++ >>> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) >>> + { >>> + active_local_stores_len = 1; >>> + active_local_stores = NULL; >>> + } >>> insn_info->next_local_store = active_local_stores; >>> active_local_stores = insn_info; >>> } >>> @@ -2496,6 +2510,12 @@ scan_insn (bb_info_t bb_info, rtx insn) >>> it as cannot delete. This simplifies the processing later. */ >>> if (mems_found == 1) >>> { >>> + if (active_local_stores_len++ >>> +>= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) >>> + { >>> + active_local_stores_len = 1; >>> + active_local_stores = NULL; >>> + } >>> insn_info->next_local_store = active_local_stores; >>> active_local_stores = insn_info; >>> } >>> @@ -2534,6 +2554,7 @@ remove_useless_values (cselib_val *base) >>> >>> if (del) >>> { >>> + active_local_stores_len--; >>> if (last) >>> last->next_local_store = insn_info->next_local_store; >>> else >>> @@ -2584,6 +2605,7 @@ dse_step1 (void) >>> = create_alloc_pool ("cse_store_info_pool", >>> sizeof (struct store_info), 100); >>> active_local_stores = NULL; >>> + active_local_stores_len = 0; >>> cselib_clear_table (); >>> >>> /* Scan the insns. */ >>> --- gcc/Makefile.in.jj 2011-02-02 16:30:50.000000000 +0100 >>> +++ gcc/Makefile.in 2011-03-16 12:26:12.000000000 +0100 >>> @@ -3070,7 +3070,7 @@ dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co >>> $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h >>> \ >>> $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \ >>> $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \ >>> - $(BITMAP_H) >>> + $(BITMAP_H) $(PARAMS_H) >>> fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) >>> \ >>> $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) >>> $(BASIC_BLOCK_H) \ >>> output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H) >>> \ >>> >>> >>> Jakub >>> >
--- gcc/params.def.jj 2011-02-15 15:42:27.000000000 +0100 +++ gcc/params.def 2011-03-16 12:20:16.000000000 +0100 @@ -698,6 +698,12 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS, "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass", 100, 0, 0) +/* This is the maximum number of active local stores RTL DSE will consider. */ +DEFPARAM (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES, + "max-dse-active-local-stores", + "Maximum number of active local stores in RTL dead store elimination", + 5000, 0, 0) + /* Prefetching and cache-optimizations related parameters. Default values are usually set by machine description. */ --- gcc/dse.c.jj 2011-02-15 15:42:26.000000000 +0100 +++ gcc/dse.c 2011-03-16 12:34:18.000000000 +0100 @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. #include "optabs.h" #include "dbgcnt.h" #include "target.h" +#include "params.h" /* This file contains three techniques for performing Dead Store Elimination (dse). @@ -387,6 +388,7 @@ static alloc_pool insn_info_pool; /* The linked list of stores that are under consideration in this basic block. */ static insn_info_t active_local_stores; +static int active_local_stores_len; struct bb_info { @@ -947,6 +949,7 @@ add_wild_read (bb_info_t bb_info) } insn_info->wild_read = true; active_local_stores = NULL; + active_local_stores_len = 0; } @@ -1538,6 +1541,7 @@ record_store (rtx body, bb_info_t bb_inf { insn_info_t insn_to_delete = ptr; + active_local_stores_len--; if (last) last->next_local_store = ptr->next_local_store; else @@ -2074,6 +2078,7 @@ check_mem_read_rtx (rtx *loc, void *data if (dump_file) dump_insn_info ("removing from active", i_ptr); + active_local_stores_len--; if (last) last->next_local_store = i_ptr->next_local_store; else @@ -2163,6 +2168,7 @@ check_mem_read_rtx (rtx *loc, void *data if (dump_file) dump_insn_info ("removing from active", i_ptr); + active_local_stores_len--; if (last) last->next_local_store = i_ptr->next_local_store; else @@ -2222,6 +2228,7 @@ check_mem_read_rtx (rtx *loc, void *data if (dump_file) dump_insn_info ("removing from active", i_ptr); + active_local_stores_len--; if (last) last->next_local_store = i_ptr->next_local_store; else @@ -2426,6 +2433,7 @@ scan_insn (bb_info_t bb_info, rtx insn) if (dump_file) dump_insn_info ("removing from active", i_ptr); + active_local_stores_len--; if (last) last->next_local_store = i_ptr->next_local_store; else @@ -2453,6 +2461,12 @@ scan_insn (bb_info_t bb_info, rtx insn) fprintf (dump_file, "handling memset as BLKmode store\n"); if (mems_found == 1) { + if (active_local_stores_len++ + >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) + { + active_local_stores_len = 1; + active_local_stores = NULL; + } insn_info->next_local_store = active_local_stores; active_local_stores = insn_info; } @@ -2496,6 +2510,12 @@ scan_insn (bb_info_t bb_info, rtx insn) it as cannot delete. This simplifies the processing later. */ if (mems_found == 1) { + if (active_local_stores_len++ + >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES)) + { + active_local_stores_len = 1; + active_local_stores = NULL; + } insn_info->next_local_store = active_local_stores; active_local_stores = insn_info; } @@ -2534,6 +2554,7 @@ remove_useless_values (cselib_val *base) if (del) { + active_local_stores_len--; if (last) last->next_local_store = insn_info->next_local_store; else @@ -2584,6 +2605,7 @@ dse_step1 (void) = create_alloc_pool ("cse_store_info_pool", sizeof (struct store_info), 100); active_local_stores = NULL; + active_local_stores_len = 0; cselib_clear_table (); /* Scan the insns. */ --- gcc/Makefile.in.jj 2011-02-02 16:30:50.000000000 +0100 +++ gcc/Makefile.in 2011-03-16 12:26:12.000000000 +0100 @@ -3070,7 +3070,7 @@ dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) co $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \ $(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) $(TIMEVAR_H) \ $(TREE_PASS_H) alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H) $(TARGET_H) \ - $(BITMAP_H) + $(BITMAP_H) $(PARAMS_H) fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(DIAGNOSTIC_CORE_H) insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \ output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) $(TREE_PASS_H) $(TARGET_H) \