| Message ID | CAAe5K+VWqnrXetiq3kUE6Tbb45FOm-6ANh9HoJ4s25O1vJk33g@mail.gmail.com |
|---|---|
| State | New |
| Headers | show |
> 2013-09-29 Teresa Johnson <tejohnson@google.com> > > * bb-reorder.c (find_rarely_executed_basic_blocks_and_crossing_edges): > Treat profile insanities conservatively. > * predict.c (probably_never_executed): New function. Treat profile > insanities conservatively. > (probably_never_executed_bb_p): Invoke probably_never_executed. > (probably_never_executed_edge_p): Invoke probably_never_executed. > > Index: bb-reorder.c > =================================================================== > --- bb-reorder.c (revision 202947) > +++ bb-reorder.c (working copy) > @@ -1564,8 +1564,25 @@ find_rarely_executed_basic_blocks_and_crossing_edg > /* Mark which partition (hot/cold) each basic block belongs in. */ > FOR_EACH_BB (bb) > { > + bool cold_bb = false; whitespace here > if (probably_never_executed_bb_p (cfun, bb)) > { > + /* Handle profile insanities created by upstream optimizations > + by also checking the incoming edge weights. If there is a non-cold > + incoming edge, conservatively prevent this block from being split > + into the cold section. */ > + cold_bb = true; > + FOR_EACH_EDGE (e, ei, bb->preds) > + { > + if (!probably_never_executed_edge_p (cfun, e)) > + { > + cold_bb = false; > + break; > + } > + } You can probably elimnate the extra braces. So we won't propagate deeper in the CFG, right? This change is OK. > + } > + if (cold_bb) > + { > BB_SET_PARTITION (bb, BB_COLD_PARTITION); > cold_bb_count++; > } > Index: predict.c > =================================================================== > --- predict.c (revision 202947) > +++ predict.c (working copy) > @@ -226,26 +226,26 @@ maybe_hot_edge_p (edge e) > } > > > -/* Return true in case BB is probably never executed. */ > > -bool > -probably_never_executed_bb_p (struct function *fun, const_basic_block bb) > +/* Return true if profile COUNT and FREQUENCY, or function FUN static > + node frequency reflects never being executed. 
*/ > + > +static bool > +probably_never_executed (struct function *fun, > + gcov_type count, int frequency) > { > gcc_checking_assert (fun); > if (profile_status_for_function (fun) == PROFILE_READ) > { > - if ((bb->count * 4 + profile_info->runs / 2) / profile_info->runs > 0) > + if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0) > return false; > - if (!bb->frequency) > - return true; > - if (!ENTRY_BLOCK_PTR->frequency) > - return false; > - if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE) > - { > - return (RDIV (bb->frequency * ENTRY_BLOCK_PTR->count, > - ENTRY_BLOCK_PTR->frequency) > - < REG_BR_PROB_BASE / 4); > - } > + // If this is a profiled function (entry bb non-zero count), then base > + // the coldness decision on the frequency. This will handle cases where > + // counts are not updated properly during optimizations or expansion. > + if (ENTRY_BLOCK_PTR->count) > + return frequency == 0; > + // Unprofiled function, frequencies statically assigned. All bbs are > + // treated as cold. I would avoid combining C and C++ comments in the function. Did you get some data on how many basic blocks we now consider hot? The previous implemntation consdered block as never executed when frequencies indicates that it is executed in at most 1/4th of invocations of program. You essentially chnage to 1/10000. The first seems bit too high given the way we distribute probabilities in dojump and firends, second looks too low. The change introducing probably_never_executed with the current logic is OK. We may want to fine tune the ratio. Honza > return true; > } > if ((!profile_info || !flag_branch_probabilities) > @@ -256,19 +256,21 @@ maybe_hot_edge_p (edge e) > } > > > +/* Return true in case BB is probably never executed. 
*/ > + > +bool > +probably_never_executed_bb_p (struct function *fun, const_basic_block bb) > +{ > + return probably_never_executed (fun, bb->count, bb->frequency); > +} > + > + > /* Return true in case edge E is probably never executed. */ > > bool > probably_never_executed_edge_p (struct function *fun, edge e) > { > - gcc_checking_assert (fun); > - if (profile_info && flag_branch_probabilities) > - return ((e->count + profile_info->runs / 2) / profile_info->runs) == 0; > - if ((!profile_info || !flag_branch_probabilities) > - && (cgraph_get_node (fun->decl)->frequency > - == NODE_FREQUENCY_UNLIKELY_EXECUTED)) > - return true; > - return false; > + return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e)); > } > > /* Return true if NODE should be optimized for size. */
On Wed, Oct 2, 2013 at 9:19 AM, Jan Hubicka <hubicka@ucw.cz> wrote: >> 2013-09-29 Teresa Johnson <tejohnson@google.com> >> >> * bb-reorder.c (find_rarely_executed_basic_blocks_and_crossing_edges): >> Treat profile insanities conservatively. >> * predict.c (probably_never_executed): New function. Treat profile >> insanities conservatively. >> (probably_never_executed_bb_p): Invoke probably_never_executed. >> (probably_never_executed_edge_p): Invoke probably_never_executed. >> >> Index: bb-reorder.c >> =================================================================== >> --- bb-reorder.c (revision 202947) >> +++ bb-reorder.c (working copy) >> @@ -1564,8 +1564,25 @@ find_rarely_executed_basic_blocks_and_crossing_edg >> /* Mark which partition (hot/cold) each basic block belongs in. */ >> FOR_EACH_BB (bb) >> { >> + bool cold_bb = false; > > whitespace here meaning add a line of whitespace? Ok, done. > >> if (probably_never_executed_bb_p (cfun, bb)) >> { >> + /* Handle profile insanities created by upstream optimizations >> + by also checking the incoming edge weights. If there is a non-cold >> + incoming edge, conservatively prevent this block from being split >> + into the cold section. */ >> + cold_bb = true; >> + FOR_EACH_EDGE (e, ei, bb->preds) >> + { >> + if (!probably_never_executed_edge_p (cfun, e)) >> + { >> + cold_bb = false; >> + break; >> + } >> + } > > You can probably elimnate the extra braces. > So we won't propagate deeper in the CFG, right? Done. > > This change is OK. > >> + } >> + if (cold_bb) >> + { >> BB_SET_PARTITION (bb, BB_COLD_PARTITION); >> cold_bb_count++; >> } >> Index: predict.c >> =================================================================== >> --- predict.c (revision 202947) >> +++ predict.c (working copy) >> @@ -226,26 +226,26 @@ maybe_hot_edge_p (edge e) >> } >> >> >> -/* Return true in case BB is probably never executed. 
*/ >> >> -bool >> -probably_never_executed_bb_p (struct function *fun, const_basic_block bb) >> +/* Return true if profile COUNT and FREQUENCY, or function FUN static >> + node frequency reflects never being executed. */ >> + >> +static bool >> +probably_never_executed (struct function *fun, >> + gcov_type count, int frequency) >> { >> gcc_checking_assert (fun); >> if (profile_status_for_function (fun) == PROFILE_READ) >> { >> - if ((bb->count * 4 + profile_info->runs / 2) / profile_info->runs > 0) >> + if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0) >> return false; >> - if (!bb->frequency) >> - return true; >> - if (!ENTRY_BLOCK_PTR->frequency) >> - return false; >> - if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE) >> - { >> - return (RDIV (bb->frequency * ENTRY_BLOCK_PTR->count, >> - ENTRY_BLOCK_PTR->frequency) >> - < REG_BR_PROB_BASE / 4); >> - } >> + // If this is a profiled function (entry bb non-zero count), then base >> + // the coldness decision on the frequency. This will handle cases where >> + // counts are not updated properly during optimizations or expansion. >> + if (ENTRY_BLOCK_PTR->count) >> + return frequency == 0; >> + // Unprofiled function, frequencies statically assigned. All bbs are >> + // treated as cold. > > I would avoid combining C and C++ comments in the function. Fixed. > Did you get some data on how many basic blocks we now consider hot? No, I can do that. > > The previous implemntation consdered block as never executed when frequencies > indicates that it is executed in at most 1/4th of invocations of program. > You essentially chnage to 1/10000. The first seems bit too high given the > way we distribute probabilities in dojump and firends, second looks too low. But why do we want to consider blocks as "probably never executed" when the frequency suggests they are sometimes executed? 
AFAICT, there are 2 main callers of this routine: 1) function splitting in bb-layout 2) function cgraph node weight Where #2 will affect optimization of the function for size and also function layout by the linker. I would argue that for function splitting, we really want to know when it is probably *never* executed - i.e. completely cold, since the cost of jumping back and forth to the cold section is likely to be high. I am not sure for #2 what the right ratio is. For function layout, we may also want to place only really cold *never* executed functions into the cold section, but I am less sure about optimization for size. Perhaps we really need two different interfaces to test for different levels of coldness: probably_never_executed() -> returns true when there is profile information for the function and the bb has 0 count and 0 frequency. -> invoked from bb-reorder.cc to drive function splitting -> may want to consider invoking this as an additional check before putting function into unlikely text section in the future. possibly_never_executed() -> essentially the existing logic in probably_never_executed_bb_p -> invoked when marking the cgraph node > > The change introducing probably_never_executed with the current logic is OK. Ok, I will commit the two approved parts for now (the change to bb-reorder.c and the addition of probably_never_executed that uses the existing logic from probably_never_executed_bb_p. Thanks, Teresa > We may want to fine tune the ratio. > > Honza >> return true; >> } >> if ((!profile_info || !flag_branch_probabilities) >> @@ -256,19 +256,21 @@ maybe_hot_edge_p (edge e) >> } >> >> >> +/* Return true in case BB is probably never executed. */ >> + >> +bool >> +probably_never_executed_bb_p (struct function *fun, const_basic_block bb) >> +{ >> + return probably_never_executed (fun, bb->count, bb->frequency); >> +} >> + >> + >> /* Return true in case edge E is probably never executed. 
*/ >> >> bool >> probably_never_executed_edge_p (struct function *fun, edge e) >> { >> - gcc_checking_assert (fun); >> - if (profile_info && flag_branch_probabilities) >> - return ((e->count + profile_info->runs / 2) / profile_info->runs) == 0; >> - if ((!profile_info || !flag_branch_probabilities) >> - && (cgraph_get_node (fun->decl)->frequency >> - == NODE_FREQUENCY_UNLIKELY_EXECUTED)) >> - return true; >> - return false; >> + return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e)); >> } >> >> /* Return true if NODE should be optimized for size. */
> But why do we want to consider blocks as "probably never executed" > when the frequency suggests they are sometimes executed? Well, "probably never executed" is meant to refer to one run. If you have something like code handling fatal errors, you probably still want to have it in the cold section even if the user may have trained the program on a testsuite that triggers them once or twice per thousand runs. We may just make the predicate more strict, but let's do that incrementally so we know how much things change. I am somewhat concerned that we are not that effective on breaking out cold code, so -fprofile-use does not lead to as significant code size reductions as the theory would suggest; perhaps I am just overly conservative about this. Getting the splitting to work reliably is definitely going to be a win. > Perhaps we really need two different interfaces to test for different > levels of coldness: > > probably_never_executed() > -> returns true when there is profile information for the function > and the bb has 0 count and 0 frequency. > -> invoked from bb-reorder.cc to drive function splitting > -> may want to consider invoking this as an additional check before > putting function into unlikely text section in the future. > > possibly_never_executed() > -> essentially the existing logic in probably_never_executed_bb_p > -> invoked when marking the cgraph node Perhaps... The advantage of the hot/normal/cold split is that it is easy to understand, but if necessary (i.e., it becomes impossible to tune well) we may add more stages... Honza
On Wed, Oct 2, 2013 at 9:19 AM, Jan Hubicka <hubicka@ucw.cz> wrote: >> 2013-09-29 Teresa Johnson <tejohnson@google.com> >> >> * bb-reorder.c (find_rarely_executed_basic_blocks_and_crossing_edges): >> Treat profile insanities conservatively. >> * predict.c (probably_never_executed): New function. Treat profile >> insanities conservatively. >> (probably_never_executed_bb_p): Invoke probably_never_executed. >> (probably_never_executed_edge_p): Invoke probably_never_executed. >> >> Index: bb-reorder.c >> =================================================================== >> --- bb-reorder.c (revision 202947) >> +++ bb-reorder.c (working copy) >> @@ -1564,8 +1564,25 @@ find_rarely_executed_basic_blocks_and_crossing_edg >> /* Mark which partition (hot/cold) each basic block belongs in. */ >> FOR_EACH_BB (bb) >> { >> + bool cold_bb = false; > > whitespace here > >> if (probably_never_executed_bb_p (cfun, bb)) >> { >> + /* Handle profile insanities created by upstream optimizations >> + by also checking the incoming edge weights. If there is a non-cold >> + incoming edge, conservatively prevent this block from being split >> + into the cold section. */ >> + cold_bb = true; >> + FOR_EACH_EDGE (e, ei, bb->preds) >> + { >> + if (!probably_never_executed_edge_p (cfun, e)) >> + { >> + cold_bb = false; >> + break; >> + } >> + } > > You can probably elimnate the extra braces. > So we won't propagate deeper in the CFG, right? > > This change is OK. > >> + } >> + if (cold_bb) >> + { >> BB_SET_PARTITION (bb, BB_COLD_PARTITION); >> cold_bb_count++; >> } >> Index: predict.c >> =================================================================== >> --- predict.c (revision 202947) >> +++ predict.c (working copy) >> @@ -226,26 +226,26 @@ maybe_hot_edge_p (edge e) >> } >> >> >> -/* Return true in case BB is probably never executed. 
*/ >> >> -bool >> -probably_never_executed_bb_p (struct function *fun, const_basic_block bb) >> +/* Return true if profile COUNT and FREQUENCY, or function FUN static >> + node frequency reflects never being executed. */ >> + >> +static bool >> +probably_never_executed (struct function *fun, >> + gcov_type count, int frequency) >> { >> gcc_checking_assert (fun); >> if (profile_status_for_function (fun) == PROFILE_READ) >> { >> - if ((bb->count * 4 + profile_info->runs / 2) / profile_info->runs > 0) >> + if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0) >> return false; >> - if (!bb->frequency) >> - return true; >> - if (!ENTRY_BLOCK_PTR->frequency) >> - return false; >> - if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE) >> - { >> - return (RDIV (bb->frequency * ENTRY_BLOCK_PTR->count, >> - ENTRY_BLOCK_PTR->frequency) >> - < REG_BR_PROB_BASE / 4); >> - } >> + // If this is a profiled function (entry bb non-zero count), then base >> + // the coldness decision on the frequency. This will handle cases where >> + // counts are not updated properly during optimizations or expansion. >> + if (ENTRY_BLOCK_PTR->count) >> + return frequency == 0; >> + // Unprofiled function, frequencies statically assigned. All bbs are >> + // treated as cold. > > I would avoid combining C and C++ comments in the function. > Did you get some data on how many basic blocks we now consider hot? > > The previous implemntation consdered block as never executed when frequencies > indicates that it is executed in at most 1/4th of invocations of program. > You essentially chnage to 1/10000. The first seems bit too high given the > way we distribute probabilities in dojump and firends, second looks too low. Actually, I don't think the current code is detecting when the frequencies indicate it was executed 1/4 time. 
The current code takes a ratio of the entry block count, and compares it to REG_BR_PROB_BASE/4, which seems like the wrong comparison for profile counts. Shouldn't this be something like: gcov_type computed_count = RDIV (frequency * ENTRY_BLOCK_PTR->count, ENTRY_BLOCK_PTR->frequency) if ((computed_count * 4 + profile_info->runs / 2) / profile_info->runs > 0) return false; return true; i.e. do the same check we do for bb->count above. And the check guarding this is looking for ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE, which also doesn't seem right, although we need to ensure that we don't overflow when multiplying by frequency. Teresa > > The change introducing probably_never_executed with the current logic is OK. > We may want to fine tune the ratio. > > Honza >> return true; >> } >> if ((!profile_info || !flag_branch_probabilities) >> @@ -256,19 +256,21 @@ maybe_hot_edge_p (edge e) >> } >> >> >> +/* Return true in case BB is probably never executed. */ >> + >> +bool >> +probably_never_executed_bb_p (struct function *fun, const_basic_block bb) >> +{ >> + return probably_never_executed (fun, bb->count, bb->frequency); >> +} >> + >> + >> /* Return true in case edge E is probably never executed. */ >> >> bool >> probably_never_executed_edge_p (struct function *fun, edge e) >> { >> - gcc_checking_assert (fun); >> - if (profile_info && flag_branch_probabilities) >> - return ((e->count + profile_info->runs / 2) / profile_info->runs) == 0; >> - if ((!profile_info || !flag_branch_probabilities) >> - && (cgraph_get_node (fun->decl)->frequency >> - == NODE_FREQUENCY_UNLIKELY_EXECUTED)) >> - return true; >> - return false; >> + return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e)); >> } >> >> /* Return true if NODE should be optimized for size. */
On Thu, Oct 3, 2013 at 6:41 AM, Teresa Johnson <tejohnson@google.com> wrote: > On Wed, Oct 2, 2013 at 9:19 AM, Jan Hubicka <hubicka@ucw.cz> wrote: >>> 2013-09-29 Teresa Johnson <tejohnson@google.com> >>> >>> * bb-reorder.c (find_rarely_executed_basic_blocks_and_crossing_edges): >>> Treat profile insanities conservatively. >>> * predict.c (probably_never_executed): New function. Treat profile >>> insanities conservatively. >>> (probably_never_executed_bb_p): Invoke probably_never_executed. >>> (probably_never_executed_edge_p): Invoke probably_never_executed. >>> >>> Index: bb-reorder.c >>> =================================================================== >>> --- bb-reorder.c (revision 202947) >>> +++ bb-reorder.c (working copy) >>> @@ -1564,8 +1564,25 @@ find_rarely_executed_basic_blocks_and_crossing_edg >>> /* Mark which partition (hot/cold) each basic block belongs in. */ >>> FOR_EACH_BB (bb) >>> { >>> + bool cold_bb = false; >> >> whitespace here >> >>> if (probably_never_executed_bb_p (cfun, bb)) >>> { >>> + /* Handle profile insanities created by upstream optimizations >>> + by also checking the incoming edge weights. If there is a non-cold >>> + incoming edge, conservatively prevent this block from being split >>> + into the cold section. */ >>> + cold_bb = true; >>> + FOR_EACH_EDGE (e, ei, bb->preds) >>> + { >>> + if (!probably_never_executed_edge_p (cfun, e)) >>> + { >>> + cold_bb = false; >>> + break; >>> + } >>> + } >> >> You can probably elimnate the extra braces. >> So we won't propagate deeper in the CFG, right? >> >> This change is OK. >> >>> + } >>> + if (cold_bb) >>> + { >>> BB_SET_PARTITION (bb, BB_COLD_PARTITION); >>> cold_bb_count++; >>> } >>> Index: predict.c >>> =================================================================== >>> --- predict.c (revision 202947) >>> +++ predict.c (working copy) >>> @@ -226,26 +226,26 @@ maybe_hot_edge_p (edge e) >>> } >>> >>> >>> -/* Return true in case BB is probably never executed. 
*/ >>> >>> -bool >>> -probably_never_executed_bb_p (struct function *fun, const_basic_block bb) >>> +/* Return true if profile COUNT and FREQUENCY, or function FUN static >>> + node frequency reflects never being executed. */ >>> + >>> +static bool >>> +probably_never_executed (struct function *fun, >>> + gcov_type count, int frequency) >>> { >>> gcc_checking_assert (fun); >>> if (profile_status_for_function (fun) == PROFILE_READ) >>> { >>> - if ((bb->count * 4 + profile_info->runs / 2) / profile_info->runs > 0) >>> + if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0) >>> return false; >>> - if (!bb->frequency) >>> - return true; >>> - if (!ENTRY_BLOCK_PTR->frequency) >>> - return false; >>> - if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE) >>> - { >>> - return (RDIV (bb->frequency * ENTRY_BLOCK_PTR->count, >>> - ENTRY_BLOCK_PTR->frequency) >>> - < REG_BR_PROB_BASE / 4); >>> - } >>> + // If this is a profiled function (entry bb non-zero count), then base >>> + // the coldness decision on the frequency. This will handle cases where >>> + // counts are not updated properly during optimizations or expansion. >>> + if (ENTRY_BLOCK_PTR->count) >>> + return frequency == 0; >>> + // Unprofiled function, frequencies statically assigned. All bbs are >>> + // treated as cold. >> >> I would avoid combining C and C++ comments in the function. >> Did you get some data on how many basic blocks we now consider hot? >> >> The previous implemntation consdered block as never executed when frequencies >> indicates that it is executed in at most 1/4th of invocations of program. >> You essentially chnage to 1/10000. The first seems bit too high given the >> way we distribute probabilities in dojump and firends, second looks too low. > > Actually, I don't think the current code is detecting when the > frequencies indicate it was executed 1/4 time. 
The current code takes > a ratio of the entry block count, and compares it to > REG_BR_PROB_BASE/4, which seems like the wrong comparison for profile > counts. Shouldn't this be something like: > > gcov_type computed_count = RDIV (frequency * ENTRY_BLOCK_PTR->count, > ENTRY_BLOCK_PTR->frequency) > if ((computed_count * 4 + profile_info->runs / 2) / profile_info->runs > 0) > return false; > return true; > > i.e. do the same check we do for bb->count above. And the check > guarding this is looking for ENTRY_BLOCK_PTR->count < > REG_BR_PROB_BASE, which also doesn't seem right, although we need to > ensure that we don't overflow when multiplying by frequency. I have a variant of this change that works well, and has the proper overflow checking. This gets rid of 13 failures when there is 1 train run. Changing the required ratio to 1/100 training runs instead of 1/4 reduces the single train run failures by another 5, since smaller frequencies are handled even when the count has been truncated to 0. I found a couple more failures with 1 train run were due to inlining profile update issues, which I have a fix for. At that point, the failures are the same between the 1 train run and 100 train run cases. After that, there are 2 remaining failures (both with 1 train and 100 train runs), that go away when I disable loop unrolling, that I haven't looked at yet. I'll send a patch with the above changes though hopefully tonight. What do you think of changing the required execution count to profile run ratio to 1/100? Teresa > > Teresa > >> >> The change introducing probably_never_executed with the current logic is OK. >> We may want to fine tune the ratio. >> >> Honza >>> return true; >>> } >>> if ((!profile_info || !flag_branch_probabilities) >>> @@ -256,19 +256,21 @@ maybe_hot_edge_p (edge e) >>> } >>> >>> >>> +/* Return true in case BB is probably never executed. 
*/ >>> + >>> +bool >>> +probably_never_executed_bb_p (struct function *fun, const_basic_block bb) >>> +{ >>> + return probably_never_executed (fun, bb->count, bb->frequency); >>> +} >>> + >>> + >>> /* Return true in case edge E is probably never executed. */ >>> >>> bool >>> probably_never_executed_edge_p (struct function *fun, edge e) >>> { >>> - gcc_checking_assert (fun); >>> - if (profile_info && flag_branch_probabilities) >>> - return ((e->count + profile_info->runs / 2) / profile_info->runs) == 0; >>> - if ((!profile_info || !flag_branch_probabilities) >>> - && (cgraph_get_node (fun->decl)->frequency >>> - == NODE_FREQUENCY_UNLIKELY_EXECUTED)) >>> - return true; >>> - return false; >>> + return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e)); >>> } >>> >>> /* Return true if NODE should be optimized for size. */ > > > > -- > Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
Index: bb-reorder.c =================================================================== --- bb-reorder.c (revision 202947) +++ bb-reorder.c (working copy) @@ -1564,8 +1564,25 @@ find_rarely_executed_basic_blocks_and_crossing_edg /* Mark which partition (hot/cold) each basic block belongs in. */ FOR_EACH_BB (bb) { + bool cold_bb = false; if (probably_never_executed_bb_p (cfun, bb)) { + /* Handle profile insanities created by upstream optimizations + by also checking the incoming edge weights. If there is a non-cold + incoming edge, conservatively prevent this block from being split + into the cold section. */ + cold_bb = true; + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (!probably_never_executed_edge_p (cfun, e)) + { + cold_bb = false; + break; + } + } + } + if (cold_bb) + { BB_SET_PARTITION (bb, BB_COLD_PARTITION); cold_bb_count++; } Index: predict.c =================================================================== --- predict.c (revision 202947) +++ predict.c (working copy) @@ -226,26 +226,26 @@ maybe_hot_edge_p (edge e) } -/* Return true in case BB is probably never executed. */ -bool -probably_never_executed_bb_p (struct function *fun, const_basic_block bb) +/* Return true if profile COUNT and FREQUENCY, or function FUN static + node frequency reflects never being executed. 
*/ + +static bool +probably_never_executed (struct function *fun, + gcov_type count, int frequency) { gcc_checking_assert (fun); if (profile_status_for_function (fun) == PROFILE_READ) { - if ((bb->count * 4 + profile_info->runs / 2) / profile_info->runs > 0) + if ((count * 4 + profile_info->runs / 2) / profile_info->runs > 0) return false; - if (!bb->frequency) - return true; - if (!ENTRY_BLOCK_PTR->frequency) - return false; - if (ENTRY_BLOCK_PTR->count && ENTRY_BLOCK_PTR->count < REG_BR_PROB_BASE) - { - return (RDIV (bb->frequency * ENTRY_BLOCK_PTR->count, - ENTRY_BLOCK_PTR->frequency) - < REG_BR_PROB_BASE / 4); - } + // If this is a profiled function (entry bb non-zero count), then base + // the coldness decision on the frequency. This will handle cases where + // counts are not updated properly during optimizations or expansion. + if (ENTRY_BLOCK_PTR->count) + return frequency == 0; + // Unprofiled function, frequencies statically assigned. All bbs are + // treated as cold. return true; } if ((!profile_info || !flag_branch_probabilities) @@ -256,19 +256,21 @@ maybe_hot_edge_p (edge e) } +/* Return true in case BB is probably never executed. */ + +bool +probably_never_executed_bb_p (struct function *fun, const_basic_block bb) +{ + return probably_never_executed (fun, bb->count, bb->frequency); +} + + /* Return true in case edge E is probably never executed. */ bool probably_never_executed_edge_p (struct function *fun, edge e) { - gcc_checking_assert (fun); - if (profile_info && flag_branch_probabilities) - return ((e->count + profile_info->runs / 2) / profile_info->runs) == 0; - if ((!profile_info || !flag_branch_probabilities) - && (cgraph_get_node (fun->decl)->frequency - == NODE_FREQUENCY_UNLIKELY_EXECUTED)) - return true; - return false; + return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e)); } /* Return true if NODE should be optimized for size. */