Message ID | 20240622185557.1589179-10-ak@linux.intel.com |
---|---|
State | New |
Headers | show |
Series | [v8,01/12] Improve must tail in RTL backend | expand |
On Sat, Jun 22, 2024 at 9:00 PM Andi Kleen <ak@linux.intel.com> wrote: > > Move the error reporting for caller attributes to be > after the tail call discovery, so that we can give proper > error messages tagged to the calls. Hmm. This all gets a bit awkward. I realize that early checking gets us less compile-time unnecessarily spent for searching for a tail call - but at least for the musttail case parsing constraints should put a practical limit on how far to look? So what I wonder is whether it would be better to separate searching for a (musttail) candidate separate from validation? We could for example invoke find_tail_calls twice, once to find a musttail candidate (can there be multiple ones?) and once to validate and error? Would that make the delaying less awkward? > gcc/ChangeLog: > > * tree-tailcall.cc (maybe_error_musttail): Declare. > (suitable_for_tail_opt_p): Take call and report errors. > (suitable_for_tail_call_opt_p): Take call and report errors. > (find_tail_calls): Report caller errors after discovery. > (tree_optimize_tail_calls_1): Remove caller suitableness check. > --- > gcc/tree-tailcall.cc | 62 ++++++++++++++++++++++++++++++-------------- > 1 file changed, 43 insertions(+), 19 deletions(-) > > diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc > index 4687e20e61d0..a77fa1511415 100644 > --- a/gcc/tree-tailcall.cc > +++ b/gcc/tree-tailcall.cc > @@ -133,14 +133,20 @@ static tree m_acc, a_acc; > > static bitmap tailr_arg_needs_copy; > > +static void maybe_error_musttail (gcall *call, const char *err); > + > /* Returns false when the function is not suitable for tail call optimization > - from some reason (e.g. if it takes variable number of arguments). */ > + from some reason (e.g. if it takes variable number of arguments). CALL > + is call to report for. */ > > static bool > -suitable_for_tail_opt_p (void) > +suitable_for_tail_opt_p (gcall *call) > { > if (cfun->stdarg) > - return false; > + { > + maybe_error_musttail (call, _("caller uses stdargs")); > + return false; > + } > > return true; > } > @@ -148,35 +154,47 @@ suitable_for_tail_opt_p (void) > /* Returns false when the function is not suitable for tail call optimization > for some reason (e.g. if it takes variable number of arguments). > This test must pass in addition to suitable_for_tail_opt_p in order to make > - tail call discovery happen. */ > + tail call discovery happen. CALL is call to report error for. */ > > static bool > -suitable_for_tail_call_opt_p (void) > +suitable_for_tail_call_opt_p (gcall *call) > { > tree param; > > /* alloca (until we have stack slot life analysis) inhibits > sibling call optimizations, but not tail recursion. */ > if (cfun->calls_alloca) > - return false; > + { > + maybe_error_musttail (call, _("caller uses alloca")); > + return false; > + } > > /* If we are using sjlj exceptions, we may need to add a call to > _Unwind_SjLj_Unregister at exit of the function. Which means > that we cannot do any sibcall transformations. */ > if (targetm_common.except_unwind_info (&global_options) == UI_SJLJ > && current_function_has_exception_handlers ()) > - return false; > + { > + maybe_error_musttail (call, _("caller uses sjlj exceptions")); > + return false; > + } > > /* Any function that calls setjmp might have longjmp called from > any called function. ??? We really should represent this > properly in the CFG so that this needn't be special cased. */ > if (cfun->calls_setjmp) > - return false; > + { > + maybe_error_musttail (call, _("caller uses setjmp")); > + return false; > + } > > /* Various targets don't handle tail calls correctly in functions > that call __builtin_eh_return. */ > if (cfun->calls_eh_return) > - return false; > + { > + maybe_error_musttail (call, _("caller uses __builtin_eh_return")); > + return false; > + } > > /* ??? It is OK if the argument of a function is taken in some cases, > but not in all cases. See PR15387 and PR19616. Revisit for 4.1. */ > @@ -184,7 +202,10 @@ suitable_for_tail_call_opt_p (void) > param; > param = DECL_CHAIN (param)) > if (TREE_ADDRESSABLE (param)) > - return false; > + { > + maybe_error_musttail (call, _("address of caller arguments taken")); > + return false; > + } > > return true; > } > @@ -445,10 +466,12 @@ static live_vars_map *live_vars; > static vec<bitmap_head> live_vars_vec; > > /* Finds tailcalls falling into basic block BB. The list of found tailcalls is > - added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. */ > + added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. > + Update OPT_TAILCALLS as output parameter. */ > > static void > -find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail) > +find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, > + bool &opt_tailcalls) > { > tree ass_var = NULL_TREE, ret_var, func, param; > gimple *stmt; > @@ -526,11 +549,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail) > edge_iterator ei; > /* Recurse to the predecessors. */ > FOR_EACH_EDGE (e, ei, bb->preds) > - find_tail_calls (e->src, ret, only_musttail); > + find_tail_calls (e->src, ret, only_musttail, opt_tailcalls); > > return; > } > > + if (!suitable_for_tail_opt_p (call)) > + return; > + > + if (!suitable_for_tail_call_opt_p (call)) > + opt_tailcalls = false; > + > /* If the LHS of our call is not just a simple register or local > variable, we can't transform this into a tail or sibling call. > This situation happens, in (e.g.) "*p = foo()" where foo returns a > @@ -1200,17 +1229,12 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall) > tree param; > edge_iterator ei; > > - if (!suitable_for_tail_opt_p ()) > - return 0; > - if (opt_tailcalls) > - opt_tailcalls = suitable_for_tail_call_opt_p (); > - > FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > { > /* Only traverse the normal exits, i.e. those that end with return > statement. */ > if (safe_is_a <greturn *> (*gsi_last_bb (e->src))) > - find_tail_calls (e->src, &tailcalls, only_mustcall); > + find_tail_calls (e->src, &tailcalls, only_mustcall, opt_tailcalls); > } > > if (live_vars) > -- > 2.45.2 >
On Fri, Jul 05, 2024 at 01:45:17PM +0200, Richard Biener wrote: > On Sat, Jun 22, 2024 at 9:00 PM Andi Kleen <ak@linux.intel.com> wrote: > > > > Move the error reporting for caller attributes to be > > after the tail call discovery, so that we can give proper > > error messages tagged to the calls. > > Hmm. This all gets a bit awkward. I realize that early checking > gets us less compile-time unnecessarily spent for searching for > a tail call - but at least for the musttail case parsing constraints > should put a practical limit on how far to look? All the top level checks are for obscure situations, so it's unlikely that it makes much difference for compile time either way. > > So what I wonder is whether it would be better to separate > searching for a (musttail) candidate separate from validation? > > We could for example invoke find_tail_calls twice, once to > find a musttail candidate (can there be multiple ones?) and once > to validate and error? Would that make the delaying less awkward? There can be multiple musttails in a function, in theory one for every return. I'm not sure I see the awkward part? (other than perhaps the not-quite-natural accumulation of opt_tailcalls). There are alots of checks before and after discovery. This just moves them all to be after. If the top level checks were done based on a discovered list you would need extra loops to walk the candidates later and error. It wouldn't be any simpler at least. Overall the logic in this pass is rather convoluted and could deserve some cleanups and separation of concerns. e.g. it would be better to separate tail calls and tail recursion. But I'm not trying to rewrite the pass here. -Andi
On Sat, Jul 6, 2024 at 7:08 PM Andi Kleen <ak@linux.intel.com> wrote: > > On Fri, Jul 05, 2024 at 01:45:17PM +0200, Richard Biener wrote: > > On Sat, Jun 22, 2024 at 9:00 PM Andi Kleen <ak@linux.intel.com> wrote: > > > > > > Move the error reporting for caller attributes to be > > > after the tail call discovery, so that we can give proper > > > error messages tagged to the calls. > > > > Hmm. This all gets a bit awkward. I realize that early checking > > gets us less compile-time unnecessarily spent for searching for > > a tail call - but at least for the musttail case parsing constraints > > should put a practical limit on how far to look? > > All the top level checks are for obscure situations, so it's unlikely > that it makes much difference for compile time either way. > > > > > So what I wonder is whether it would be better to separate > > searching for a (musttail) candidate separate from validation? > > > > We could for example invoke find_tail_calls twice, once to > > find a musttail candidate (can there be multiple ones?) and once > > to validate and error? Would that make the delaying less awkward? > > There can be multiple musttails in a function, in theory > one for every return. > > I'm not sure I see the awkward part? (other than perhaps > the not-quite-natural accumulation of opt_tailcalls). There > are alots of checks before and after discovery. This just > moves them all to be after. > > If the top level checks were done based on a discovered > list you would need extra loops to walk the candidates > later and error. It wouldn't be any simpler at least. Thanks for clarifying. > Overall the logic in this pass is rather convoluted and > could deserve some cleanups and separation of concerns. > e.g. it would be better to separate tail calls and tail > recursion. But I'm not trying to rewrite the pass here. Understood. For a v9, can you squash the tree-tailcall.cc changes please? Thanks, Richard. > -Andi
> > Overall the logic in this pass is rather convoluted and > > could deserve some cleanups and separation of concerns. > > e.g. it would be better to separate tail calls and tail > > recursion. But I'm not trying to rewrite the pass here. > > Understood. For a v9, can you squash the tree-tailcall.cc changes > please? I squashed all the tree-tailcall error changes. The new pass is still a separate patch. I would prefer to keep it this way. BTW I'm surprised you prefer that. Normally smaller patches are better if bisecting is needed. -Andi
diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc index 4687e20e61d0..a77fa1511415 100644 --- a/gcc/tree-tailcall.cc +++ b/gcc/tree-tailcall.cc @@ -133,14 +133,20 @@ static tree m_acc, a_acc; static bitmap tailr_arg_needs_copy; +static void maybe_error_musttail (gcall *call, const char *err); + /* Returns false when the function is not suitable for tail call optimization - from some reason (e.g. if it takes variable number of arguments). */ + from some reason (e.g. if it takes variable number of arguments). CALL + is call to report for. */ static bool -suitable_for_tail_opt_p (void) +suitable_for_tail_opt_p (gcall *call) { if (cfun->stdarg) - return false; + { + maybe_error_musttail (call, _("caller uses stdargs")); + return false; + } return true; } @@ -148,35 +154,47 @@ suitable_for_tail_opt_p (void) /* Returns false when the function is not suitable for tail call optimization for some reason (e.g. if it takes variable number of arguments). This test must pass in addition to suitable_for_tail_opt_p in order to make - tail call discovery happen. */ + tail call discovery happen. CALL is call to report error for. */ static bool -suitable_for_tail_call_opt_p (void) +suitable_for_tail_call_opt_p (gcall *call) { tree param; /* alloca (until we have stack slot life analysis) inhibits sibling call optimizations, but not tail recursion. */ if (cfun->calls_alloca) - return false; + { + maybe_error_musttail (call, _("caller uses alloca")); + return false; + } /* If we are using sjlj exceptions, we may need to add a call to _Unwind_SjLj_Unregister at exit of the function. Which means that we cannot do any sibcall transformations. */ if (targetm_common.except_unwind_info (&global_options) == UI_SJLJ && current_function_has_exception_handlers ()) - return false; + { + maybe_error_musttail (call, _("caller uses sjlj exceptions")); + return false; + } /* Any function that calls setjmp might have longjmp called from any called function. ??? We really should represent this properly in the CFG so that this needn't be special cased. */ if (cfun->calls_setjmp) - return false; + { + maybe_error_musttail (call, _("caller uses setjmp")); + return false; + } /* Various targets don't handle tail calls correctly in functions that call __builtin_eh_return. */ if (cfun->calls_eh_return) - return false; + { + maybe_error_musttail (call, _("caller uses __builtin_eh_return")); + return false; + } /* ??? It is OK if the argument of a function is taken in some cases, but not in all cases. See PR15387 and PR19616. Revisit for 4.1. */ @@ -184,7 +202,10 @@ suitable_for_tail_call_opt_p (void) param; param = DECL_CHAIN (param)) if (TREE_ADDRESSABLE (param)) - return false; + { + maybe_error_musttail (call, _("address of caller arguments taken")); + return false; + } return true; } @@ -445,10 +466,12 @@ static live_vars_map *live_vars; static vec<bitmap_head> live_vars_vec; /* Finds tailcalls falling into basic block BB. The list of found tailcalls is - added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. */ + added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. + Update OPT_TAILCALLS as output parameter. */ static void -find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail) +find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, + bool &opt_tailcalls) { tree ass_var = NULL_TREE, ret_var, func, param; gimple *stmt; @@ -526,11 +549,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail) edge_iterator ei; /* Recurse to the predecessors. */ FOR_EACH_EDGE (e, ei, bb->preds) - find_tail_calls (e->src, ret, only_musttail); + find_tail_calls (e->src, ret, only_musttail, opt_tailcalls); return; } + if (!suitable_for_tail_opt_p (call)) + return; + + if (!suitable_for_tail_call_opt_p (call)) + opt_tailcalls = false; + /* If the LHS of our call is not just a simple register or local variable, we can't transform this into a tail or sibling call. This situation happens, in (e.g.) "*p = foo()" where foo returns a @@ -1200,17 +1229,12 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall) tree param; edge_iterator ei; - if (!suitable_for_tail_opt_p ()) - return 0; - if (opt_tailcalls) - opt_tailcalls = suitable_for_tail_call_opt_p (); - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) { /* Only traverse the normal exits, i.e. those that end with return statement. */ if (safe_is_a <greturn *> (*gsi_last_bb (e->src))) - find_tail_calls (e->src, &tailcalls, only_mustcall); + find_tail_calls (e->src, &tailcalls, only_mustcall, opt_tailcalls); } if (live_vars)