diff mbox series

[v8,09/12] Delay caller error reporting for musttail

Message ID 20240622185557.1589179-10-ak@linux.intel.com
State New
Headers show
Series [v8,01/12] Improve must tail in RTL backend | expand

Commit Message

Andi Kleen June 22, 2024, 6:54 p.m. UTC
Move the error reporting for caller attributes to be
after the tail call discovery, so that we can give proper
error messages tagged to the calls.

gcc/ChangeLog:

	* tree-tailcall.cc (maybe_error_musttail): Declare.
	(suitable_for_tail_opt_p): Take call and report errors.
	(suitable_for_tail_call_opt_p): Take call and report errors.
	(find_tail_calls): Report caller errors after discovery.
	(tree_optimize_tail_calls_1): Remove caller suitableness check.
---
 gcc/tree-tailcall.cc | 62 ++++++++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 19 deletions(-)

Comments

Richard Biener July 5, 2024, 11:45 a.m. UTC | #1
On Sat, Jun 22, 2024 at 9:00 PM Andi Kleen <ak@linux.intel.com> wrote:
>
> Move the error reporting for caller attributes to be
> after the tail call discovery, so that we can give proper
> error messages tagged to the calls.

Hmm.  This all gets a bit awkward.  I realize that early checking
gets us less compile-time unnecessarily spent for searching for
a tail call - but at least for the musttail case parsing constraints
should put a practical limit on how far to look?

So what I wonder is whether it would be better to separate
searching for a (musttail) candidate separate from validation?

We could for example invoke find_tail_calls twice, once to
find a musttail candidate (can there be multiple ones?) and once
to validate and error?  Would that make the delaying less awkward?

> gcc/ChangeLog:
>
>         * tree-tailcall.cc (maybe_error_musttail): Declare.
>         (suitable_for_tail_opt_p): Take call and report errors.
>         (suitable_for_tail_call_opt_p): Take call and report errors.
>         (find_tail_calls): Report caller errors after discovery.
>         (tree_optimize_tail_calls_1): Remove caller suitableness check.
> ---
>  gcc/tree-tailcall.cc | 62 ++++++++++++++++++++++++++++++--------------
>  1 file changed, 43 insertions(+), 19 deletions(-)
>
> diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
> index 4687e20e61d0..a77fa1511415 100644
> --- a/gcc/tree-tailcall.cc
> +++ b/gcc/tree-tailcall.cc
> @@ -133,14 +133,20 @@ static tree m_acc, a_acc;
>
>  static bitmap tailr_arg_needs_copy;
>
> +static void maybe_error_musttail (gcall *call, const char *err);
> +
>  /* Returns false when the function is not suitable for tail call optimization
> -   from some reason (e.g. if it takes variable number of arguments).  */
> +   from some reason (e.g. if it takes variable number of arguments). CALL
> +   is call to report for.  */
>
>  static bool
> -suitable_for_tail_opt_p (void)
> +suitable_for_tail_opt_p (gcall *call)
>  {
>    if (cfun->stdarg)
> -    return false;
> +    {
> +      maybe_error_musttail (call, _("caller uses stdargs"));
> +      return false;
> +    }
>
>    return true;
>  }
> @@ -148,35 +154,47 @@ suitable_for_tail_opt_p (void)
>  /* Returns false when the function is not suitable for tail call optimization
>     for some reason (e.g. if it takes variable number of arguments).
>     This test must pass in addition to suitable_for_tail_opt_p in order to make
> -   tail call discovery happen.  */
> +   tail call discovery happen. CALL is call to report error for.  */
>
>  static bool
> -suitable_for_tail_call_opt_p (void)
> +suitable_for_tail_call_opt_p (gcall *call)
>  {
>    tree param;
>
>    /* alloca (until we have stack slot life analysis) inhibits
>       sibling call optimizations, but not tail recursion.  */
>    if (cfun->calls_alloca)
> -    return false;
> +    {
> +      maybe_error_musttail (call, _("caller uses alloca"));
> +      return false;
> +    }
>
>    /* If we are using sjlj exceptions, we may need to add a call to
>       _Unwind_SjLj_Unregister at exit of the function.  Which means
>       that we cannot do any sibcall transformations.  */
>    if (targetm_common.except_unwind_info (&global_options) == UI_SJLJ
>        && current_function_has_exception_handlers ())
> -    return false;
> +    {
> +      maybe_error_musttail (call, _("caller uses sjlj exceptions"));
> +      return false;
> +    }
>
>    /* Any function that calls setjmp might have longjmp called from
>       any called function.  ??? We really should represent this
>       properly in the CFG so that this needn't be special cased.  */
>    if (cfun->calls_setjmp)
> -    return false;
> +    {
> +      maybe_error_musttail (call, _("caller uses setjmp"));
> +      return false;
> +    }
>
>    /* Various targets don't handle tail calls correctly in functions
>       that call __builtin_eh_return.  */
>    if (cfun->calls_eh_return)
> -    return false;
> +    {
> +      maybe_error_musttail (call, _("caller uses __builtin_eh_return"));
> +      return false;
> +    }
>
>    /* ??? It is OK if the argument of a function is taken in some cases,
>       but not in all cases.  See PR15387 and PR19616.  Revisit for 4.1.  */
> @@ -184,7 +202,10 @@ suitable_for_tail_call_opt_p (void)
>         param;
>         param = DECL_CHAIN (param))
>      if (TREE_ADDRESSABLE (param))
> -      return false;
> +      {
> +       maybe_error_musttail (call, _("address of caller arguments taken"));
> +       return false;
> +      }
>
>    return true;
>  }
> @@ -445,10 +466,12 @@ static live_vars_map *live_vars;
>  static vec<bitmap_head> live_vars_vec;
>
>  /* Finds tailcalls falling into basic block BB. The list of found tailcalls is
> -   added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail.  */
> +   added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail.
> +   Update OPT_TAILCALLS as output parameter.  */
>
>  static void
> -find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail)
> +find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
> +                bool &opt_tailcalls)
>  {
>    tree ass_var = NULL_TREE, ret_var, func, param;
>    gimple *stmt;
> @@ -526,11 +549,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail)
>        edge_iterator ei;
>        /* Recurse to the predecessors.  */
>        FOR_EACH_EDGE (e, ei, bb->preds)
> -       find_tail_calls (e->src, ret, only_musttail);
> +       find_tail_calls (e->src, ret, only_musttail, opt_tailcalls);
>
>        return;
>      }
>
> +  if (!suitable_for_tail_opt_p (call))
> +    return;
> +
> +  if (!suitable_for_tail_call_opt_p (call))
> +    opt_tailcalls = false;
> +
>    /* If the LHS of our call is not just a simple register or local
>       variable, we can't transform this into a tail or sibling call.
>       This situation happens, in (e.g.) "*p = foo()" where foo returns a
> @@ -1200,17 +1229,12 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall)
>    tree param;
>    edge_iterator ei;
>
> -  if (!suitable_for_tail_opt_p ())
> -    return 0;
> -  if (opt_tailcalls)
> -    opt_tailcalls = suitable_for_tail_call_opt_p ();
> -
>    FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
>      {
>        /* Only traverse the normal exits, i.e. those that end with return
>          statement.  */
>        if (safe_is_a <greturn *> (*gsi_last_bb (e->src)))
> -       find_tail_calls (e->src, &tailcalls, only_mustcall);
> +       find_tail_calls (e->src, &tailcalls, only_mustcall, opt_tailcalls);
>      }
>
>    if (live_vars)
> --
> 2.45.2
>
Andi Kleen July 6, 2024, 5:08 p.m. UTC | #2
On Fri, Jul 05, 2024 at 01:45:17PM +0200, Richard Biener wrote:
> On Sat, Jun 22, 2024 at 9:00 PM Andi Kleen <ak@linux.intel.com> wrote:
> >
> > Move the error reporting for caller attributes to be
> > after the tail call discovery, so that we can give proper
> > error messages tagged to the calls.
> 
> Hmm.  This all gets a bit awkward.  I realize that early checking
> gets us less compile-time unnecessarily spent for searching for
> a tail call - but at least for the musttail case parsing constraints
> should put a practical limit on how far to look?

All the top level checks are for obscure situations, so it's unlikely
that it makes much difference for compile time either way.

> 
> So what I wonder is whether it would be better to separate
> searching for a (musttail) candidate separate from validation?
> 
> We could for example invoke find_tail_calls twice, once to
> find a musttail candidate (can there be multiple ones?) and once
> to validate and error?  Would that make the delaying less awkward?

There can be multiple musttails in a function, in theory
one for every return.

I'm not sure I see the awkward part? (other than perhaps
the not-quite-natural accumulation of opt_tailcalls). There
are alots of checks before and after discovery. This just
moves them all to be after.

If the top level checks were done based on a discovered 
list you would need extra loops to walk the candidates 
later and error. It wouldn't be any simpler at least.

Overall the logic in this pass is rather convoluted and
could deserve some cleanups and separation of concerns.
e.g. it would be better to separate tail calls and tail
recursion. But I'm not trying to rewrite the pass here.

-Andi
Richard Biener July 8, 2024, 6:55 a.m. UTC | #3
On Sat, Jul 6, 2024 at 7:08 PM Andi Kleen <ak@linux.intel.com> wrote:
>
> On Fri, Jul 05, 2024 at 01:45:17PM +0200, Richard Biener wrote:
> > On Sat, Jun 22, 2024 at 9:00 PM Andi Kleen <ak@linux.intel.com> wrote:
> > >
> > > Move the error reporting for caller attributes to be
> > > after the tail call discovery, so that we can give proper
> > > error messages tagged to the calls.
> >
> > Hmm.  This all gets a bit awkward.  I realize that early checking
> > gets us less compile-time unnecessarily spent for searching for
> > a tail call - but at least for the musttail case parsing constraints
> > should put a practical limit on how far to look?
>
> All the top level checks are for obscure situations, so it's unlikely
> that it makes much difference for compile time either way.
>
> >
> > So what I wonder is whether it would be better to separate
> > searching for a (musttail) candidate separate from validation?
> >
> > We could for example invoke find_tail_calls twice, once to
> > find a musttail candidate (can there be multiple ones?) and once
> > to validate and error?  Would that make the delaying less awkward?
>
> There can be multiple musttails in a function, in theory
> one for every return.
>
> I'm not sure I see the awkward part? (other than perhaps
> the not-quite-natural accumulation of opt_tailcalls). There
> are alots of checks before and after discovery. This just
> moves them all to be after.
>
> If the top level checks were done based on a discovered
> list you would need extra loops to walk the candidates
> later and error. It wouldn't be any simpler at least.

Thanks for clarifying.

> Overall the logic in this pass is rather convoluted and
> could deserve some cleanups and separation of concerns.
> e.g. it would be better to separate tail calls and tail
> recursion. But I'm not trying to rewrite the pass here.

Understood.  For a v9, can you squash the tree-tailcall.cc changes
please?

Thanks,
Richard.

> -Andi
Andi Kleen July 8, 2024, 3:27 p.m. UTC | #4
> > Overall the logic in this pass is rather convoluted and
> > could deserve some cleanups and separation of concerns.
> > e.g. it would be better to separate tail calls and tail
> > recursion. But I'm not trying to rewrite the pass here.
> 
> Understood.  For a v9, can you squash the tree-tailcall.cc changes
> please?

I squashed all the tree-tailcall error changes. The new pass is still
a separate patch. I would prefer to keep it this way.

BTW I'm surprised you prefer that. Normally smaller patches are better
if bisecting is needed.

-Andi
diff mbox series

Patch

diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc
index 4687e20e61d0..a77fa1511415 100644
--- a/gcc/tree-tailcall.cc
+++ b/gcc/tree-tailcall.cc
@@ -133,14 +133,20 @@  static tree m_acc, a_acc;
 
 static bitmap tailr_arg_needs_copy;
 
+static void maybe_error_musttail (gcall *call, const char *err);
+
 /* Returns false when the function is not suitable for tail call optimization
-   from some reason (e.g. if it takes variable number of arguments).  */
+   from some reason (e.g. if it takes variable number of arguments). CALL
+   is call to report for.  */
 
 static bool
-suitable_for_tail_opt_p (void)
+suitable_for_tail_opt_p (gcall *call)
 {
   if (cfun->stdarg)
-    return false;
+    {
+      maybe_error_musttail (call, _("caller uses stdargs"));
+      return false;
+    }
 
   return true;
 }
@@ -148,35 +154,47 @@  suitable_for_tail_opt_p (void)
 /* Returns false when the function is not suitable for tail call optimization
    for some reason (e.g. if it takes variable number of arguments).
    This test must pass in addition to suitable_for_tail_opt_p in order to make
-   tail call discovery happen.  */
+   tail call discovery happen. CALL is call to report error for.  */
 
 static bool
-suitable_for_tail_call_opt_p (void)
+suitable_for_tail_call_opt_p (gcall *call)
 {
   tree param;
 
   /* alloca (until we have stack slot life analysis) inhibits
      sibling call optimizations, but not tail recursion.  */
   if (cfun->calls_alloca)
-    return false;
+    {
+      maybe_error_musttail (call, _("caller uses alloca"));
+      return false;
+    }
 
   /* If we are using sjlj exceptions, we may need to add a call to
      _Unwind_SjLj_Unregister at exit of the function.  Which means
      that we cannot do any sibcall transformations.  */
   if (targetm_common.except_unwind_info (&global_options) == UI_SJLJ
       && current_function_has_exception_handlers ())
-    return false;
+    {
+      maybe_error_musttail (call, _("caller uses sjlj exceptions"));
+      return false;
+    }
 
   /* Any function that calls setjmp might have longjmp called from
      any called function.  ??? We really should represent this
      properly in the CFG so that this needn't be special cased.  */
   if (cfun->calls_setjmp)
-    return false;
+    {
+      maybe_error_musttail (call, _("caller uses setjmp"));
+      return false;
+    }
 
   /* Various targets don't handle tail calls correctly in functions
      that call __builtin_eh_return.  */
   if (cfun->calls_eh_return)
-    return false;
+    {
+      maybe_error_musttail (call, _("caller uses __builtin_eh_return"));
+      return false;
+    }
 
   /* ??? It is OK if the argument of a function is taken in some cases,
      but not in all cases.  See PR15387 and PR19616.  Revisit for 4.1.  */
@@ -184,7 +202,10 @@  suitable_for_tail_call_opt_p (void)
        param;
        param = DECL_CHAIN (param))
     if (TREE_ADDRESSABLE (param))
-      return false;
+      {
+	maybe_error_musttail (call, _("address of caller arguments taken"));
+	return false;
+      }
 
   return true;
 }
@@ -445,10 +466,12 @@  static live_vars_map *live_vars;
 static vec<bitmap_head> live_vars_vec;
 
 /* Finds tailcalls falling into basic block BB. The list of found tailcalls is
-   added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail.  */
+   added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail.
+   Update OPT_TAILCALLS as output parameter.  */
 
 static void
-find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail)
+find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail,
+		 bool &opt_tailcalls)
 {
   tree ass_var = NULL_TREE, ret_var, func, param;
   gimple *stmt;
@@ -526,11 +549,17 @@  find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail)
       edge_iterator ei;
       /* Recurse to the predecessors.  */
       FOR_EACH_EDGE (e, ei, bb->preds)
-	find_tail_calls (e->src, ret, only_musttail);
+	find_tail_calls (e->src, ret, only_musttail, opt_tailcalls);
 
       return;
     }
 
+  if (!suitable_for_tail_opt_p (call))
+    return;
+
+  if (!suitable_for_tail_call_opt_p (call))
+    opt_tailcalls = false;
+
   /* If the LHS of our call is not just a simple register or local
      variable, we can't transform this into a tail or sibling call.
      This situation happens, in (e.g.) "*p = foo()" where foo returns a
@@ -1200,17 +1229,12 @@  tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall)
   tree param;
   edge_iterator ei;
 
-  if (!suitable_for_tail_opt_p ())
-    return 0;
-  if (opt_tailcalls)
-    opt_tailcalls = suitable_for_tail_call_opt_p ();
-
   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
     {
       /* Only traverse the normal exits, i.e. those that end with return
 	 statement.  */
       if (safe_is_a <greturn *> (*gsi_last_bb (e->src)))
-	find_tail_calls (e->src, &tailcalls, only_mustcall);
+	find_tail_calls (e->src, &tailcalls, only_mustcall, opt_tailcalls);
     }
 
   if (live_vars)