Message ID | 20240622185557.1589179-8-ak@linux.intel.com |
---|---|
State | New |
Headers | show |
Series | [v8,01/12] Improve must tail in RTL backend | expand |
On Sat, Jun 22, 2024 at 8:59 PM Andi Kleen <ak@linux.intel.com> wrote: > > Enable the tailcall optimization for non optimizing builds, > but in this case only checks calls that have the musttail attribute set. > This makes musttail work without optimization. > > This is done with a new late musttail pass that is only active when > not optimizing. The new pass relies on tree-cfg to discover musttails. > This avoids a ~0.8% compiler run time penalty at -O0. > > gcc/ChangeLog: > > * function.h (struct function): Add has_musttail. > * lto-streamer-in.cc (input_struct_function_base): Stream > has_musttail. > * lto-streamer-out.cc (output_struct_function_base): Dito. > * passes.def (pass_musttail): Add. > * tree-cfg.cc (notice_special_calls): Record has_musttail. > (clear_special_calls): Clear has_musttail. > * tree-pass.h (make_pass_musttail): Add. > * tree-tailcall.cc (find_tail_calls): Handle only_musttail > argument. > (tree_optimize_tail_calls_1): Pass on only_musttail. > (execute_tail_calls): Pass only_musttail as false. > (class pass_musttail): Add. > (make_pass_musttail): Add. > --- > gcc/function.h | 3 ++ > gcc/lto-streamer-in.cc | 1 + > gcc/lto-streamer-out.cc | 1 + > gcc/passes.def | 1 + > gcc/tree-cfg.cc | 3 ++ > gcc/tree-pass.h | 1 + > gcc/tree-tailcall.cc | 66 +++++++++++++++++++++++++++++++++++------ > 7 files changed, 67 insertions(+), 9 deletions(-) > > diff --git a/gcc/function.h b/gcc/function.h > index c0ba6cc1531a..fbeadeaf4104 100644 > --- a/gcc/function.h > +++ b/gcc/function.h > @@ -430,6 +430,9 @@ struct GTY(()) function { > /* Nonzero when the tail call has been identified. */ > unsigned int tail_call_marked : 1; > > + /* Has musttail marked calls. */ > + unsigned int has_musttail : 1; > + > /* Nonzero if the current function contains a #pragma GCC unroll. */ > unsigned int has_unroll : 1; > > diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc > index ad0ca24007a0..2e592be80823 100644 > --- a/gcc/lto-streamer-in.cc > +++ b/gcc/lto-streamer-in.cc > @@ -1325,6 +1325,7 @@ input_struct_function_base (struct function *fn, class data_in *data_in, > fn->calls_eh_return = bp_unpack_value (&bp, 1); > fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1); > fn->has_simduid_loops = bp_unpack_value (&bp, 1); > + fn->has_musttail = bp_unpack_value (&bp, 1); > fn->assume_function = bp_unpack_value (&bp, 1); > fn->va_list_fpr_size = bp_unpack_value (&bp, 8); > fn->va_list_gpr_size = bp_unpack_value (&bp, 8); > diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc > index d4f728094ed5..0be381abbd96 100644 > --- a/gcc/lto-streamer-out.cc > +++ b/gcc/lto-streamer-out.cc > @@ -2290,6 +2290,7 @@ output_struct_function_base (struct output_block *ob, struct function *fn) > bp_pack_value (&bp, fn->calls_eh_return, 1); > bp_pack_value (&bp, fn->has_force_vectorize_loops, 1); > bp_pack_value (&bp, fn->has_simduid_loops, 1); > + bp_pack_value (&bp, fn->has_musttail, 1); > bp_pack_value (&bp, fn->assume_function, 1); > bp_pack_value (&bp, fn->va_list_fpr_size, 8); > bp_pack_value (&bp, fn->va_list_gpr_size, 8); > diff --git a/gcc/passes.def b/gcc/passes.def > index 041229e47a68..5b5390e6ac0b 100644 > --- a/gcc/passes.def > +++ b/gcc/passes.def > @@ -444,6 +444,7 @@ along with GCC; see the file COPYING3. If not see > NEXT_PASS (pass_tsan_O0); > NEXT_PASS (pass_sanopt); > NEXT_PASS (pass_cleanup_eh); > + NEXT_PASS (pass_musttail); > NEXT_PASS (pass_lower_resx); > NEXT_PASS (pass_nrv); > NEXT_PASS (pass_gimple_isel); > diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc > index 7fb7b92966be..e6fd1294b958 100644 > --- a/gcc/tree-cfg.cc > +++ b/gcc/tree-cfg.cc > @@ -2290,6 +2290,8 @@ notice_special_calls (gcall *call) > cfun->calls_alloca = true; > if (flags & ECF_RETURNS_TWICE) > cfun->calls_setjmp = true; > + if (gimple_call_must_tail_p (call)) > + cfun->has_musttail = true; > } > > > @@ -2301,6 +2303,7 @@ clear_special_calls (void) > { > cfun->calls_alloca = false; > cfun->calls_setjmp = false; > + cfun->has_musttail = false; > } > > /* Remove PHI nodes associated with basic block BB and all edges out of BB. */ > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index edebb2be245d..59e53558034f 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -368,6 +368,7 @@ extern gimple_opt_pass *make_pass_sra (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_sra_early (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tail_recursion (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tail_calls (gcc::context *ctxt); > +extern gimple_opt_pass *make_pass_musttail (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_fix_loops (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt); > extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt); > diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc > index e9f7f8a12b3a..0c6df10e64f7 100644 > --- a/gcc/tree-tailcall.cc > +++ b/gcc/tree-tailcall.cc > @@ -408,10 +408,10 @@ static live_vars_map *live_vars; > static vec<bitmap_head> live_vars_vec; > > /* Finds tailcalls falling into basic block BB. The list of found tailcalls is > - added to the start of RET. */ > + added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. */ > > static void > -find_tail_calls (basic_block bb, struct tailcall **ret) > +find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail) > { > tree ass_var = NULL_TREE, ret_var, func, param; > gimple *stmt; > @@ -445,6 +445,9 @@ find_tail_calls (basic_block bb, struct tailcall **ret) > if (is_gimple_call (stmt)) > { > call = as_a <gcall *> (stmt); > + /* Handle only musttail calls when not optimizing. */ > + if (only_musttail && !gimple_call_must_tail_p (call)) > + return; > ass_var = gimple_call_lhs (call); > break; > } > @@ -467,7 +470,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret) > edge_iterator ei; > /* Recurse to the predecessors. */ > FOR_EACH_EDGE (e, ei, bb->preds) > - find_tail_calls (e->src, ret); > + find_tail_calls (e->src, ret, only_musttail); > > return; > } > @@ -528,7 +531,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret) > func = gimple_call_fndecl (call); > if (func > && !fndecl_built_in_p (func) > - && recursive_call_p (current_function_decl, func)) > + && recursive_call_p (current_function_decl, func) > + && !only_musttail) > { > tree arg; > > @@ -1094,10 +1098,11 @@ create_tailcall_accumulator (const char *label, basic_block bb, tree init) > } > > /* Optimizes tail calls in the function, turning the tail recursion > - into iteration. */ > + into iteration. When ONLY_MUSTCALL is true only optimize mustcall > + marked calls. */ > > static unsigned int > -tree_optimize_tail_calls_1 (bool opt_tailcalls) > +tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall) > { > edge e; > bool phis_constructed = false; > @@ -1117,7 +1122,7 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls) > /* Only traverse the normal exits, i.e. those that end with return > statement. */ > if (safe_is_a <greturn *> (*gsi_last_bb (e->src))) > - find_tail_calls (e->src, &tailcalls); > + find_tail_calls (e->src, &tailcalls, only_mustcall); > } > > if (live_vars) > @@ -1228,7 +1233,7 @@ gate_tail_calls (void) > static unsigned int > execute_tail_calls (void) > { > - return tree_optimize_tail_calls_1 (true); > + return tree_optimize_tail_calls_1 (true, false); > } > > namespace { > @@ -1261,7 +1266,7 @@ public: > bool gate (function *) final override { return gate_tail_calls (); } > unsigned int execute (function *) final override > { > - return tree_optimize_tail_calls_1 (false); > + return tree_optimize_tail_calls_1 (false, false); > } > > }; // class pass_tail_recursion > @@ -1312,3 +1317,46 @@ make_pass_tail_calls (gcc::context *ctxt) > { > return new pass_tail_calls (ctxt); > } > + > +namespace { > + > +const pass_data pass_data_musttail = > +{ > + GIMPLE_PASS, /* type */ > + "musttail", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_NONE, /* tv_id */ > + ( PROP_cfg | PROP_ssa ), /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + 0, /* todo_flags_finish */ > +}; > + > +class pass_musttail : public gimple_opt_pass > +{ > +public: > + pass_musttail (gcc::context *ctxt) > + : gimple_opt_pass (pass_data_musttail, ctxt) > + {} > + > + /* opt_pass methods: */ > + /* This pass is only used when not optimizing to make [[musttail]] still > + work. */ > + bool gate (function *) final override { return !flag_optimize_sibling_calls; } Shouldn't this check f->has_musttail only? That is, I would expect -fno-optimize-sibling-calls to still tail-call [[musttail]]? The comment says the pass only runs when not optimizing - so maybe you wanted to do return optimize == 0;? Otherwise this patch looks OK. > + unsigned int execute (function *f) final override > + { > + if (!f->has_musttail) > + return 0; > + return tree_optimize_tail_calls_1 (true, true); > + } > + > +}; // class pass_musttail > + > +} // anon namespace > + > +gimple_opt_pass * > +make_pass_musttail (gcc::context *ctxt) > +{ > + return new pass_musttail (ctxt); > +} > -- > 2.45.2 >
> > +class pass_musttail : public gimple_opt_pass > > +{ > > +public: > > + pass_musttail (gcc::context *ctxt) > > + : gimple_opt_pass (pass_data_musttail, ctxt) > > + {} > > + > > + /* opt_pass methods: */ > > + /* This pass is only used when not optimizing to make [[musttail]] still > > + work. */ > > + bool gate (function *) final override { return !flag_optimize_sibling_calls; } > > Shouldn't this check f->has_musttail only? That is, I would expect > -fno-optimize-sibling-calls to still tail-call [[musttail]]? The comment says > the pass only runs when not optimizing - so maybe you wanted to do > return optimize == 0;? When flag_optimize_sibling_call is set the other tailcall pass will take care of the musttails. It is only needed when that one doesn't run. So I think looking at that flag is correct. But I should move the f->has_musttail check into the gate (done) and clarified the comment because it is not specific to optimizing. Thanks, -Andi
On Sat, Jul 6, 2024 at 6:07 PM Andi Kleen <ak@linux.intel.com> wrote: > > > > +class pass_musttail : public gimple_opt_pass > > > +{ > > > +public: > > > + pass_musttail (gcc::context *ctxt) > > > + : gimple_opt_pass (pass_data_musttail, ctxt) > > > + {} > > > + > > > + /* opt_pass methods: */ > > > + /* This pass is only used when not optimizing to make [[musttail]] still > > > + work. */ > > > + bool gate (function *) final override { return !flag_optimize_sibling_calls; } > > > > Shouldn't this check f->has_musttail only? That is, I would expect > > -fno-optimize-sibling-calls to still tail-call [[musttail]]? The comment says > > the pass only runs when not optimizing - so maybe you wanted to do > > return optimize == 0;? > > When flag_optimize_sibling_call is set the other tailcall pass will > take care of the musttails. It is only needed when that one doesn't run. > So I think looking at that flag is correct. Ah, I see. So this pass is responsible for both -O0 and -fno-optimized-sibling-calls. But I'm quite sure the other pass doesn't run with -O0 -foptimize-sibling-calls, does it? > But I should move the f->has_musttail check into the gate (done) and > clarified the comment because it is not specific to optimizing. Yeah, clarifying the comment would be nice. Thanks, Richard. > Thanks, > -Andi
On Mon, Jul 08, 2024 at 08:53:27AM +0200, Richard Biener wrote: > Ah, I see. So this pass is responsible for both -O0 and > -fno-optimized-sibling-calls. > But I'm quite sure the other pass doesn't run with -O0 > -foptimize-sibling-calls, does it? It does run: ./cc1 -O0 -fdump-passes -foptimize-sibling-calls t.c 2>&1 | grep tail tree-tailr1 : ON tree-tailr2 : ON tree-tailc : ON But I suspect without the earlier expand patch to adjust the cfg rebuild it may ICE on some of the targets. -Andi
> Am 08.07.2024 um 17:22 schrieb Andi Kleen <ak@linux.intel.com>: > > On Mon, Jul 08, 2024 at 08:53:27AM +0200, Richard Biener wrote: >> Ah, I see. So this pass is responsible for both -O0 and >> -fno-optimized-sibling-calls. >> But I'm quite sure the other pass doesn't run with -O0 >> -foptimize-sibling-calls, does it? > > It does run: > > ./cc1 -O0 -fdump-passes -foptimize-sibling-calls t.c 2>&1 | grep tail > tree-tailr1 : ON > tree-tailr2 : ON > tree-tailc : ON I would not trust -fdump-passes, IIRC that just executes the gate functions. > But I suspect without the earlier expand patch to adjust the cfg rebuild it may > ICE on some of the targets. > > -Andi
On Mon, Jul 08, 2024 at 05:27:53PM +0200, Richard Biener wrote: > > > > Am 08.07.2024 um 17:22 schrieb Andi Kleen <ak@linux.intel.com>: > > > > On Mon, Jul 08, 2024 at 08:53:27AM +0200, Richard Biener wrote: > >> Ah, I see. So this pass is responsible for both -O0 and > >> -fno-optimized-sibling-calls. > >> But I'm quite sure the other pass doesn't run with -O0 > >> -foptimize-sibling-calls, does it? > > > > It does run: > > > > ./cc1 -O0 -fdump-passes -foptimize-sibling-calls t.c 2>&1 | grep tail > > tree-tailr1 : ON > > tree-tailr2 : ON > > tree-tailc : ON > > I would not trust -fdump-passes, IIRC that just executes the gate functions. You're right it's not invoked according to gdb. So the musttail gate needs a || optimize == 0. -Andi
diff --git a/gcc/function.h b/gcc/function.h index c0ba6cc1531a..fbeadeaf4104 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -430,6 +430,9 @@ struct GTY(()) function { /* Nonzero when the tail call has been identified. */ unsigned int tail_call_marked : 1; + /* Has musttail marked calls. */ + unsigned int has_musttail : 1; + /* Nonzero if the current function contains a #pragma GCC unroll. */ unsigned int has_unroll : 1; diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc index ad0ca24007a0..2e592be80823 100644 --- a/gcc/lto-streamer-in.cc +++ b/gcc/lto-streamer-in.cc @@ -1325,6 +1325,7 @@ input_struct_function_base (struct function *fn, class data_in *data_in, fn->calls_eh_return = bp_unpack_value (&bp, 1); fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1); fn->has_simduid_loops = bp_unpack_value (&bp, 1); + fn->has_musttail = bp_unpack_value (&bp, 1); fn->assume_function = bp_unpack_value (&bp, 1); fn->va_list_fpr_size = bp_unpack_value (&bp, 8); fn->va_list_gpr_size = bp_unpack_value (&bp, 8); diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc index d4f728094ed5..0be381abbd96 100644 --- a/gcc/lto-streamer-out.cc +++ b/gcc/lto-streamer-out.cc @@ -2290,6 +2290,7 @@ output_struct_function_base (struct output_block *ob, struct function *fn) bp_pack_value (&bp, fn->calls_eh_return, 1); bp_pack_value (&bp, fn->has_force_vectorize_loops, 1); bp_pack_value (&bp, fn->has_simduid_loops, 1); + bp_pack_value (&bp, fn->has_musttail, 1); bp_pack_value (&bp, fn->assume_function, 1); bp_pack_value (&bp, fn->va_list_fpr_size, 8); bp_pack_value (&bp, fn->va_list_gpr_size, 8); diff --git a/gcc/passes.def b/gcc/passes.def index 041229e47a68..5b5390e6ac0b 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -444,6 +444,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_tsan_O0); NEXT_PASS (pass_sanopt); NEXT_PASS (pass_cleanup_eh); + NEXT_PASS (pass_musttail); NEXT_PASS (pass_lower_resx); NEXT_PASS (pass_nrv); NEXT_PASS (pass_gimple_isel); diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index 7fb7b92966be..e6fd1294b958 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -2290,6 +2290,8 @@ notice_special_calls (gcall *call) cfun->calls_alloca = true; if (flags & ECF_RETURNS_TWICE) cfun->calls_setjmp = true; + if (gimple_call_must_tail_p (call)) + cfun->has_musttail = true; } @@ -2301,6 +2303,7 @@ clear_special_calls (void) { cfun->calls_alloca = false; cfun->calls_setjmp = false; + cfun->has_musttail = false; } /* Remove PHI nodes associated with basic block BB and all edges out of BB. */ diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index edebb2be245d..59e53558034f 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -368,6 +368,7 @@ extern gimple_opt_pass *make_pass_sra (gcc::context *ctxt); extern gimple_opt_pass *make_pass_sra_early (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tail_recursion (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tail_calls (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_musttail (gcc::context *ctxt); extern gimple_opt_pass *make_pass_fix_loops (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt); diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc index e9f7f8a12b3a..0c6df10e64f7 100644 --- a/gcc/tree-tailcall.cc +++ b/gcc/tree-tailcall.cc @@ -408,10 +408,10 @@ static live_vars_map *live_vars; static vec<bitmap_head> live_vars_vec; /* Finds tailcalls falling into basic block BB. The list of found tailcalls is - added to the start of RET. */ + added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. */ static void -find_tail_calls (basic_block bb, struct tailcall **ret) +find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail) { tree ass_var = NULL_TREE, ret_var, func, param; gimple *stmt; @@ -445,6 +445,9 @@ find_tail_calls (basic_block bb, struct tailcall **ret) if (is_gimple_call (stmt)) { call = as_a <gcall *> (stmt); + /* Handle only musttail calls when not optimizing. */ + if (only_musttail && !gimple_call_must_tail_p (call)) + return; ass_var = gimple_call_lhs (call); break; } @@ -467,7 +470,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret) edge_iterator ei; /* Recurse to the predecessors. */ FOR_EACH_EDGE (e, ei, bb->preds) - find_tail_calls (e->src, ret); + find_tail_calls (e->src, ret, only_musttail); return; } @@ -528,7 +531,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret) func = gimple_call_fndecl (call); if (func && !fndecl_built_in_p (func) - && recursive_call_p (current_function_decl, func)) + && recursive_call_p (current_function_decl, func) + && !only_musttail) { tree arg; @@ -1094,10 +1098,11 @@ create_tailcall_accumulator (const char *label, basic_block bb, tree init) } /* Optimizes tail calls in the function, turning the tail recursion - into iteration. */ + into iteration. When ONLY_MUSTCALL is true only optimize mustcall + marked calls. */ static unsigned int -tree_optimize_tail_calls_1 (bool opt_tailcalls) +tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_mustcall) { edge e; bool phis_constructed = false; @@ -1117,7 +1122,7 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls) /* Only traverse the normal exits, i.e. those that end with return statement. */ if (safe_is_a <greturn *> (*gsi_last_bb (e->src))) - find_tail_calls (e->src, &tailcalls); + find_tail_calls (e->src, &tailcalls, only_mustcall); } if (live_vars) @@ -1228,7 +1233,7 @@ gate_tail_calls (void) static unsigned int execute_tail_calls (void) { - return tree_optimize_tail_calls_1 (true); + return tree_optimize_tail_calls_1 (true, false); } namespace { @@ -1261,7 +1266,7 @@ public: bool gate (function *) final override { return gate_tail_calls (); } unsigned int execute (function *) final override { - return tree_optimize_tail_calls_1 (false); + return tree_optimize_tail_calls_1 (false, false); } }; // class pass_tail_recursion @@ -1312,3 +1317,46 @@ make_pass_tail_calls (gcc::context *ctxt) { return new pass_tail_calls (ctxt); } + +namespace { + +const pass_data pass_data_musttail = +{ + GIMPLE_PASS, /* type */ + "musttail", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_musttail : public gimple_opt_pass +{ +public: + pass_musttail (gcc::context *ctxt) + : gimple_opt_pass (pass_data_musttail, ctxt) + {} + + /* opt_pass methods: */ + /* This pass is only used when not optimizing to make [[musttail]] still + work. */ + bool gate (function *) final override { return !flag_optimize_sibling_calls; } + unsigned int execute (function *f) final override + { + if (!f->has_musttail) + return 0; + return tree_optimize_tail_calls_1 (true, true); + } + +}; // class pass_musttail + +} // anon namespace + +gimple_opt_pass * +make_pass_musttail (gcc::context *ctxt) +{ + return new pass_musttail (ctxt); +}