Message ID | 20240305214521.326316-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2] tree-profile: Don't instrument an IFUNC resolver nor its callees | expand |
On Tue, Mar 5, 2024 at 1:45 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > We can't instrument an IFUNC resolver nor its callees as it may require > TLS which hasn't been set up yet when the dynamic linker is resolving > IFUNC symbols. > > Add an IFUNC resolver caller marker to cgraph_node and set it if the > function is called by an IFUNC resolver. Update tree_profiling to skip > functions called by IFUNC resolver. > > Tested with profiledbootstrap on Fedora 39/x86-64. > > gcc/ChangeLog: > > PR tree-optimization/114115 > * cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes. > (cgraph_node): Add called_by_ifunc_resolver. > * cgraphunit.cc (symbol_table::compile): Call > symtab_node::check_ifunc_callee_symtab_nodes. > * symtab.cc (check_ifunc_resolver): New. > (ifunc_ref_map): Likewise. > (is_caller_ifunc_resolver): Likewise. > (symtab_node::check_ifunc_callee_symtab_nodes): Likewise. > * tree-profile.cc (tree_profiling): Do not instrument an IFUNC > resolver nor its callees. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/114115 > * gcc.dg/pr114115.c: New test. > --- > gcc/cgraph.h | 6 +++ > gcc/cgraphunit.cc | 2 + > gcc/symtab.cc | 89 +++++++++++++++++++++++++++++++++ > gcc/testsuite/gcc.dg/pr114115.c | 24 +++++++++ > gcc/tree-profile.cc | 4 ++ > 5 files changed, 125 insertions(+) > create mode 100644 gcc/testsuite/gcc.dg/pr114115.c > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index 47f35e8078d..a8c3224802c 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -479,6 +479,9 @@ public: > Return NULL if there's no such node. */ > static symtab_node *get_for_asmname (const_tree asmname); > > + /* Check symbol table for callees of IFUNC resolvers. */ > + static void check_ifunc_callee_symtab_nodes (void); > + > /* Verify symbol table for internal consistency. */ > static DEBUG_FUNCTION void verify_symtab_nodes (void); > > @@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node > redefined_extern_inline (false), tm_may_enter_irr (false), > ipcp_clone (false), declare_variant_alt (false), > calls_declare_variant_alt (false), gc_candidate (false), > + called_by_ifunc_resolver (false), > m_uid (uid), m_summary_id (-1) > {} > > @@ -1495,6 +1499,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node > is set for local SIMD clones when they are created and cleared if the > vectorizer uses them. */ > unsigned gc_candidate : 1; > + /* Set if the function is called by an IFUNC resolver. */ > + unsigned called_by_ifunc_resolver : 1; > > private: > /* Unique id of the node. */ > diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc > index d200166f7e9..2bd0289ffba 100644 > --- a/gcc/cgraphunit.cc > +++ b/gcc/cgraphunit.cc > @@ -2317,6 +2317,8 @@ symbol_table::compile (void) > > symtab_node::checking_verify_symtab_nodes (); > > + symtab_node::check_ifunc_callee_symtab_nodes (); > + > timevar_push (TV_CGRAPHOPT); > if (pre_ipa_mem_report) > dump_memory_report ("Memory consumption before IPA"); > diff --git a/gcc/symtab.cc b/gcc/symtab.cc > index 4c7e3c135ca..3256133891d 100644 > --- a/gcc/symtab.cc > +++ b/gcc/symtab.cc > @@ -1369,6 +1369,95 @@ symtab_node::verify (void) > timevar_pop (TV_CGRAPH_VERIFY); > } > > +/* Return true and set *DATA to true if NODE is an ifunc resolver. */ > + > +static bool > +check_ifunc_resolver (cgraph_node *node, void *data) > +{ > + if (node->ifunc_resolver) > + { > + bool *is_ifunc_resolver = (bool *) data; > + *is_ifunc_resolver = true; > + return true; > + } > + return false; > +} > + > +static auto_bitmap ifunc_ref_map; > + > +/* Return true if any caller of NODE is an ifunc resolver. */ > + > +static bool > +is_caller_ifunc_resolver (cgraph_node *node) > +{ > + bool is_ifunc_resolver = false; > + > + for (cgraph_edge *e = node->callers; e; e = e->next_caller) > + { > + /* Return true if caller is known to be an IFUNC resolver. */ > + if (e->caller->called_by_ifunc_resolver) > + return true; > + > + /* Check for recursive call. */ > + if (e->caller == node) > + continue; > + > + /* Skip if it has been visited. */ > + unsigned int uid = e->caller->get_uid (); > + if (bitmap_bit_p (ifunc_ref_map, uid)) > + continue; > + bitmap_set_bit (ifunc_ref_map, uid); > + > + if (is_caller_ifunc_resolver (e->caller)) > + { > + /* Return true if caller is an IFUNC resolver. */ > + e->caller->called_by_ifunc_resolver = true; > + return true; > + } > + > + /* Check if caller's alias is an IFUNC resolver. */ > + e->caller->call_for_symbol_and_aliases (check_ifunc_resolver, > + &is_ifunc_resolver, > + true); > + if (is_ifunc_resolver) > + { > + /* Return true if caller's alias is an IFUNC resolver. */ > + e->caller->called_by_ifunc_resolver = true; > + return true; > + } > + } > + > + return false; > +} > + > +/* Check symbol table for callees of IFUNC resolvers. */ > + > +void > +symtab_node::check_ifunc_callee_symtab_nodes (void) > +{ > + symtab_node *node; > + > + FOR_EACH_SYMBOL (node) > + { > + cgraph_node *cnode = dyn_cast <cgraph_node *> (node); > + if (!cnode) > + continue; > + > + unsigned int uid = cnode->get_uid (); > + if (bitmap_bit_p (ifunc_ref_map, uid)) > + continue; > + bitmap_set_bit (ifunc_ref_map, uid); > + > + bool is_ifunc_resolver = false; > + cnode->call_for_symbol_and_aliases (check_ifunc_resolver, > + &is_ifunc_resolver, true); > + if (is_ifunc_resolver || is_caller_ifunc_resolver (cnode)) > + cnode->called_by_ifunc_resolver = true; > + } > + > + bitmap_clear (ifunc_ref_map); > +} > + > /* Verify symbol table for internal consistency. */ > > DEBUG_FUNCTION void > diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c > new file mode 100644 > index 00000000000..2629f591877 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr114115.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */ > +/* { dg-require-profiling "-fprofile-generate" } */ > +/* { dg-require-ifunc "" } */ > + > +void *foo_ifunc2() __attribute__((ifunc("foo_resolver"))); > + > +void bar(void) > +{ > +} > + > +static int f3() > +{ > + bar (); > + return 5; > +} > + > +void (*foo_resolver(void))(void) > +{ > + f3(); > + return bar; > +} > + > +/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" "optimized" } } */ > diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc > index aed13e2b1bc..2e55a5d8402 100644 > --- a/gcc/tree-profile.cc > +++ b/gcc/tree-profile.cc > @@ -848,6 +848,10 @@ tree_profiling (void) > } > } > > + /* Do not instrument an IFUNC resolver nor its callees. */ > + if (node->called_by_ifunc_resolver) > + continue; > + > push_cfun (DECL_STRUCT_FUNCTION (node->decl)); > > if (dump_file) > -- > 2.44.0 > PING.
> On Tue, Mar 5, 2024 at 1:45 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > We can't instrument an IFUNC resolver nor its callees as it may require > > TLS which hasn't been set up yet when the dynamic linker is resolving > > IFUNC symbols. > > > > Add an IFUNC resolver caller marker to cgraph_node and set it if the > > function is called by an IFUNC resolver. Update tree_profiling to skip > > functions called by IFUNC resolver. > > > > Tested with profiledbootstrap on Fedora 39/x86-64. > > > > gcc/ChangeLog: > > > > PR tree-optimization/114115 > > * cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes. > > (cgraph_node): Add called_by_ifunc_resolver. > > * cgraphunit.cc (symbol_table::compile): Call > > symtab_node::check_ifunc_callee_symtab_nodes. > > * symtab.cc (check_ifunc_resolver): New. > > (ifunc_ref_map): Likewise. > > (is_caller_ifunc_resolver): Likewise. > > (symtab_node::check_ifunc_callee_symtab_nodes): Likewise. > > * tree-profile.cc (tree_profiling): Do not instrument an IFUNC > > resolver nor its callees. > > > > gcc/testsuite/ChangeLog: > > > > PR tree-optimization/114115 > > * gcc.dg/pr114115.c: New test. > > PING. I am bit worried about commonly used functions getting "infected" by being called once from ifunc resolver. I think we only use thread local storage for indirect call profiling, so we may just disable indirect call profiling for these functions. Also the patch will be noop with -flto -flto-partition=max, so probably we need to compute this flag at WPA time and stream to partitions. Honza
On Tue, Apr 2, 2024 at 7:50 AM Jan Hubicka <hubicka@ucw.cz> wrote: > > > On Tue, Mar 5, 2024 at 1:45 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > > > We can't instrument an IFUNC resolver nor its callees as it may require > > > TLS which hasn't been set up yet when the dynamic linker is resolving > > > IFUNC symbols. > > > > > > Add an IFUNC resolver caller marker to cgraph_node and set it if the > > > function is called by an IFUNC resolver. Update tree_profiling to skip > > > functions called by IFUNC resolver. > > > > > > Tested with profiledbootstrap on Fedora 39/x86-64. > > > > > > gcc/ChangeLog: > > > > > > PR tree-optimization/114115 > > > * cgraph.h (symtab_node): Add check_ifunc_callee_symtab_nodes. > > > (cgraph_node): Add called_by_ifunc_resolver. > > > * cgraphunit.cc (symbol_table::compile): Call > > > symtab_node::check_ifunc_callee_symtab_nodes. > > > * symtab.cc (check_ifunc_resolver): New. > > > (ifunc_ref_map): Likewise. > > > (is_caller_ifunc_resolver): Likewise. > > > (symtab_node::check_ifunc_callee_symtab_nodes): Likewise. > > > * tree-profile.cc (tree_profiling): Do not instrument an IFUNC > > > resolver nor its callees. > > > > > > gcc/testsuite/ChangeLog: > > > > > > PR tree-optimization/114115 > > > * gcc.dg/pr114115.c: New test. > > > > PING. > > I am bit worried about commonly used functions getting "infected" by > being called once from ifunc resolver. I think we only use thread local > storage for indirect call profiling, so we may just disable indirect > call profiling for these functions. Will change it. > Also the patch will be noop with -flto -flto-partition=max, so probably > we need to compute this flag at WPA time and stream to partitions. > Why is it a nop with -flto -flto-partition=max? I got (gdb) bt #0 symtab_node::check_ifunc_callee_symtab_nodes () at /export/gnu/import/git/gitlab/x86-gcc/gcc/symtab.cc:1440 #1 0x0000000000e487d3 in symbol_table::compile (this=0x7fffea006000) at /export/gnu/import/git/gitlab/x86-gcc/gcc/cgraphunit.cc:2320 #2 0x0000000000d23ecf in lto_main () at /export/gnu/import/git/gitlab/x86-gcc/gcc/lto/lto.cc:687 #3 0x00000000015254d2 in compile_file () at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:449 #4 0x00000000015284a4 in do_compile () at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2154 #5 0x0000000001528864 in toplev::main (this=0x7fffffffd84a, argc=16, argv=0x42261f0) at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2310 #6 0x00000000030a3fe2 in main (argc=16, argv=0x7fffffffd958) at /export/gnu/import/git/gitlab/x86-gcc/gcc/main.cc:39 Do you have a testcase to show that it is a nop?
> > I am bit worried about commonly used functions getting "infected" by > > being called once from ifunc resolver. I think we only use thread local > > storage for indirect call profiling, so we may just disable indirect > > call profiling for these functions. > > Will change it. > > > Also the patch will be noop with -flto -flto-partition=max, so probably > > we need to compute this flag at WPA time and stream to partitions. > > > > Why is it a nop with -flto -flto-partition=max? I got > > (gdb) bt > #0 symtab_node::check_ifunc_callee_symtab_nodes () > at /export/gnu/import/git/gitlab/x86-gcc/gcc/symtab.cc:1440 > #1 0x0000000000e487d3 in symbol_table::compile (this=0x7fffea006000) > at /export/gnu/import/git/gitlab/x86-gcc/gcc/cgraphunit.cc:2320 > #2 0x0000000000d23ecf in lto_main () > at /export/gnu/import/git/gitlab/x86-gcc/gcc/lto/lto.cc:687 > #3 0x00000000015254d2 in compile_file () > at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:449 > #4 0x00000000015284a4 in do_compile () > at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2154 > #5 0x0000000001528864 in toplev::main (this=0x7fffffffd84a, argc=16, > argv=0x42261f0) at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2310 > #6 0x00000000030a3fe2 in main (argc=16, argv=0x7fffffffd958) > at /export/gnu/import/git/gitlab/x86-gcc/gcc/main.cc:39 > > Do you have a testcase to show that it is a nop? Aha, sorry. I tought this is run during late optimization, but it is done early, so LTo partitioning does not mix things up. So current patch modified to disable only instrumentation that needs TLS should be fine. Honza > > -- > H.J.
On Tue, Apr 2, 2024 at 10:03 AM Jan Hubicka <hubicka@ucw.cz> wrote: > > > > I am bit worried about commonly used functions getting "infected" by > > > being called once from ifunc resolver. I think we only use thread local > > > storage for indirect call profiling, so we may just disable indirect > > > call profiling for these functions. > > > > Will change it. > > > > > Also the patch will be noop with -flto -flto-partition=max, so probably > > > we need to compute this flag at WPA time and stream to partitions. > > > > > > > Why is it a nop with -flto -flto-partition=max? I got > > > > (gdb) bt > > #0 symtab_node::check_ifunc_callee_symtab_nodes () > > at /export/gnu/import/git/gitlab/x86-gcc/gcc/symtab.cc:1440 > > #1 0x0000000000e487d3 in symbol_table::compile (this=0x7fffea006000) > > at /export/gnu/import/git/gitlab/x86-gcc/gcc/cgraphunit.cc:2320 > > #2 0x0000000000d23ecf in lto_main () > > at /export/gnu/import/git/gitlab/x86-gcc/gcc/lto/lto.cc:687 > > #3 0x00000000015254d2 in compile_file () > > at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:449 > > #4 0x00000000015284a4 in do_compile () > > at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2154 > > #5 0x0000000001528864 in toplev::main (this=0x7fffffffd84a, argc=16, > > argv=0x42261f0) at /export/gnu/import/git/gitlab/x86-gcc/gcc/toplev.cc:2310 > > #6 0x00000000030a3fe2 in main (argc=16, argv=0x7fffffffd958) > > at /export/gnu/import/git/gitlab/x86-gcc/gcc/main.cc:39 > > > > Do you have a testcase to show that it is a nop? > Aha, sorry. I tought this is run during late optimization, but it is > done early, so LTo partitioning does not mix things up. So current > patch modified to disable only instrumentation that needs TLS should be > fine. > Done. Here is the v3 patch: https://gcc.gnu.org/pipermail/gcc-patches/2024-April/648733.html
diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 47f35e8078d..a8c3224802c 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -479,6 +479,9 @@ public: Return NULL if there's no such node. */ static symtab_node *get_for_asmname (const_tree asmname); + /* Check symbol table for callees of IFUNC resolvers. */ + static void check_ifunc_callee_symtab_nodes (void); + /* Verify symbol table for internal consistency. */ static DEBUG_FUNCTION void verify_symtab_nodes (void); @@ -896,6 +899,7 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node redefined_extern_inline (false), tm_may_enter_irr (false), ipcp_clone (false), declare_variant_alt (false), calls_declare_variant_alt (false), gc_candidate (false), + called_by_ifunc_resolver (false), m_uid (uid), m_summary_id (-1) {} @@ -1495,6 +1499,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node is set for local SIMD clones when they are created and cleared if the vectorizer uses them. */ unsigned gc_candidate : 1; + /* Set if the function is called by an IFUNC resolver. */ + unsigned called_by_ifunc_resolver : 1; private: /* Unique id of the node. */ diff --git a/gcc/cgraphunit.cc b/gcc/cgraphunit.cc index d200166f7e9..2bd0289ffba 100644 --- a/gcc/cgraphunit.cc +++ b/gcc/cgraphunit.cc @@ -2317,6 +2317,8 @@ symbol_table::compile (void) symtab_node::checking_verify_symtab_nodes (); + symtab_node::check_ifunc_callee_symtab_nodes (); + timevar_push (TV_CGRAPHOPT); if (pre_ipa_mem_report) dump_memory_report ("Memory consumption before IPA"); diff --git a/gcc/symtab.cc b/gcc/symtab.cc index 4c7e3c135ca..3256133891d 100644 --- a/gcc/symtab.cc +++ b/gcc/symtab.cc @@ -1369,6 +1369,95 @@ symtab_node::verify (void) timevar_pop (TV_CGRAPH_VERIFY); } +/* Return true and set *DATA to true if NODE is an ifunc resolver. */ + +static bool +check_ifunc_resolver (cgraph_node *node, void *data) +{ + if (node->ifunc_resolver) + { + bool *is_ifunc_resolver = (bool *) data; + *is_ifunc_resolver = true; + return true; + } + return false; +} + +static auto_bitmap ifunc_ref_map; + +/* Return true if any caller of NODE is an ifunc resolver. */ + +static bool +is_caller_ifunc_resolver (cgraph_node *node) +{ + bool is_ifunc_resolver = false; + + for (cgraph_edge *e = node->callers; e; e = e->next_caller) + { + /* Return true if caller is known to be an IFUNC resolver. */ + if (e->caller->called_by_ifunc_resolver) + return true; + + /* Check for recursive call. */ + if (e->caller == node) + continue; + + /* Skip if it has been visited. */ + unsigned int uid = e->caller->get_uid (); + if (bitmap_bit_p (ifunc_ref_map, uid)) + continue; + bitmap_set_bit (ifunc_ref_map, uid); + + if (is_caller_ifunc_resolver (e->caller)) + { + /* Return true if caller is an IFUNC resolver. */ + e->caller->called_by_ifunc_resolver = true; + return true; + } + + /* Check if caller's alias is an IFUNC resolver. */ + e->caller->call_for_symbol_and_aliases (check_ifunc_resolver, + &is_ifunc_resolver, + true); + if (is_ifunc_resolver) + { + /* Return true if caller's alias is an IFUNC resolver. */ + e->caller->called_by_ifunc_resolver = true; + return true; + } + } + + return false; +} + +/* Check symbol table for callees of IFUNC resolvers. */ + +void +symtab_node::check_ifunc_callee_symtab_nodes (void) +{ + symtab_node *node; + + FOR_EACH_SYMBOL (node) + { + cgraph_node *cnode = dyn_cast <cgraph_node *> (node); + if (!cnode) + continue; + + unsigned int uid = cnode->get_uid (); + if (bitmap_bit_p (ifunc_ref_map, uid)) + continue; + bitmap_set_bit (ifunc_ref_map, uid); + + bool is_ifunc_resolver = false; + cnode->call_for_symbol_and_aliases (check_ifunc_resolver, + &is_ifunc_resolver, true); + if (is_ifunc_resolver || is_caller_ifunc_resolver (cnode)) + cnode->called_by_ifunc_resolver = true; + } + + bitmap_clear (ifunc_ref_map); +} + /* Verify symbol table for internal consistency. */ DEBUG_FUNCTION void diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c new file mode 100644 index 00000000000..2629f591877 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr114115.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */ +/* { dg-require-profiling "-fprofile-generate" } */ +/* { dg-require-ifunc "" } */ + +void *foo_ifunc2() __attribute__((ifunc("foo_resolver"))); + +void bar(void) +{ +} + +static int f3() +{ + bar (); + return 5; +} + +void (*foo_resolver(void))(void) +{ + f3(); + return bar; +} + +/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" "optimized" } } */ diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc index aed13e2b1bc..2e55a5d8402 100644 --- a/gcc/tree-profile.cc +++ b/gcc/tree-profile.cc @@ -848,6 +848,10 @@ tree_profiling (void) } } + /* Do not instrument an IFUNC resolver nor its callees. */ + if (node->called_by_ifunc_resolver) + continue; + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); if (dump_file)