Message ID | ri6a6n8e85o.fsf@suse.cz |
---|---|
State | New |
Headers | show |
Series | [RFC] ipa: Adjust references to identify read-only globals | expand |
> Hi, > > gcc/ChangeLog: > > 2021-06-29 Martin Jambor <mjambor@suse.cz> > > * cgraph.h (ipa_replace_map): New field force_load_ref. > * ipa-prop.h (ipa_param_descriptor): Reduce precision of move_cost, > aded new flag load_dereferenced, adjusted comments. > (ipa_get_param_dereferenced): New function. > (ipa_set_param_dereferenced): Likewise. > * cgraphclones.c (cgraph_node::create_virtual_clone): Follow it. > * ipa-cp.c: Include gimple.h. > (ipcp_discover_new_direct_edges): Take into account dereferenced flag. > (get_replacement_map): New parameter force_load_ref, set the > appropriate flag in ipa_replace_map if set. > (struct symbol_and_index_together): New type. > (adjust_references_in_act_callers): New function. > (adjust_references_in_caller): Likewise. > (create_specialized_node): When appropriate, call > adjust_references_in_caller and force only load references. > * ipa-prop.c (load_from_dereferenced_name): New function. > (ipa_analyze_controlled_uses): Also detect loads from a > dereference, harden testing of call statements. > (ipa_write_node_info): Stream the dereferenced flag. > (ipa_read_node_info): Likewise. > (ipa_set_jf_constant): Also create refdesc when jump function > references a variable. > (cgraph_node_for_jfunc): Rename to symtab_node_for_jfunc, work > also on references of variables and return a symtab_node. Adjust > all callers. > (propagate_controlled_uses): Also remove references to VAR_DECLs. > > gcc/testsuite/ChangeLog: > > 2021-06-29 Martin Jambor <mjambor@suse.cz> > > * gcc.dg/ipa/remref-3.c: New test. > * gcc.dg/ipa/remref-4.c: Likewise. > * gcc.dg/ipa/remref-5.c: Likewise. > * gcc.dg/ipa/remref-6.c: Likewise. > --- > gcc/cgraph.h | 3 + > gcc/cgraphclones.c | 10 +- > gcc/ipa-cp.c | 146 ++++++++++++++++++++++-- > gcc/ipa-prop.c | 166 ++++++++++++++++++++++------ > gcc/ipa-prop.h | 27 ++++- > gcc/testsuite/gcc.dg/ipa/remref-3.c | 23 ++++ > gcc/testsuite/gcc.dg/ipa/remref-4.c | 31 ++++++ > gcc/testsuite/gcc.dg/ipa/remref-5.c | 38 +++++++ > gcc/testsuite/gcc.dg/ipa/remref-6.c | 24 ++++ > 9 files changed, 419 insertions(+), 49 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-3.c > create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-4.c > create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-5.c > create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-6.c > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index 9f4338fdf87..0fc20cd4517 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -700,6 +700,9 @@ struct GTY(()) ipa_replace_map > tree new_tree; > /* Parameter number to replace, when old_tree is NULL. */ > int parm_num; > + /* Set if the newly added reference should not be an address one, but a load > + one from the operand of the ADDR_EXPR in NEW_TREE. */ So this is in case where parameter p is used only as *p? I think the comment should be expanded to explain the situation or in a year I will not know why we need such a flag :) > @@ -4320,7 +4322,8 @@ gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest, > Return it or NULL if for some reason it cannot be created. */ > > static struct ipa_replace_map * > -get_replacement_map (class ipa_node_params *info, tree value, int parm_num) > +get_replacement_map (class ipa_node_params *info, tree value, int parm_num, > + bool force_load_ref) You want to comment the parameter here too.. > +/* At INDEX of a function being called by CS there is an ADDR_EXPR of a > + variable which is only dereferenced and which is represented by SYMBOL. See > + if we can remove ADDR reference in callers assosiated witht the call. */ > + > +static void > +adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index) > +{ > + ipa_edge_args *args = ipa_edge_args_sum->get (cs); > + ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index); > + if (jfunc->type == IPA_JF_CONST) > + { > + ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt, > + cs->lto_stmt_uid); > + if (!to_del) > + return; > + to_del->remove_reference (); > + if (dump_file) > + fprintf (dump_file, " Removed a reference from %s to %s.\n", > + cs->caller->dump_name (), symbol->dump_name ()); > + return; > + } > + > + if (jfunc->type != IPA_JF_PASS_THROUGH > + || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) > + return; > + > + int fidx = ipa_get_jf_pass_through_formal_id (jfunc); > + cgraph_node *caller = cs->caller; > + ipa_node_params *caller_info = ipa_node_params_sum->get (caller); > + /* TODO: This consistency check may be too big and not really > + that useful. Consider removing it. */ > + tree cst; > + if (caller_info->ipcp_orig_node) > + cst = caller_info->known_csts[fidx]; > + else > + { > + ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx); > + gcc_assert (lat->is_single_const ()); > + cst = lat->values->value; > + } > + gcc_assert (TREE_CODE (cst) == ADDR_EXPR > + && symtab_node::get (TREE_OPERAND (cst, 0)) == symbol); > + > + int cuses = ipa_get_controlled_uses (caller_info, fidx); > + if (cuses == IPA_UNDESCRIBED_USE) > + return; > + gcc_assert (cuses > 0); > + cuses--; > + ipa_set_controlled_uses (caller_info, fidx, cuses); > + if (cuses) > + return; > + > + if (caller_info->ipcp_orig_node) > + { > + /* Cloning machinery has created a reference here, we need to either > + remove it or change it to a read one. */ > + ipa_ref *to_del = caller->find_reference (symbol, NULL, 0); > + if (to_del && to_del->use == IPA_REF_ADDR) > + { > + to_del->remove_reference (); > + if (dump_file) > + fprintf (dump_file, " Removed a reference from %s to %s.\n", > + cs->caller->dump_name (), symbol->dump_name ()); > + if (ipa_get_param_load_dereferenced (caller_info, fidx)) > + { > + caller->create_reference (symbol, IPA_REF_LOAD, NULL); > + if (dump_file) > + fprintf (dump_file, > + " ...and replaced it with LOAD one.\n"); > + } > + } > + } > + > + symbol_and_index_together pack; > + pack.symbol = symbol; > + pack.index = fidx; > + caller->call_for_symbol_thunks_and_aliases (adjust_references_in_act_callers, > + &pack, true); > +} > + > + > /* Return true if we would like to remove a parameter from NODE when cloning it > with KNOWN_CSTS scalar constants. */ > > @@ -4595,15 +4708,28 @@ create_specialized_node (struct cgraph_node *node, > for (i = 0; i < count; i++) > { > tree t = known_csts[i]; > - if (t) > - { > - struct ipa_replace_map *replace_map; > + if (!t) > + continue; > > - gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); > - replace_map = get_replacement_map (info, t, i); > - if (replace_map) > - vec_safe_push (replace_trees, replace_map); > + gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); > + > + bool load_ref = false; > + symtab_node *ref_symbol; > + if (TREE_CODE (t) == ADDR_EXPR > + && TREE_CODE (TREE_OPERAND (t, 0)) == VAR_DECL Can't you see more complicate expressions like &var[10]? So you want to find the base term here I think. > +/* Return true EXPR is a load from a dereference of SSA_NAME NAME. */ > + > +static bool > +load_from_dereferenced_name (tree expr, tree name) > +{ > + tree base = get_base_address (expr); > + return (TREE_CODE (base) == MEM_REF > + && TREE_OPERAND (base, 0) == name); > +} Similarly here. > /* The parameter is used. */ > unsigned used : 1; > unsigned used_by_ipa_predicates : 1; > unsigned used_by_indirect_call : 1; > unsigned used_by_polymorphic_call : 1; > + /* Set to true when in addition to being used in call statements, the > + parameterr has also been used for loads (but not for wtires, does not > + escape, etc.). */ > + unsigned load_dereferenced : 1; Some typos here :) (and also I think comment should explain why we need the flag) The patch looks reasonable to me, Honza
> -----Original Message----- > From: Gcc-patches <gcc-patches- > bounces+jiangning=os.amperecomputing.com@gcc.gnu.org> On Behalf Of > Martin Jambor > Sent: Wednesday, June 30, 2021 4:19 AM > To: GCC Patches <gcc-patches@gcc.gnu.org> > Cc: Jan Hubicka <hubicka@ucw.cz> > Subject: [RFC] ipa: Adjust references to identify read-only globals > > Hi, > > this patch has been motivated by SPEC 2017's 544.nab_r in which there is a > static variable which is never written to and so zero throughout the run-time > of the benchmark. However, it is passed by reference to a function in which > it is read and (after some multiplications) passed into __builtin_exp which in > turn unnecessarily consumes almost 10% of the total benchmark run-time. I do see ~8.5% runtime reduction on aarch64. > The situation is illustrated by the added testcase remref-3.c. > > The patch adds a flag to ipa-prop descriptor of each parameter to mark such > parameters. IPA-CP and inling then take the effort to remove IPA_REF_ADDR > references in the caller and only add IPA_REF_LOAD reference to the > clone/overall inlined function. This is sufficient for subsequent symbol table > analysis code to identify the read-only variable as such and optimize the code. > > I plan to compile a number of packages with the patch to test it some more > and get a bit better idea of its impact. But it has passed bootstrap, > LTObootstrap and testing on x86_64-linux and i686-linux and so unless I find > any problem, I would like to commit it at some point next month without any > major changes, so I'd be grateful for any feedback even now. I see 3 cases in SPEC2017 failed to compile on aarch64, i.e. 521.wrf_r, 527.cam4_r, 554.roms_r. For example, pre_step3d.fppized.f90:1260:35: internal compiler error: Segmentation fault 1260 | CALL wclock_on (ng, iNLM, 22) | ^ 0x1645c6b internal_error(char const*, ...) ???:0 0xe1f4f4 place_block_symbol(rtx_def*) ???:0 0x84ab33 use_anchored_address(rtx_def*) ???:0 0x868203 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) ???:0 0x868793 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) ???:0 0x75b593 expand_call(tree_node*, rtx_def*, int) ???:0 0x86a09f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) ???:0 Please submit a full bug report Thanks, -Jiangning
On Tue, Jul 20, 2021 at 10:54 AM JiangNing OS via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > > -----Original Message----- > > From: Gcc-patches <gcc-patches- > > bounces+jiangning=os.amperecomputing.com@gcc.gnu.org> On Behalf Of > > Martin Jambor > > Sent: Wednesday, June 30, 2021 4:19 AM > > To: GCC Patches <gcc-patches@gcc.gnu.org> > > Cc: Jan Hubicka <hubicka@ucw.cz> > > Subject: [RFC] ipa: Adjust references to identify read-only globals > > > > Hi, > > > > this patch has been motivated by SPEC 2017's 544.nab_r in which there is a > > static variable which is never written to and so zero throughout the run-time > > of the benchmark. However, it is passed by reference to a function in which > > it is read and (after some multiplications) passed into __builtin_exp which in > > turn unnecessarily consumes almost 10% of the total benchmark run-time. > > I do see ~8.5% runtime reduction on aarch64. > > > The situation is illustrated by the added testcase remref-3.c. > > > > The patch adds a flag to ipa-prop descriptor of each parameter to mark such > > parameters. IPA-CP and inling then take the effort to remove IPA_REF_ADDR > > references in the caller and only add IPA_REF_LOAD reference to the > > clone/overall inlined function. This is sufficient for subsequent symbol table > > analysis code to identify the read-only variable as such and optimize the code. > > > > I plan to compile a number of packages with the patch to test it some more > > and get a bit better idea of its impact. But it has passed bootstrap, > > LTObootstrap and testing on x86_64-linux and i686-linux and so unless I find > > any problem, I would like to commit it at some point next month without any > > major changes, so I'd be grateful for any feedback even now. > > I see 3 cases in SPEC2017 failed to compile on aarch64, i.e. 521.wrf_r, 527.cam4_r, 554.roms_r. For example, > > pre_step3d.fppized.f90:1260:35: internal compiler error: Segmentation fault > 1260 | CALL wclock_on (ng, iNLM, 22) > | ^ > 0x1645c6b internal_error(char const*, ...) > ???:0 > 0xe1f4f4 place_block_symbol(rtx_def*) > ???:0 > 0x84ab33 use_anchored_address(rtx_def*) > ???:0 > 0x868203 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) > ???:0 > 0x868793 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) > ???:0 > 0x75b593 expand_call(tree_node*, rtx_def*, int) > ???:0 > 0x86a09f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) > ???:0 > Please submit a full bug report Please file a bugreport and provide a (possibly reduced) testcase. Thanks, Richard. > Thanks, > -Jiangning
Hi, On Tue, Jul 20 2021, Richard Biener wrote: > On Tue, Jul 20, 2021 at 10:54 AM JiangNing OS via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: >> >> > -----Original Message----- >> > From: Gcc-patches <gcc-patches- >> > bounces+jiangning=os.amperecomputing.com@gcc.gnu.org> On Behalf Of >> > Martin Jambor >> > Sent: Wednesday, June 30, 2021 4:19 AM >> > To: GCC Patches <gcc-patches@gcc.gnu.org> >> > Cc: Jan Hubicka <hubicka@ucw.cz> >> > Subject: [RFC] ipa: Adjust references to identify read-only globals >> > >> > Hi, >> > >> > this patch has been motivated by SPEC 2017's 544.nab_r in which there is a >> > static variable which is never written to and so zero throughout the run-time >> > of the benchmark. However, it is passed by reference to a function in which >> > it is read and (after some multiplications) passed into __builtin_exp which in >> > turn unnecessarily consumes almost 10% of the total benchmark run-time. >> >> I do see ~8.5% runtime reduction on aarch64. >> >> > The situation is illustrated by the added testcase remref-3.c. >> > >> > The patch adds a flag to ipa-prop descriptor of each parameter to mark such >> > parameters. IPA-CP and inling then take the effort to remove IPA_REF_ADDR >> > references in the caller and only add IPA_REF_LOAD reference to the >> > clone/overall inlined function. This is sufficient for subsequent symbol table >> > analysis code to identify the read-only variable as such and optimize the code. >> > >> > I plan to compile a number of packages with the patch to test it some more >> > and get a bit better idea of its impact. But it has passed bootstrap, >> > LTObootstrap and testing on x86_64-linux and i686-linux and so unless I find >> > any problem, I would like to commit it at some point next month without any >> > major changes, so I'd be grateful for any feedback even now. >> >> I see 3 cases in SPEC2017 failed to compile on aarch64, i.e. 521.wrf_r, 527.cam4_r, 554.roms_r. For example, >> >> pre_step3d.fppized.f90:1260:35: internal compiler error: Segmentation fault >> 1260 | CALL wclock_on (ng, iNLM, 22) >> | ^ >> 0x1645c6b internal_error(char const*, ...) >> ???:0 >> 0xe1f4f4 place_block_symbol(rtx_def*) >> ???:0 >> 0x84ab33 use_anchored_address(rtx_def*) >> ???:0 >> 0x868203 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) >> ???:0 >> 0x868793 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) >> ???:0 >> 0x75b593 expand_call(tree_node*, rtx_def*, int) >> ???:0 >> 0x86a09f expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) >> ???:0 >> Please submit a full bug report > > Please file a bugreport and provide a (possibly reduced) testcase. > The patch is not yet committed, so I don't think a bug-report (in bugzilla) is in order. At least after I fixed a bug pointed out in Honza's review, I cannot replicate any ICE building any of 521.wrf_r, 527.cam4_r, 554.roms_r on x86_64, at least without LTO. But with LTO, I get an undefined symbol link error building 527.cam4_r which is of course certainly a bug in the patch. I will investigate and hopefully fix it and re-post the patch but then I would appreciate if you checked it on aarch64 for me. Thanks, Martin
Hi, On Tue, Jul 20 2021, Martin Jambor wrote: > On Tue, Jul 20 2021, Richard Biener wrote: >> On Tue, Jul 20, 2021 at 10:54 AM JiangNing OS via Gcc-patches >> <gcc-patches@gcc.gnu.org> wrote: [...] >>> > this patch has been motivated by SPEC 2017's 544.nab_r in which there is a >>> > static variable which is never written to and so zero throughout the run-time >>> > of the benchmark. However, it is passed by reference to a function in which >>> > it is read and (after some multiplications) passed into __builtin_exp which in >>> > turn unnecessarily consumes almost 10% of the total benchmark run-time. >>> >>> I do see ~8.5% runtime reduction on aarch64. >>> >>> > The situation is illustrated by the added testcase remref-3.c. >>> > >>> > The patch adds a flag to ipa-prop descriptor of each parameter to mark such >>> > parameters. IPA-CP and inling then take the effort to remove IPA_REF_ADDR >>> > references in the caller and only add IPA_REF_LOAD reference to the >>> > clone/overall inlined function. This is sufficient for subsequent symbol table >>> > analysis code to identify the read-only variable as such and optimize the code. >>> > >>> > I plan to compile a number of packages with the patch to test it some more >>> > and get a bit better idea of its impact. But it has passed bootstrap, >>> > LTObootstrap and testing on x86_64-linux and i686-linux and so unless I find >>> > any problem, I would like to commit it at some point next month without any >>> > major changes, so I'd be grateful for any feedback even now. >>> >>> I see 3 cases in SPEC2017 failed to compile on aarch64, i.e. 521.wrf_r, 527.cam4_r, 554.roms_r. For example, >>> [...] > > At least after I fixed a bug pointed out in Honza's review, I cannot > replicate any ICE building any of 521.wrf_r, 527.cam4_r, 554.roms_r on > x86_64, at least without LTO. But with LTO, I get an undefined symbol > link error building 527.cam4_r which is of course certainly a bug in the > patch. I will investigate and hopefully fix it and re-post the patch > but then I would appreciate if you checked it on aarch64 for me. > I have fixed that bug and another one in an assert condition and the result is below. I have re-tested the updated patch including bootstrap and testing, LTO-bootstrap and LTO-profiledbootstrap on x86_64 and bootstrap and testing on aarch64. I have also used it to build SPEC2017 on x86_64 with and without LTO and on aarch64 without LTO (LTO build is underway, but my arch64 machine is an old AMD Seattle one) and I have also built a few random packages with it. Since it has been pre-approved by Honza, I would like to commit it to master soon. Nevertheless, Jiangning, I am OK to wait a day or so if you can give it another test on your setup. Thanks, Martin ipa: Adjust references to identify read-only globals this patch has been motivated by SPEC 2017's 544.nab_r in which there is a static variable which is never written to and so zero throughout the run-time of the benchmark. However, it is passed by reference to a function in which it is read and (after some multiplications) passed into __builtin_exp which in turn unnecessarily consumes almost 10% of the total benchmark run-time. The situation is illustrated by the added testcase remref-3.c. The patch adds a flag to ipa-prop descriptor of each parameter to mark such parameters. IPA-CP and inling then take the effort to remove IPA_REF_ADDR references in the caller and only add IPA_REF_LOAD reference to the clone/overall inlined function. This is sufficient for subsequent symbol table analysis code to identify the read-only variable as such and optimize the code. There are two changes from the RFC version posted to the list earlier. First, three missing calls to get_base_address were added (there was another one in an assert). Second, references are not stripped off the callers if the cloned function cannot change the signature. The second change reveals a real shortcoming stemming from the fact we cannot adjust function prototypes with fnspecs. But that is a more general problem. gcc/ChangeLog: 2021-07-20 Martin Jambor <mjambor@suse.cz> * cgraph.h (ipa_replace_map): New field force_load_ref. * ipa-prop.h (ipa_param_descriptor): Reduce precision of move_cost, aded new flag load_dereferenced, adjusted comments. (ipa_get_param_dereferenced): New function. (ipa_set_param_dereferenced): Likewise. * cgraphclones.c (cgraph_node::create_virtual_clone): Follow it. * ipa-cp.c: Include gimple.h. (ipcp_discover_new_direct_edges): Take into account dereferenced flag. (get_replacement_map): New parameter force_load_ref, set the appropriate flag in ipa_replace_map if set. (struct symbol_and_index_together): New type. (adjust_refs_in_act_callers): New function. (adjust_references_in_caller): Likewise. (create_specialized_node): When appropriate, call adjust_references_in_caller and force only load references. * ipa-prop.c (load_from_dereferenced_name): New function. (ipa_analyze_controlled_uses): Also detect loads from a dereference, harden testing of call statements. (ipa_write_node_info): Stream the dereferenced flag. (ipa_read_node_info): Likewise. (ipa_set_jf_constant): Also create refdesc when jump function references a variable. (cgraph_node_for_jfunc): Rename to symtab_node_for_jfunc, work also on references of variables and return a symtab_node. Adjust all callers. (propagate_controlled_uses): Also remove references to VAR_DECLs. gcc/testsuite/ChangeLog: 2021-06-29 Martin Jambor <mjambor@suse.cz> * gcc.dg/ipa/remref-3.c: New test. * gcc.dg/ipa/remref-4.c: Likewise. * gcc.dg/ipa/remref-5.c: Likewise. * gcc.dg/ipa/remref-6.c: Likewise. --- gcc/cgraph.h | 4 + gcc/cgraphclones.c | 10 +- gcc/ipa-cp.c | 156 ++++++++++++++++++++++++-- gcc/ipa-prop.c | 166 ++++++++++++++++++++++------ gcc/ipa-prop.h | 29 ++++- gcc/testsuite/gcc.dg/ipa/remref-3.c | 23 ++++ gcc/testsuite/gcc.dg/ipa/remref-4.c | 31 ++++++ gcc/testsuite/gcc.dg/ipa/remref-5.c | 38 +++++++ gcc/testsuite/gcc.dg/ipa/remref-6.c | 24 ++++ 9 files changed, 431 insertions(+), 50 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-3.c create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-4.c create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-5.c create mode 100644 gcc/testsuite/gcc.dg/ipa/remref-6.c diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 9f4338fdf87..eb32962e240 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -700,6 +700,10 @@ struct GTY(()) ipa_replace_map tree new_tree; /* Parameter number to replace, when old_tree is NULL. */ int parm_num; + /* Set if the newly added reference should not be an address one, but a load + one from the operand of the ADDR_EXPR in NEW_TREE. This is for cases when + the corresponding parameter p is used only as *p. */ + unsigned force_load_ref : 1; }; enum cgraph_simd_clone_arg_type diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index 7e463acab91..086e13a8371 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -633,7 +633,15 @@ cgraph_node::create_virtual_clone (vec<cgraph_edge *> redirect_callers, || in_lto_p) new_node->unique_name = true; FOR_EACH_VEC_SAFE_ELT (tree_map, i, map) - new_node->maybe_create_reference (map->new_tree, NULL); + { + tree repl = map->new_tree; + if (map->force_load_ref) + { + gcc_assert (TREE_CODE (repl) == ADDR_EXPR); + repl = get_base_address (TREE_OPERAND (repl, 0)); + } + new_node->maybe_create_reference (repl, NULL); + } if (ipa_transforms_to_apply.exists ()) new_node->ipa_transforms_to_apply diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 57c18af2bab..c81cbd0e2b6 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -106,6 +106,7 @@ along with GCC; see the file COPYING3. If not see #include "backend.h" #include "tree.h" #include "gimple-expr.h" +#include "gimple.h" #include "predict.h" #include "alloc-pool.h" #include "tree-pass.h" @@ -4008,7 +4009,8 @@ ipcp_discover_new_direct_edges (struct cgraph_node *node, { ipa_node_params *info = ipa_node_params_sum->get (node); int c = ipa_get_controlled_uses (info, param_index); - if (c != IPA_UNDESCRIBED_USE) + if (c != IPA_UNDESCRIBED_USE + && !ipa_get_param_load_dereferenced (info, param_index)) { struct ipa_ref *to_del; @@ -4317,10 +4319,14 @@ gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest, } /* Construct a replacement map for a know VALUE for a formal parameter PARAM. - Return it or NULL if for some reason it cannot be created. */ + Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF + should be set to true when the reference created for the constant should be + a load one and not an address one because the corresponding parameter p is + only used as *p. */ static struct ipa_replace_map * -get_replacement_map (class ipa_node_params *info, tree value, int parm_num) +get_replacement_map (class ipa_node_params *info, tree value, int parm_num, + bool force_load_ref) { struct ipa_replace_map *replace_map; @@ -4332,10 +4338,15 @@ get_replacement_map (class ipa_node_params *info, tree value, int parm_num) fprintf (dump_file, " with const "); print_generic_expr (dump_file, value); - fprintf (dump_file, "\n"); + + if (force_load_ref) + fprintf (dump_file, " - forcing load reference\n"); + else + fprintf (dump_file, "\n"); } replace_map->parm_num = parm_num; replace_map->new_tree = value; + replace_map->force_load_ref = force_load_ref; return replace_map; } @@ -4488,6 +4499,113 @@ update_specialized_profile (struct cgraph_node *new_node, dump_profile_updates (orig_node, new_node); } +static void adjust_references_in_caller (cgraph_edge *cs, + symtab_node *symbol, int index); + +/* Simple structure to pass a symbol and index (with same meaning as parameters + of adjust_references_in_caller) through a void* parameter of a + call_for_symbol_thunks_and_aliases callback. */ +struct symbol_and_index_together +{ + symtab_node *symbol; + int index; +}; + +/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call + adjust_references_in_caller on edges up in the call-graph, if necessary. */ +static bool +adjust_refs_in_act_callers (struct cgraph_node *node, void *data) +{ + symbol_and_index_together *pack = (symbol_and_index_together *) data; + for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) + if (!cs->caller->thunk) + adjust_references_in_caller (cs, pack->symbol, pack->index); + return false; +} + +/* At INDEX of a function being called by CS there is an ADDR_EXPR of a + variable which is only dereferenced and which is represented by SYMBOL. See + if we can remove ADDR reference in callers assosiated witht the call. */ + +static void +adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index) +{ + ipa_edge_args *args = ipa_edge_args_sum->get (cs); + ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index); + if (jfunc->type == IPA_JF_CONST) + { + ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt, + cs->lto_stmt_uid); + if (!to_del) + return; + to_del->remove_reference (); + if (dump_file) + fprintf (dump_file, " Removed a reference from %s to %s.\n", + cs->caller->dump_name (), symbol->dump_name ()); + return; + } + + if (jfunc->type != IPA_JF_PASS_THROUGH + || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) + return; + + int fidx = ipa_get_jf_pass_through_formal_id (jfunc); + cgraph_node *caller = cs->caller; + ipa_node_params *caller_info = ipa_node_params_sum->get (caller); + /* TODO: This consistency check may be too big and not really + that useful. Consider removing it. */ + tree cst; + if (caller_info->ipcp_orig_node) + cst = caller_info->known_csts[fidx]; + else + { + ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx); + gcc_assert (lat->is_single_const ()); + cst = lat->values->value; + } + gcc_assert (TREE_CODE (cst) == ADDR_EXPR + && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0))) + == symbol)); + + int cuses = ipa_get_controlled_uses (caller_info, fidx); + if (cuses == IPA_UNDESCRIBED_USE) + return; + gcc_assert (cuses > 0); + cuses--; + ipa_set_controlled_uses (caller_info, fidx, cuses); + if (cuses) + return; + + if (caller_info->ipcp_orig_node) + { + /* Cloning machinery has created a reference here, we need to either + remove it or change it to a read one. */ + ipa_ref *to_del = caller->find_reference (symbol, NULL, 0); + if (to_del && to_del->use == IPA_REF_ADDR) + { + to_del->remove_reference (); + if (dump_file) + fprintf (dump_file, " Removed a reference from %s to %s.\n", + cs->caller->dump_name (), symbol->dump_name ()); + if (ipa_get_param_load_dereferenced (caller_info, fidx)) + { + caller->create_reference (symbol, IPA_REF_LOAD, NULL); + if (dump_file) + fprintf (dump_file, + " ...and replaced it with LOAD one.\n"); + } + } + } + + symbol_and_index_together pack; + pack.symbol = symbol; + pack.index = fidx; + if (caller->can_change_signature) + caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers, + &pack, true); +} + + /* Return true if we would like to remove a parameter from NODE when cloning it with KNOWN_CSTS scalar constants. */ @@ -4595,15 +4713,31 @@ create_specialized_node (struct cgraph_node *node, for (i = 0; i < count; i++) { tree t = known_csts[i]; - if (t) - { - struct ipa_replace_map *replace_map; + if (!t) + continue; - gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); - replace_map = get_replacement_map (info, t, i); - if (replace_map) - vec_safe_push (replace_trees, replace_map); + gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); + + bool load_ref = false; + symtab_node *ref_symbol; + if (TREE_CODE (t) == ADDR_EXPR) + { + tree base = get_base_address (TREE_OPERAND (t, 0)); + if (TREE_CODE (base) == VAR_DECL + && ipa_get_controlled_uses (info, i) == 0 + && ipa_get_param_load_dereferenced (info, i) + && (ref_symbol = symtab_node::get (base))) + { + load_ref = true; + if (node->can_change_signature) + for (cgraph_edge *caller : callers) + adjust_references_in_caller (caller, ref_symbol, i); + } } + + ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref); + if (replace_map) + vec_safe_push (replace_trees, replace_map); } auto_vec<cgraph_edge *, 2> self_recursive_calls; for (i = callers.length () - 1; i >= 0; i--) diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index f74d2e17b69..a9b1b3c8aef 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -544,7 +544,9 @@ ipa_set_jf_constant (struct ipa_jump_func *jfunc, tree constant, jfunc->value.constant.value = unshare_expr_without_location (constant); if (TREE_CODE (constant) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (constant, 0)) == FUNCTION_DECL) + && (TREE_CODE (TREE_OPERAND (constant, 0)) == FUNCTION_DECL + || (TREE_CODE (TREE_OPERAND (constant, 0)) == VAR_DECL + && TREE_STATIC (TREE_OPERAND (constant, 0))))) { struct ipa_cst_ref_desc *rdesc; @@ -2876,6 +2878,16 @@ ipa_analyze_params_uses_in_bb (struct ipa_func_body_info *fbi, basic_block bb) visit_ref_for_mod_analysis); } +/* Return true EXPR is a load from a dereference of SSA_NAME NAME. */ + +static bool +load_from_dereferenced_name (tree expr, tree name) +{ + tree base = get_base_address (expr); + return (TREE_CODE (base) == MEM_REF + && TREE_OPERAND (base, 0) == name); +} + /* Calculate controlled uses of parameters of NODE. */ static void @@ -2886,7 +2898,8 @@ ipa_analyze_controlled_uses (struct cgraph_node *node) for (int i = 0; i < ipa_get_param_count (info); i++) { tree parm = ipa_get_param (info, i); - int controlled_uses = 0; + int call_uses = 0; + bool load_dereferenced = false; /* For SSA regs see if parameter is used. For non-SSA we compute the flag during modification analysis. */ @@ -2897,27 +2910,77 @@ ipa_analyze_controlled_uses (struct cgraph_node *node) if (ddef && !has_zero_uses (ddef)) { imm_use_iterator imm_iter; - use_operand_p use_p; + gimple *stmt; ipa_set_param_used (info, i, true); - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ddef) - if (!is_gimple_call (USE_STMT (use_p))) - { - if (!is_gimple_debug (USE_STMT (use_p))) - { - controlled_uses = IPA_UNDESCRIBED_USE; - break; - } - } - else - controlled_uses++; + FOR_EACH_IMM_USE_STMT (stmt, imm_iter, ddef) + { + if (is_gimple_debug (stmt)) + continue; + + int all_stmt_uses = 0; + use_operand_p use_p; + FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) + all_stmt_uses++; + + if (is_gimple_call (stmt)) + { + if (gimple_call_internal_p (stmt)) + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + int recognized_stmt_uses; + if (gimple_call_fn (stmt) == ddef) + recognized_stmt_uses = 1; + else + recognized_stmt_uses = 0; + unsigned arg_count = gimple_call_num_args (stmt); + for (unsigned i = 0; i < arg_count; i++) + { + tree arg = gimple_call_arg (stmt, i); + if (arg == ddef) + recognized_stmt_uses++; + else if (load_from_dereferenced_name (arg, ddef)) + { + load_dereferenced = true; + recognized_stmt_uses++; + } + } + + if (recognized_stmt_uses != all_stmt_uses) + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + if (call_uses >= 0) + call_uses += all_stmt_uses; + } + else if (gimple_assign_single_p (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); + if (all_stmt_uses != 1 + || !load_from_dereferenced_name (rhs, ddef)) + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + load_dereferenced = true; + } + else + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + } } else - controlled_uses = 0; + call_uses = 0; } else - controlled_uses = IPA_UNDESCRIBED_USE; - ipa_set_controlled_uses (info, i, controlled_uses); + call_uses = IPA_UNDESCRIBED_USE; + ipa_set_controlled_uses (info, i, call_uses); + ipa_set_param_load_dereferenced (info, i, load_dereferenced); } } @@ -3640,16 +3703,17 @@ jfunc_rdesc_usable (struct ipa_jump_func *jfunc) declaration, return the associated call graph node. Otherwise return NULL. */ -static cgraph_node * -cgraph_node_for_jfunc (struct ipa_jump_func *jfunc) +static symtab_node * +symtab_node_for_jfunc (struct ipa_jump_func *jfunc) { gcc_checking_assert (jfunc->type == IPA_JF_CONST); tree cst = ipa_get_jf_constant (jfunc); if (TREE_CODE (cst) != ADDR_EXPR - || TREE_CODE (TREE_OPERAND (cst, 0)) != FUNCTION_DECL) + || (TREE_CODE (TREE_OPERAND (cst, 0)) != FUNCTION_DECL + && TREE_CODE (TREE_OPERAND (cst, 0)) != VAR_DECL)) return NULL; - return cgraph_node::get (TREE_OPERAND (cst, 0)); + return symtab_node::get (TREE_OPERAND (cst, 0)); } @@ -3666,7 +3730,7 @@ try_decrement_rdesc_refcount (struct ipa_jump_func *jfunc) && (rdesc = jfunc_rdesc_usable (jfunc)) && --rdesc->refcount == 0) { - symtab_node *symbol = cgraph_node_for_jfunc (jfunc); + symtab_node *symbol = symtab_node_for_jfunc (jfunc); if (!symbol) return false; @@ -3721,8 +3785,8 @@ try_make_edge_direct_simple_call (struct cgraph_edge *ie, gcc_checking_assert (cs->callee && (cs != ie || jfunc->type != IPA_JF_CONST - || !cgraph_node_for_jfunc (jfunc) - || cs->callee == cgraph_node_for_jfunc (jfunc))); + || !symtab_node_for_jfunc (jfunc) + || cs->callee == symtab_node_for_jfunc (jfunc))); ok = try_decrement_rdesc_refcount (jfunc); gcc_checking_assert (ok); } @@ -4085,7 +4149,15 @@ propagate_controlled_uses (struct cgraph_edge *cs) == NOP_EXPR || c == IPA_UNDESCRIBED_USE); c = combine_controlled_uses_counters (c, d); ipa_set_controlled_uses (new_root_info, src_idx, c); - if (c == 0 && new_root_info->ipcp_orig_node) + bool lderef = true; + if (c != IPA_UNDESCRIBED_USE) + { + lderef = (ipa_get_param_load_dereferenced (new_root_info, src_idx) + || ipa_get_param_load_dereferenced (old_root_info, i)); + ipa_set_param_load_dereferenced (new_root_info, src_idx, lderef); + } + + if (c == 0 && !lderef && new_root_info->ipcp_orig_node) { struct cgraph_node *n; struct ipa_ref *ref; @@ -4114,17 +4186,27 @@ propagate_controlled_uses (struct cgraph_edge *cs) if (rdesc->refcount == 0) { tree cst = ipa_get_jf_constant (jf); - struct cgraph_node *n; gcc_checking_assert (TREE_CODE (cst) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (cst, 0)) - == FUNCTION_DECL); - n = cgraph_node::get (TREE_OPERAND (cst, 0)); + && ((TREE_CODE (TREE_OPERAND (cst, 0)) + == FUNCTION_DECL) + || (TREE_CODE (TREE_OPERAND (cst, 0)) + == VAR_DECL))); + + symtab_node *n = symtab_node::get (TREE_OPERAND (cst, 0)); if (n) { struct cgraph_node *clone; - bool ok; - ok = remove_described_reference (n, rdesc); - gcc_checking_assert (ok); + bool removed = remove_described_reference (n, rdesc); + /* The reference might have been removed by IPA-CP. */ + if (removed + && ipa_get_param_load_dereferenced (old_root_info, i)) + { + new_root->create_reference (n, IPA_REF_LOAD, NULL); + if (dump_file) + fprintf (dump_file, "ipa-prop: ...replaced it with " + " LOAD one from %s to %s.\n", + new_root->dump_name (), n->dump_name ()); + } clone = cs->caller; while (clone->inlined_to @@ -4347,7 +4429,7 @@ ipa_edge_args_sum_t::duplicate (cgraph_edge *src, cgraph_edge *dst, else if (src->caller == dst->caller) { struct ipa_ref *ref; - symtab_node *n = cgraph_node_for_jfunc (src_jf); + symtab_node *n = symtab_node_for_jfunc (src_jf); gcc_checking_assert (n); ref = src->caller->find_reference (n, src->call_stmt, src->lto_stmt_uid); @@ -4574,7 +4656,9 @@ ipa_print_node_params (FILE *f, struct cgraph_node *node) if (c == IPA_UNDESCRIBED_USE) fprintf (f, " undescribed_use"); else - fprintf (f, " controlled_uses=%i", c); + fprintf (f, " controlled_uses=%i %s", c, + ipa_get_param_load_dereferenced (info, i) + ? "(load_dereferenced)" : ""); fprintf (f, "\n"); } } @@ -4969,7 +5053,13 @@ ipa_write_node_info (struct output_block *ob, struct cgraph_node *node) gcc_assert (!info->node_enqueued); gcc_assert (!info->ipcp_orig_node); for (j = 0; j < ipa_get_param_count (info); j++) - bp_pack_value (&bp, ipa_is_param_used (info, j), 1); + { + /* TODO: We could just not stream the bit in the undescribed case. */ + bool d = (ipa_get_controlled_uses (info, j) != IPA_UNDESCRIBED_USE) + ? ipa_get_param_load_dereferenced (info, j) : true; + bp_pack_value (&bp, d, 1); + bp_pack_value (&bp, ipa_is_param_used (info, j), 1); + } streamer_write_bitpack (&bp); for (j = 0; j < ipa_get_param_count (info); j++) { @@ -5095,10 +5185,14 @@ ipa_read_node_info (class lto_input_block *ib, struct cgraph_node *node, bp = streamer_read_bitpack (ib); for (k = 0; k < param_count; k++) { + bool load_dereferenced = bp_unpack_value (&bp, 1); bool used = bp_unpack_value (&bp, 1); if (prevails) - ipa_set_param_used (info, k, used); + { + ipa_set_param_load_dereferenced (info, k, load_dereferenced); + ipa_set_param_used (info, k, used); + } } for (k = 0; k < param_count; k++) { diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 3d28a6e8640..de35e3969f4 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -550,14 +550,21 @@ struct GTY(()) ipa_param_descriptor tree decl_or_type; /* If all uses of the parameter are described by ipa-prop structures, this says how many there are. If any use could not be described by means of - ipa-prop structures, this is IPA_UNDESCRIBED_USE. */ + ipa-prop structures (which include flag dereferenced below), this is + IPA_UNDESCRIBED_USE. */ int controlled_uses; - unsigned int move_cost : 28; + unsigned int move_cost : 27; /* The parameter is used. */ unsigned used : 1; unsigned used_by_ipa_predicates : 1; unsigned used_by_indirect_call : 1; unsigned used_by_polymorphic_call : 1; + /* Set to true when in addition to being used in call statements, the + parameter has also been used for loads (but not for writes, does not + escape, etc.). This allows us to identify parameters p which are only + used as *p, and so when we propagate a constant to them, we can generate a + LOAD and not ADDR reference to them. */ + unsigned load_dereferenced : 1; }; /* ipa_node_params stores information related to formal parameters of functions @@ -795,6 +802,24 @@ ipa_set_controlled_uses (class ipa_node_params *info, int i, int val) (*info->descriptors)[i].controlled_uses = val; } +/* Assuming a parameter does not have IPA_UNDESCRIBED_USE controlled uses, + return flag which indicates it has been dereferenced but only in a load. */ +static inline int +ipa_get_param_load_dereferenced (class ipa_node_params *info, int i) +{ + gcc_assert (ipa_get_controlled_uses (info, i) != IPA_UNDESCRIBED_USE); + return (*info->descriptors)[i].load_dereferenced; +} + +/* Set the load_dereferenced flag of a given parameter. */ + +static inline void +ipa_set_param_load_dereferenced (class ipa_node_params *info, int i, bool val) +{ + gcc_checking_assert (info->descriptors); + (*info->descriptors)[i].load_dereferenced = val; +} + /* Return the used flag corresponding to the Ith formal parameter of the function associated with INFO. */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-3.c b/gcc/testsuite/gcc.dg/ipa/remref-3.c new file mode 100644 index 00000000000..8a82ca65c68 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-cp-details -fdump-tree-optimized" } */ + +static double global = 0.0; + +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +void entry() +{ + foo(&global); +} + +/* { dg-final { scan-ipa-dump "Removed a reference" "cp" } } */ +/* { dg-final { scan-tree-dump-not "builtin_exp" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-4.c b/gcc/testsuite/gcc.dg/ipa/remref-4.c new file mode 100644 index 00000000000..5a55437e971 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-4.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-cp-details -fdump-tree-optimized" } */ + +static double global = 0.0; +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +double last_value; + +static void bar(double *ptr) +{ + last_value = *ptr; + foo (ptr); +} + +void entry() +{ + bar (&global); +} + +/* { dg-final { scan-ipa-dump "Removed a reference" "cp" } } */ +/* { dg-final { scan-ipa-dump "replaced it with LOAD" "cp" } } */ +/* { dg-final { scan-tree-dump-not "builtin_exp" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-5.c b/gcc/testsuite/gcc.dg/ipa/remref-5.c new file mode 100644 index 00000000000..c520e34d809 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-early-inlining -fdump-ipa-cp-details -fdump-tree-optimized" } */ + +static double global = 0.0; +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +double last_value; + +static void bar(double *ptr) +{ + last_value = *ptr; + for (unsigned i = 0; i < 200; i++) + foo (ptr); +} + +void entry() +{ + bar (&global); +} + +void decoy(double *ptr) +{ + bar (ptr); +} + + +/* { dg-final { scan-ipa-dump "Removed a reference" "cp" } } */ +/* { dg-final { scan-ipa-dump "replaced it with LOAD" "cp" } } */ +/* { dg-final { scan-tree-dump-times "builtin_exp" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-6.c b/gcc/testsuite/gcc.dg/ipa/remref-6.c new file mode 100644 index 00000000000..de3649315c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-6.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-early-inlining -fno-ipa-cp -fdump-tree-optimized" } */ + +static double global = 0.0; + +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +void entry() +{ + foo(&global); +} + +/* { dg-final { scan-ipa-dump "Removed a reference" "inline" } } */ +/* { dg-final { scan-ipa-dump "replaced it with LOAD" "inline" } } */ +/* { dg-final { scan-tree-dump-not "builtin_exp" "optimized" } } */
> Since it has been pre-approved by Honza, I would like to commit it to master > soon. Nevertheless, Jiangning, I am OK to wait a day or so if you can give it > another test on your setup. > I failed to apply your patch, so could you please provide a patch file instead? patch: **** malformed patch at line 45: @@ -4008,7 +4009,8 @@ ipcp_discover_new_direct_edges (struct cgraph_node *node, Thanks, -Jiangning
Hi, On Tue, Jul 27 2021, JiangNing OS wrote: >> Since it has been pre-approved by Honza, I would like to commit it to master >> soon. Nevertheless, Jiangning, I am OK to wait a day or so if you can give it >> another test on your setup. >> > > I failed to apply your patch, so could you please provide a patch file instead? I have committed the patch this morning as commit 13586172d0b - so please try that. But I really hope the issues are resolved. Thanks, Martin
> -----Original Message----- > From: Martin Jambor <mjambor@suse.cz> > Sent: Tuesday, July 27, 2021 5:39 PM > To: JiangNing OS <jiangning@os.amperecomputing.com>; Richard Biener > <richard.guenther@gmail.com> > Cc: GCC Patches <gcc-patches@gcc.gnu.org>; Jan Hubicka <hubicka@ucw.cz> > Subject: RE: [RFC] ipa: Adjust references to identify read-only globals > > Hi, > > On Tue, Jul 27 2021, JiangNing OS wrote: > >> Since it has been pre-approved by Honza, I would like to commit it to > >> master soon. Nevertheless, Jiangning, I am OK to wait a day or so if > >> you can give it another test on your setup. > >> > > > > I failed to apply your patch, so could you please provide a patch file instead? > > I have committed the patch this morning as commit 13586172d0b - so please > try that. > > But I really hope the issues are resolved. Yes. The ICEs I saw before are fixed on trunk. Thanks, -Jiangning
diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 9f4338fdf87..0fc20cd4517 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -700,6 +700,9 @@ struct GTY(()) ipa_replace_map tree new_tree; /* Parameter number to replace, when old_tree is NULL. */ int parm_num; + /* Set if the newly added reference should not be an address one, but a load + one from the operand of the ADDR_EXPR in NEW_TREE. */ + unsigned force_load_ref : 1; }; enum cgraph_simd_clone_arg_type diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index 9f86463b42d..8ec58769c80 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -636,7 +636,15 @@ cgraph_node::create_virtual_clone (vec<cgraph_edge *> redirect_callers, || in_lto_p) new_node->unique_name = true; FOR_EACH_VEC_SAFE_ELT (tree_map, i, map) - new_node->maybe_create_reference (map->new_tree, NULL); + { + tree repl = map->new_tree; + if (map->force_load_ref) + { + gcc_assert (TREE_CODE (repl) == ADDR_EXPR); + repl = TREE_OPERAND (repl, 0); + } + new_node->maybe_create_reference (repl, NULL); + } if (ipa_transforms_to_apply.exists ()) new_node->ipa_transforms_to_apply diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 57c18af2bab..0ad5ffae354 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -106,6 +106,7 @@ along with GCC; see the file COPYING3. If not see #include "backend.h" #include "tree.h" #include "gimple-expr.h" +#include "gimple.h" #include "predict.h" #include "alloc-pool.h" #include "tree-pass.h" @@ -4008,7 +4009,8 @@ ipcp_discover_new_direct_edges (struct cgraph_node *node, { ipa_node_params *info = ipa_node_params_sum->get (node); int c = ipa_get_controlled_uses (info, param_index); - if (c != IPA_UNDESCRIBED_USE) + if (c != IPA_UNDESCRIBED_USE + && !ipa_get_param_load_dereferenced (info, param_index)) { struct ipa_ref *to_del; @@ -4320,7 +4322,8 @@ gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest, Return it or NULL if for some reason it cannot be created. */ static struct ipa_replace_map * -get_replacement_map (class ipa_node_params *info, tree value, int parm_num) +get_replacement_map (class ipa_node_params *info, tree value, int parm_num, + bool force_load_ref) { struct ipa_replace_map *replace_map; @@ -4332,10 +4335,15 @@ get_replacement_map (class ipa_node_params *info, tree value, int parm_num) fprintf (dump_file, " with const "); print_generic_expr (dump_file, value); - fprintf (dump_file, "\n"); + + if (force_load_ref) + fprintf (dump_file, " - forcing load reference\n"); + else + fprintf (dump_file, "\n"); } replace_map->parm_num = parm_num; replace_map->new_tree = value; + replace_map->force_load_ref = force_load_ref; return replace_map; } @@ -4488,6 +4496,111 @@ update_specialized_profile (struct cgraph_node *new_node, dump_profile_updates (orig_node, new_node); } +static void adjust_references_in_caller (cgraph_edge *cs, + symtab_node *symbol, int index); + +/* Simple structure to pass a symbol and index (with same meaning as parameters + of adjust_references_in_caller) through a void* parameter of a + call_for_symbol_thunks_and_aliases callback. */ +struct symbol_and_index_together +{ + symtab_node *symbol; + int index; +}; + +/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call + adjust_references_in_caller on edges up in the call-graph, if necessary. */ +static bool +adjust_references_in_act_callers (struct cgraph_node *node, void *data) +{ + symbol_and_index_together *pack = (symbol_and_index_together *) data; + for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) + if (!cs->caller->thunk) + adjust_references_in_caller (cs, pack->symbol, pack->index); + return false; +} + +/* At INDEX of a function being called by CS there is an ADDR_EXPR of a + variable which is only dereferenced and which is represented by SYMBOL. See + if we can remove ADDR reference in callers assosiated witht the call. */ + +static void +adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index) +{ + ipa_edge_args *args = ipa_edge_args_sum->get (cs); + ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index); + if (jfunc->type == IPA_JF_CONST) + { + ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt, + cs->lto_stmt_uid); + if (!to_del) + return; + to_del->remove_reference (); + if (dump_file) + fprintf (dump_file, " Removed a reference from %s to %s.\n", + cs->caller->dump_name (), symbol->dump_name ()); + return; + } + + if (jfunc->type != IPA_JF_PASS_THROUGH + || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) + return; + + int fidx = ipa_get_jf_pass_through_formal_id (jfunc); + cgraph_node *caller = cs->caller; + ipa_node_params *caller_info = ipa_node_params_sum->get (caller); + /* TODO: This consistency check may be too big and not really + that useful. Consider removing it. */ + tree cst; + if (caller_info->ipcp_orig_node) + cst = caller_info->known_csts[fidx]; + else + { + ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx); + gcc_assert (lat->is_single_const ()); + cst = lat->values->value; + } + gcc_assert (TREE_CODE (cst) == ADDR_EXPR + && symtab_node::get (TREE_OPERAND (cst, 0)) == symbol); + + int cuses = ipa_get_controlled_uses (caller_info, fidx); + if (cuses == IPA_UNDESCRIBED_USE) + return; + gcc_assert (cuses > 0); + cuses--; + ipa_set_controlled_uses (caller_info, fidx, cuses); + if (cuses) + return; + + if (caller_info->ipcp_orig_node) + { + /* Cloning machinery has created a reference here, we need to either + remove it or change it to a read one. */ + ipa_ref *to_del = caller->find_reference (symbol, NULL, 0); + if (to_del && to_del->use == IPA_REF_ADDR) + { + to_del->remove_reference (); + if (dump_file) + fprintf (dump_file, " Removed a reference from %s to %s.\n", + cs->caller->dump_name (), symbol->dump_name ()); + if (ipa_get_param_load_dereferenced (caller_info, fidx)) + { + caller->create_reference (symbol, IPA_REF_LOAD, NULL); + if (dump_file) + fprintf (dump_file, + " ...and replaced it with LOAD one.\n"); + } + } + } + + symbol_and_index_together pack; + pack.symbol = symbol; + pack.index = fidx; + caller->call_for_symbol_thunks_and_aliases (adjust_references_in_act_callers, + &pack, true); +} + + /* Return true if we would like to remove a parameter from NODE when cloning it with KNOWN_CSTS scalar constants. */ @@ -4595,15 +4708,28 @@ create_specialized_node (struct cgraph_node *node, for (i = 0; i < count; i++) { tree t = known_csts[i]; - if (t) - { - struct ipa_replace_map *replace_map; + if (!t) + continue; - gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); - replace_map = get_replacement_map (info, t, i); - if (replace_map) - vec_safe_push (replace_trees, replace_map); + gcc_checking_assert (TREE_CODE (t) != TREE_BINFO); + + bool load_ref = false; + symtab_node *ref_symbol; + if (TREE_CODE (t) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (t, 0)) == VAR_DECL + && ipa_get_controlled_uses (info, i) == 0 + && ipa_get_param_load_dereferenced (info, i) + && (ref_symbol = symtab_node::get (TREE_OPERAND (t, 0)))) + { + load_ref = true; + for (cgraph_edge *caller : callers) + adjust_references_in_caller (caller, ref_symbol, i); } + + struct ipa_replace_map *replace_map = get_replacement_map (info, t, i, + load_ref); + if (replace_map) + vec_safe_push (replace_trees, replace_map); } auto_vec<cgraph_edge *, 2> self_recursive_calls; for (i = callers.length () - 1; i >= 0; i--) diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index f74d2e17b69..a9b1b3c8aef 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -544,7 +544,9 @@ ipa_set_jf_constant (struct ipa_jump_func *jfunc, tree constant, jfunc->value.constant.value = unshare_expr_without_location (constant); if (TREE_CODE (constant) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (constant, 0)) == FUNCTION_DECL) + && (TREE_CODE (TREE_OPERAND (constant, 0)) == FUNCTION_DECL + || (TREE_CODE (TREE_OPERAND (constant, 0)) == VAR_DECL + && TREE_STATIC (TREE_OPERAND (constant, 0))))) { struct ipa_cst_ref_desc *rdesc; @@ -2876,6 +2878,16 @@ ipa_analyze_params_uses_in_bb (struct ipa_func_body_info *fbi, basic_block bb) visit_ref_for_mod_analysis); } +/* Return true EXPR is a load from a dereference of SSA_NAME NAME. */ + +static bool +load_from_dereferenced_name (tree expr, tree name) +{ + tree base = get_base_address (expr); + return (TREE_CODE (base) == MEM_REF + && TREE_OPERAND (base, 0) == name); +} + /* Calculate controlled uses of parameters of NODE. */ static void @@ -2886,7 +2898,8 @@ ipa_analyze_controlled_uses (struct cgraph_node *node) for (int i = 0; i < ipa_get_param_count (info); i++) { tree parm = ipa_get_param (info, i); - int controlled_uses = 0; + int call_uses = 0; + bool load_dereferenced = false; /* For SSA regs see if parameter is used. For non-SSA we compute the flag during modification analysis. */ @@ -2897,27 +2910,77 @@ ipa_analyze_controlled_uses (struct cgraph_node *node) if (ddef && !has_zero_uses (ddef)) { imm_use_iterator imm_iter; - use_operand_p use_p; + gimple *stmt; ipa_set_param_used (info, i, true); - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ddef) - if (!is_gimple_call (USE_STMT (use_p))) - { - if (!is_gimple_debug (USE_STMT (use_p))) - { - controlled_uses = IPA_UNDESCRIBED_USE; - break; - } - } - else - controlled_uses++; + FOR_EACH_IMM_USE_STMT (stmt, imm_iter, ddef) + { + if (is_gimple_debug (stmt)) + continue; + + int all_stmt_uses = 0; + use_operand_p use_p; + FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) + all_stmt_uses++; + + if (is_gimple_call (stmt)) + { + if (gimple_call_internal_p (stmt)) + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + int recognized_stmt_uses; + if (gimple_call_fn (stmt) == ddef) + recognized_stmt_uses = 1; + else + recognized_stmt_uses = 0; + unsigned arg_count = gimple_call_num_args (stmt); + for (unsigned i = 0; i < arg_count; i++) + { + tree arg = gimple_call_arg (stmt, i); + if (arg == ddef) + recognized_stmt_uses++; + else if (load_from_dereferenced_name (arg, ddef)) + { + load_dereferenced = true; + recognized_stmt_uses++; + } + } + + if (recognized_stmt_uses != all_stmt_uses) + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + if (call_uses >= 0) + call_uses += all_stmt_uses; + } + else if (gimple_assign_single_p (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); + if (all_stmt_uses != 1 + || !load_from_dereferenced_name (rhs, ddef)) + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + load_dereferenced = true; + } + else + { + call_uses = IPA_UNDESCRIBED_USE; + break; + } + } } else - controlled_uses = 0; + call_uses = 0; } else - controlled_uses = IPA_UNDESCRIBED_USE; - ipa_set_controlled_uses (info, i, controlled_uses); + call_uses = IPA_UNDESCRIBED_USE; + ipa_set_controlled_uses (info, i, call_uses); + ipa_set_param_load_dereferenced (info, i, load_dereferenced); } } @@ -3640,16 +3703,17 @@ jfunc_rdesc_usable (struct ipa_jump_func *jfunc) declaration, return the associated call graph node. Otherwise return NULL. */ -static cgraph_node * -cgraph_node_for_jfunc (struct ipa_jump_func *jfunc) +static symtab_node * +symtab_node_for_jfunc (struct ipa_jump_func *jfunc) { gcc_checking_assert (jfunc->type == IPA_JF_CONST); tree cst = ipa_get_jf_constant (jfunc); if (TREE_CODE (cst) != ADDR_EXPR - || TREE_CODE (TREE_OPERAND (cst, 0)) != FUNCTION_DECL) + || (TREE_CODE (TREE_OPERAND (cst, 0)) != FUNCTION_DECL + && TREE_CODE (TREE_OPERAND (cst, 0)) != VAR_DECL)) return NULL; - return cgraph_node::get (TREE_OPERAND (cst, 0)); + return symtab_node::get (TREE_OPERAND (cst, 0)); } @@ -3666,7 +3730,7 @@ try_decrement_rdesc_refcount (struct ipa_jump_func *jfunc) && (rdesc = jfunc_rdesc_usable (jfunc)) && --rdesc->refcount == 0) { - symtab_node *symbol = cgraph_node_for_jfunc (jfunc); + symtab_node *symbol = symtab_node_for_jfunc (jfunc); if (!symbol) return false; @@ -3721,8 +3785,8 @@ try_make_edge_direct_simple_call (struct cgraph_edge *ie, gcc_checking_assert (cs->callee && (cs != ie || jfunc->type != IPA_JF_CONST - || !cgraph_node_for_jfunc (jfunc) - || cs->callee == cgraph_node_for_jfunc (jfunc))); + || !symtab_node_for_jfunc (jfunc) + || cs->callee == symtab_node_for_jfunc (jfunc))); ok = try_decrement_rdesc_refcount (jfunc); gcc_checking_assert (ok); } @@ -4085,7 +4149,15 @@ propagate_controlled_uses (struct cgraph_edge *cs) == NOP_EXPR || c == IPA_UNDESCRIBED_USE); c = combine_controlled_uses_counters (c, d); ipa_set_controlled_uses (new_root_info, src_idx, c); - if (c == 0 && new_root_info->ipcp_orig_node) + bool lderef = true; + if (c != IPA_UNDESCRIBED_USE) + { + lderef = (ipa_get_param_load_dereferenced (new_root_info, src_idx) + || ipa_get_param_load_dereferenced (old_root_info, i)); + ipa_set_param_load_dereferenced (new_root_info, src_idx, lderef); + } + + if (c == 0 && !lderef && new_root_info->ipcp_orig_node) { struct cgraph_node *n; struct ipa_ref *ref; @@ -4114,17 +4186,27 @@ propagate_controlled_uses (struct cgraph_edge *cs) if (rdesc->refcount == 0) { tree cst = ipa_get_jf_constant (jf); - struct cgraph_node *n; gcc_checking_assert (TREE_CODE (cst) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (cst, 0)) - == FUNCTION_DECL); - n = cgraph_node::get (TREE_OPERAND (cst, 0)); + && ((TREE_CODE (TREE_OPERAND (cst, 0)) + == FUNCTION_DECL) + || (TREE_CODE (TREE_OPERAND (cst, 0)) + == VAR_DECL))); + + symtab_node *n = symtab_node::get (TREE_OPERAND (cst, 0)); if (n) { struct cgraph_node *clone; - bool ok; - ok = remove_described_reference (n, rdesc); - gcc_checking_assert (ok); + bool removed = remove_described_reference (n, rdesc); + /* The reference might have been removed by IPA-CP. */ + if (removed + && ipa_get_param_load_dereferenced (old_root_info, i)) + { + new_root->create_reference (n, IPA_REF_LOAD, NULL); + if (dump_file) + fprintf (dump_file, "ipa-prop: ...replaced it with " + " LOAD one from %s to %s.\n", + new_root->dump_name (), n->dump_name ()); + } clone = cs->caller; while (clone->inlined_to @@ -4347,7 +4429,7 @@ ipa_edge_args_sum_t::duplicate (cgraph_edge *src, cgraph_edge *dst, else if (src->caller == dst->caller) { struct ipa_ref *ref; - symtab_node *n = cgraph_node_for_jfunc (src_jf); + symtab_node *n = symtab_node_for_jfunc (src_jf); gcc_checking_assert (n); ref = src->caller->find_reference (n, src->call_stmt, src->lto_stmt_uid); @@ -4574,7 +4656,9 @@ ipa_print_node_params (FILE *f, struct cgraph_node *node) if (c == IPA_UNDESCRIBED_USE) fprintf (f, " undescribed_use"); else - fprintf (f, " controlled_uses=%i", c); + fprintf (f, " controlled_uses=%i %s", c, + ipa_get_param_load_dereferenced (info, i) + ? "(load_dereferenced)" : ""); fprintf (f, "\n"); } } @@ -4969,7 +5053,13 @@ ipa_write_node_info (struct output_block *ob, struct cgraph_node *node) gcc_assert (!info->node_enqueued); gcc_assert (!info->ipcp_orig_node); for (j = 0; j < ipa_get_param_count (info); j++) - bp_pack_value (&bp, ipa_is_param_used (info, j), 1); + { + /* TODO: We could just not stream the bit in the undescribed case. */ + bool d = (ipa_get_controlled_uses (info, j) != IPA_UNDESCRIBED_USE) + ? ipa_get_param_load_dereferenced (info, j) : true; + bp_pack_value (&bp, d, 1); + bp_pack_value (&bp, ipa_is_param_used (info, j), 1); + } streamer_write_bitpack (&bp); for (j = 0; j < ipa_get_param_count (info); j++) { @@ -5095,10 +5185,14 @@ ipa_read_node_info (class lto_input_block *ib, struct cgraph_node *node, bp = streamer_read_bitpack (ib); for (k = 0; k < param_count; k++) { + bool load_dereferenced = bp_unpack_value (&bp, 1); bool used = bp_unpack_value (&bp, 1); if (prevails) - ipa_set_param_used (info, k, used); + { + ipa_set_param_load_dereferenced (info, k, load_dereferenced); + ipa_set_param_used (info, k, used); + } } for (k = 0; k < param_count; k++) { diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 3d28a6e8640..619f5c31274 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -550,14 +550,19 @@ struct GTY(()) ipa_param_descriptor tree decl_or_type; /* If all uses of the parameter are described by ipa-prop structures, this says how many there are. If any use could not be described by means of - ipa-prop structures, this is IPA_UNDESCRIBED_USE. */ + ipa-prop structures (which include flag dereferenced below), this is + IPA_UNDESCRIBED_USE. */ int controlled_uses; - unsigned int move_cost : 28; + unsigned int move_cost : 27; /* The parameter is used. */ unsigned used : 1; unsigned used_by_ipa_predicates : 1; unsigned used_by_indirect_call : 1; unsigned used_by_polymorphic_call : 1; + /* Set to true when in addition to being used in call statements, the + parameterr has also been used for loads (but not for wtires, does not + escape, etc.). */ + unsigned load_dereferenced : 1; }; /* ipa_node_params stores information related to formal parameters of functions @@ -795,6 +800,24 @@ ipa_set_controlled_uses (class ipa_node_params *info, int i, int val) (*info->descriptors)[i].controlled_uses = val; } +/* Assuming a parameter does not have IPA_UNDESCRIBED_USE controlled uses, + return flag which indicates it has been dereferenced but only in a load. */ +static inline int +ipa_get_param_load_dereferenced (class ipa_node_params *info, int i) +{ + gcc_assert (ipa_get_controlled_uses (info, i) != IPA_UNDESCRIBED_USE); + return (*info->descriptors)[i].load_dereferenced; +} + +/* Set the load_dereferenced flag of a given parameter. */ + +static inline void +ipa_set_param_load_dereferenced (class ipa_node_params *info, int i, bool val) +{ + gcc_checking_assert (info->descriptors); + (*info->descriptors)[i].load_dereferenced = val; +} + /* Return the used flag corresponding to the Ith formal parameter of the function associated with INFO. */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-3.c b/gcc/testsuite/gcc.dg/ipa/remref-3.c new file mode 100644 index 00000000000..8a82ca65c68 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-cp-details -fdump-tree-optimized" } */ + +static double global = 0.0; + +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +void entry() +{ + foo(&global); +} + +/* { dg-final { scan-ipa-dump "Removed a reference" "cp" } } */ +/* { dg-final { scan-tree-dump-not "builtin_exp" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-4.c b/gcc/testsuite/gcc.dg/ipa/remref-4.c new file mode 100644 index 00000000000..5a55437e971 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-4.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-cp-details -fdump-tree-optimized" } */ + +static double global = 0.0; +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +double last_value; + +static void bar(double *ptr) +{ + last_value = *ptr; + foo (ptr); +} + +void entry() +{ + bar (&global); +} + +/* { dg-final { scan-ipa-dump "Removed a reference" "cp" } } */ +/* { dg-final { scan-ipa-dump "replaced it with LOAD" "cp" } } */ +/* { dg-final { scan-tree-dump-not "builtin_exp" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-5.c b/gcc/testsuite/gcc.dg/ipa/remref-5.c new file mode 100644 index 00000000000..c520e34d809 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-early-inlining -fdump-ipa-cp-details -fdump-tree-optimized" } */ + +static double global = 0.0; +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +double last_value; + +static void bar(double *ptr) +{ + last_value = *ptr; + for (unsigned i = 0; i < 200; i++) + foo (ptr); +} + +void entry() +{ + bar (&global); +} + +void decoy(double *ptr) +{ + bar (ptr); +} + + +/* { dg-final { scan-ipa-dump "Removed a reference" "cp" } } */ +/* { dg-final { scan-ipa-dump "replaced it with LOAD" "cp" } } */ +/* { dg-final { scan-tree-dump-times "builtin_exp" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/ipa/remref-6.c b/gcc/testsuite/gcc.dg/ipa/remref-6.c new file mode 100644 index 00000000000..de3649315c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/remref-6.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-early-inlining -fno-ipa-cp -fdump-tree-optimized" } */ + +static double global = 0.0; + +double foo_temp5; + +static void foo(double *ptr) { + static double abcd; + double v, exp_res; + v = *ptr; + exp_res = __builtin_exp(v); + foo_temp5 = exp_res * abcd; + abcd += foo_temp5; +} + +void entry() +{ + foo(&global); +} + +/* { dg-final { scan-ipa-dump "Removed a reference" "inline" } } */ +/* { dg-final { scan-ipa-dump "replaced it with LOAD" "inline" } } */ +/* { dg-final { scan-tree-dump-not "builtin_exp" "optimized" } } */