Message ID | LV2PR01MB78393E51D5BC32B6C070E3CFF7312@LV2PR01MB7839.prod.exchangelabs.com |
---|---|
State | New |
Headers | show |
Series | [1/2] Refactor final_value_replacement_loop [PR90594] | expand |
On Fri, Dec 6, 2024 at 2:56 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote: > > This patch refactors the procedure in tree-scalar-evolution.cc in order to partially export its functionality to other module, so decomposes it to several relatively independent utility functions. > > Thanks, > Feng > --- > gcc/ > PR tree-optimization/90594 > * tree-scalar-evolution.cc (simple_scev_final_value): New function. > (apply_scev_final_value_replacement): Likewise. > (final_value_replacement_loop): Call new functions. > --- > gcc/tree-scalar-evolution.cc | 288 ++++++++++++++++++++--------------- > 1 file changed, 165 insertions(+), 123 deletions(-) > > diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc > index abb2bad7773..5733632aa78 100644 > --- a/gcc/tree-scalar-evolution.cc > +++ b/gcc/tree-scalar-evolution.cc > @@ -3775,13 +3775,17 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef, > return fold_build2 (code1, type, inv, match_op[0]); > } > > -/* Do final value replacement for LOOP, return true if we did anything. */ > +/* For induction VALUE of LOOP, return true if its SCEV is simple enough that > + its final value at loop exit could be directly calculated based on the > + initial value and loop niter, and this value is recorded in FINAL_VALUE, > + also set REWRITE_OVERFLOW to true in the case that we need to rewrite the > + final value to avoid overflow UB when replacement would really happen > + later. */ > > -bool > -final_value_replacement_loop (class loop *loop) > +static bool > +simple_scev_final_value (class loop *loop, tree value, tree *final_value, > + bool *rewrite_overflow) > { > - /* If we do not know exact number of iterations of the loop, we cannot > - replace the final value. */ > edge exit = single_exit (loop); > if (!exit) > return false; > @@ -3790,100 +3794,170 @@ final_value_replacement_loop (class loop *loop) > if (niter == chrec_dont_know) > return false; > > - /* Ensure that it is possible to insert new statements somewhere. */ > - if (!single_pred_p (exit->dest)) > - split_loop_exit_edge (exit); > - > - /* Set stmt insertion pointer. All stmts are inserted before this point. */ > + /* TODO: allow float value for fast math. */ > + if (!POINTER_TYPE_P (TREE_TYPE (value)) > + && !INTEGRAL_TYPE_P (TREE_TYPE (value))) > + return false; > > class loop *ex_loop > - = superloop_at_depth (loop, > - loop_depth (exit->dest->loop_father) + 1); > + = superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1); > > - bool any = false; > - gphi_iterator psi; > - for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); ) > + bool folded_casts; > + tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value, > + &folded_casts); > + tree bitinv_def, bit_def; > + unsigned HOST_WIDE_INT niter_num; > + > + if (def != chrec_dont_know) > + def = compute_overall_effect_of_inner_loop (ex_loop, def); > + > + /* Handle bitop with invariant induction expression. > + > + .i.e > + for (int i =0 ;i < 32; i++) > + tmp &= bit2; > + if bit2 is an invariant in loop which could simple to tmp &= bit2. */ > + else if ((bitinv_def > + = analyze_and_compute_bitop_with_inv_effect (loop, > + value, niter))) > + def = bitinv_def; > + > + /* Handle bitwise induction expression. > + > + .i.e. > + for (int i = 0; i != 64; i+=3) > + res &= ~(1UL << i); > + > + RES can't be analyzed out by SCEV because it is not polynomially > + expressible, but in fact final value of RES can be replaced by > + RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} > + being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ > + else if (tree_fits_uhwi_p (niter) > + && (niter_num = tree_to_uhwi (niter)) != 0 > + && niter_num < TYPE_PRECISION (TREE_TYPE (value)) > + && (bit_def > + = analyze_and_compute_bitwise_induction_effect (loop, value, > + niter_num))) > + def = bit_def; > + > + bool cond_overflow_p; > + if (!tree_does_not_contain_chrecs (def) > + || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) > + /* Moving the computation from the loop may prolong life range > + of some ssa names, which may cause problems if they appear > + on abnormal edges. */ > + || contains_abnormal_ssa_name_p (def) > + /* Do not emit expensive expressions. The rationale is that > + when someone writes a code like > + > + while (n > 45) n -= 45; > + > + he probably knows that n is not large, and does not want it > + to be turned into n %= 45. */ > + || expression_expensive_p (def, &cond_overflow_p)) > + return false; > + > + *final_value = def; > + > + if ((folded_casts > + && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def)) > + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def))) > + || cond_overflow_p) > + *rewrite_overflow = true; > + else > + *rewrite_overflow = false; > + > + return true; > +} > + > +/* Given a loop closed PHI, replace it with a new assignment from its > + FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to > + rewrite expressions in FINAL_VALUE to avoid overflow UB. When FINAL_VALUE > + is constant, we could just propagate the constant, however, sometimes we > + have to leave an unused copy assignment around, if so, ALWAYS_KEEP is set > + to true. */ > + > +static void > +apply_scev_final_value_replacement (gphi *phi, tree final_value, > + bool rewrite_overflow, bool always_keep) > +{ > + tree rslt = gimple_phi_result (phi); > + gphi_iterator psi = gsi_for_phi (phi); > + gimple_stmt_iterator gsi; > + location_t loc = gimple_phi_arg_location (phi, 0); > + basic_block bb = gimple_bb (phi); > + gimple_seq stmts; > + > + /* This is a degenerate PHI with only one argument. */ > + gcc_assert (gimple_phi_num_args (phi) == 1); > + > + remove_phi_node (&psi, false); > + > + /* Propagate constants immediately. */ > + if (CONSTANT_CLASS_P (final_value) > + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt)) > { > - gphi *phi = psi.phi (); > - tree rslt = PHI_RESULT (phi); > - tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit); > - tree def = phidef; > - if (virtual_operand_p (def)) > - { > - gsi_next (&psi); > - continue; > - } > + replace_uses_by (rslt, final_value); > + > + /* Sometimes, we need to leave an unused initialization around to avoid > + invalidating the SCEV cache. */ > + if (!always_keep) > + return; > + } > + > + tree def = force_gimple_operand (final_value, &stmts, false, NULL_TREE); > + gassign *ass = gimple_build_assign (rslt, def); > + > + gimple_set_location (ass, loc); > + gimple_seq_add_stmt (&stmts, ass); > > - if (!POINTER_TYPE_P (TREE_TYPE (def)) > - && !INTEGRAL_TYPE_P (TREE_TYPE (def))) > + /* If def's type has undefined overflow and there were folded casts, rewrite > + all stmts added for def into arithmetics with defined overflow > + behavior. */ > + if (rewrite_overflow) > + { > + gsi = gsi_start (stmts); > + while (!gsi_end_p (gsi)) > { > - gsi_next (&psi); > - continue; > + gimple *stmt = gsi_stmt (gsi); > + if (is_gimple_assign (stmt) > + && arith_code_with_undefined_signed_overflow > + (gimple_assign_rhs_code (stmt))) > + rewrite_to_defined_overflow (&gsi); > + gsi_next (&gsi); > } > + } > > - bool folded_casts; > - def = analyze_scalar_evolution_in_loop (ex_loop, loop, def, > - &folded_casts); > + gsi = gsi_after_labels (bb); > + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); > +} > > - tree bitinv_def, bit_def; > - unsigned HOST_WIDE_INT niter_num; > +/* Do final value replacement for LOOP, return true if we did anything. */ > + > +bool > +final_value_replacement_loop (class loop *loop) > +{ > + /* If we do not know exact number of iterations of the loop, we cannot > + replace the final value. */ > + edge exit = single_exit (loop); > + if (!exit || number_of_latch_executions (loop) == chrec_dont_know) > + return false; > > - if (def != chrec_dont_know) > - def = compute_overall_effect_of_inner_loop (ex_loop, def); > + /* Ensure that it is possible to insert new statements somewhere. */ > + if (!single_pred_p (exit->dest)) > + split_loop_exit_edge (exit); > > - /* Handle bitop with invariant induction expression. > + bool any = false; > + gphi_iterator psi; > + for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); ) > + { > + gphi *phi = psi.phi (); > + tree rslt = PHI_RESULT (phi); > + tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit); > + bool rewrite_overflow; > > - .i.e > - for (int i =0 ;i < 32; i++) > - tmp &= bit2; > - if bit2 is an invariant in loop which could simple to > - tmp &= bit2. */ > - else if ((bitinv_def > - = analyze_and_compute_bitop_with_inv_effect (loop, > - phidef, niter))) > - def = bitinv_def; > - > - /* Handle bitwise induction expression. > - > - .i.e. > - for (int i = 0; i != 64; i+=3) > - res &= ~(1UL << i); > - > - RES can't be analyzed out by SCEV because it is not polynomially > - expressible, but in fact final value of RES can be replaced by > - RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} > - being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ > - else if (tree_fits_uhwi_p (niter) > - && (niter_num = tree_to_uhwi (niter)) != 0 > - && niter_num < TYPE_PRECISION (TREE_TYPE (phidef)) > - && (bit_def > - = analyze_and_compute_bitwise_induction_effect (loop, > - phidef, > - niter_num))) > - def = bit_def; > - > - bool cond_overflow_p; > - if (!tree_does_not_contain_chrecs (def) > - || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) > - /* Moving the computation from the loop may prolong life range > - of some ssa names, which may cause problems if they appear > - on abnormal edges. */ > - || contains_abnormal_ssa_name_p (def) > - /* Do not emit expensive expressions. The rationale is that > - when someone writes a code like > - > - while (n > 45) n -= 45; > - > - he probably knows that n is not large, and does not want it > - to be turned into n %= 45. */ > - || expression_expensive_p (def, &cond_overflow_p)) > + if (!simple_scev_final_value (loop, def, &def, &rewrite_overflow)) > { > - if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - fprintf (dump_file, "not replacing:\n "); > - print_gimple_stmt (dump_file, phi, 0); > - fprintf (dump_file, "\n"); > - } Can you keep this dump please? Otherwise OK. Thanks, Richard. > gsi_next (&psi); > continue; > } > @@ -3904,43 +3978,11 @@ final_value_replacement_loop (class loop *loop) > to a GIMPLE sequence or to a statement list (keeping this a > GENERIC interface). */ > def = unshare_expr (def); > - auto loc = gimple_phi_arg_location (phi, exit->dest_idx); > - remove_phi_node (&psi, false); > - > - /* Propagate constants immediately, but leave an unused initialization > - around to avoid invalidating the SCEV cache. */ > - if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt)) > - replace_uses_by (rslt, def); > - > - /* Create the replacement statements. */ > - gimple_seq stmts; > - def = force_gimple_operand (def, &stmts, false, NULL_TREE); > - gassign *ass = gimple_build_assign (rslt, def); > - gimple_set_location (ass, loc); > - gimple_seq_add_stmt (&stmts, ass); > - > - /* If def's type has undefined overflow and there were folded > - casts, rewrite all stmts added for def into arithmetics > - with defined overflow behavior. */ > - if ((folded_casts > - && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def)) > - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def))) > - || cond_overflow_p) > - { > - gimple_stmt_iterator gsi2; > - gsi2 = gsi_start (stmts); > - while (!gsi_end_p (gsi2)) > - { > - gimple *stmt = gsi_stmt (gsi2); > - if (is_gimple_assign (stmt) > - && arith_code_with_undefined_signed_overflow > - (gimple_assign_rhs_code (stmt))) > - rewrite_to_defined_overflow (&gsi2); > - gsi_next (&gsi2); > - } > - } > - gimple_stmt_iterator gsi = gsi_after_labels (exit->dest); > - gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); > + > + /* Advance iterator before the PHI is removed. */ > + gsi_next (&psi); > + apply_scev_final_value_replacement (phi, def, rewrite_overflow, true); > + > if (dump_file) > { > fprintf (dump_file, " final stmt:\n "); > -- > 2.17.1
Updated the patch according to comments. OK for trunk? Thanks, Feng --- gcc/ PR tree-optimization/90594 * tree-scalar-evolution.cc (get_scev_final_value): New function. (apply_scev_final_value_replacement): Likewise. (final_value_replacement_loop): Call new functions. * tree-scalar-evolution.h (get_scev_final_value): New function declaration. (apply_scev_final_value_replacement): Likewise. (scev_const_prop): Remove unused declaration. --- gcc/tree-scalar-evolution.cc | 294 +++++++++++++++++++++-------------- gcc/tree-scalar-evolution.h | 3 +- 2 files changed, 179 insertions(+), 118 deletions(-) diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index abb2bad7773..3c3719e8e64 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -3775,6 +3775,170 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef, return fold_build2 (code1, type, inv, match_op[0]); } +/* For induction VALUE of LOOP, return its final value at loop exit if it could + be directly calculated based on the initial value and loop niter, also set + REWRITE_OVERFLOW to true in the case that we need to rewrite the final value + to avoid overflow UB when replacement would really happen later. Otherwise, + empty value is returned. The flag CONSIDER_COST specifies whether we care + about if the value is expensive or not. */ + +tree +get_scev_final_value (class loop *loop, tree value, bool *rewrite_overflow, + bool consider_cost) +{ + edge exit = single_exit (loop); + if (!exit) + return NULL_TREE; + + tree niter = number_of_latch_executions (loop); + if (niter == chrec_dont_know) + return NULL_TREE; + + class loop *ex_loop + = superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1); + + bool folded_casts; + tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value, + &folded_casts); + tree bitinv_def, bit_def; + unsigned HOST_WIDE_INT niter_num; + + if (def != chrec_dont_know) + def = compute_overall_effect_of_inner_loop (ex_loop, def); + + /* Handle bitop with invariant induction expression. + + .i.e + for (int i =0 ;i < 32; i++) + tmp &= bit2; + if bit2 is an invariant in loop which could simple to tmp &= bit2. */ + else if ((bitinv_def + = analyze_and_compute_bitop_with_inv_effect (loop, + value, niter))) + def = bitinv_def; + + /* Handle bitwise induction expression. + + .i.e. + for (int i = 0; i != 64; i+=3) + res &= ~(1UL << i); + + RES can't be analyzed out by SCEV because it is not polynomially + expressible, but in fact final value of RES can be replaced by + RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} + being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ + else if (tree_fits_uhwi_p (niter) + && (niter_num = tree_to_uhwi (niter)) != 0 + && niter_num < TYPE_PRECISION (TREE_TYPE (value)) + && (bit_def + = analyze_and_compute_bitwise_induction_effect (loop, value, + niter_num))) + def = bit_def; + + bool cond_overflow_p = false; + + if (!tree_does_not_contain_chrecs (def) + || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) + /* Moving the computation from the loop may prolong life range + of some ssa names, which may cause problems if they appear + on abnormal edges. */ + || contains_abnormal_ssa_name_p (def) + /* Do not emit expensive expressions. The rationale is that + when someone writes a code like + + while (n > 45) n -= 45; + + he probably knows that n is not large, and does not want it + to be turned into n %= 45. */ + || (consider_cost && expression_expensive_p (def, &cond_overflow_p))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "skip scev final value:\n "); + print_generic_expr (dump_file, value); + fprintf (dump_file, " -> "); + print_generic_expr (dump_file, def); + fprintf (dump_file, "\n"); + } + return NULL_TREE; + } + + if (rewrite_overflow) + { + *rewrite_overflow = false; + + if ((folded_casts + && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def)) + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def))) + || cond_overflow_p) + *rewrite_overflow = true; + } + + return def; +} + +/* Given a loop closed PHI, replace it with a new assignment from its + FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to + rewrite expressions in FINAL_VALUE to avoid overflow UB. When FINAL_VALUE + is constant, we could just propagate the constant, however, sometimes we + have to leave an unused copy assignment around, if so, ALWAYS_KEEP is set + to true. */ + +void +apply_scev_final_value_replacement (gphi *phi, tree final_value, + bool rewrite_overflow, bool always_keep) +{ + tree rslt = gimple_phi_result (phi); + gphi_iterator psi = gsi_for_phi (phi); + gimple_stmt_iterator gsi; + location_t loc = gimple_phi_arg_location (phi, 0); + basic_block bb = gimple_bb (phi); + gimple_seq stmts; + + /* This is a degenerate PHI with only one argument. */ + gcc_assert (gimple_phi_num_args (phi) == 1); + + remove_phi_node (&psi, false); + + /* Propagate constants immediately. */ + if (CONSTANT_CLASS_P (final_value) + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt)) + { + replace_uses_by (rslt, final_value); + + /* Sometimes, we need to leave an unused initialization around to avoid + invalidating the SCEV cache. */ + if (!always_keep) + return; + } + + tree def = force_gimple_operand (final_value, &stmts, false, NULL_TREE); + gassign *ass = gimple_build_assign (rslt, def); + + gimple_set_location (ass, loc); + gimple_seq_add_stmt (&stmts, ass); + + /* If def's type has undefined overflow and there were folded casts, rewrite + all stmts added for def into arithmetics with defined overflow + behavior. */ + if (rewrite_overflow) + { + gsi = gsi_start (stmts); + while (!gsi_end_p (gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_assign (stmt) + && arith_code_with_undefined_signed_overflow + (gimple_assign_rhs_code (stmt))) + rewrite_to_defined_overflow (&gsi); + gsi_next (&gsi); + } + } + + gsi = gsi_after_labels (bb); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); +} + /* Do final value replacement for LOOP, return true if we did anything. */ bool @@ -3783,37 +3947,25 @@ final_value_replacement_loop (class loop *loop) /* If we do not know exact number of iterations of the loop, we cannot replace the final value. */ edge exit = single_exit (loop); - if (!exit) - return false; - - tree niter = number_of_latch_executions (loop); - if (niter == chrec_dont_know) + if (!exit || number_of_latch_executions (loop) == chrec_dont_know) return false; /* Ensure that it is possible to insert new statements somewhere. */ if (!single_pred_p (exit->dest)) split_loop_exit_edge (exit); - /* Set stmt insertion pointer. All stmts are inserted before this point. */ - - class loop *ex_loop - = superloop_at_depth (loop, - loop_depth (exit->dest->loop_father) + 1); - bool any = false; gphi_iterator psi; for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); ) { gphi *phi = psi.phi (); tree rslt = PHI_RESULT (phi); - tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit); - tree def = phidef; - if (virtual_operand_p (def)) - { - gsi_next (&psi); - continue; - } + tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit); + bool rewrite_overflow; + /* Now only allow integral types. This check also excludes virtual + ssa name who has void type. TODO: support float value for fast + math. */ if (!POINTER_TYPE_P (TREE_TYPE (def)) && !INTEGRAL_TYPE_P (TREE_TYPE (def))) { @@ -3821,69 +3973,9 @@ final_value_replacement_loop (class loop *loop) continue; } - bool folded_casts; - def = analyze_scalar_evolution_in_loop (ex_loop, loop, def, - &folded_casts); - - tree bitinv_def, bit_def; - unsigned HOST_WIDE_INT niter_num; - - if (def != chrec_dont_know) - def = compute_overall_effect_of_inner_loop (ex_loop, def); - - /* Handle bitop with invariant induction expression. - - .i.e - for (int i =0 ;i < 32; i++) - tmp &= bit2; - if bit2 is an invariant in loop which could simple to - tmp &= bit2. */ - else if ((bitinv_def - = analyze_and_compute_bitop_with_inv_effect (loop, - phidef, niter))) - def = bitinv_def; - - /* Handle bitwise induction expression. - - .i.e. - for (int i = 0; i != 64; i+=3) - res &= ~(1UL << i); - - RES can't be analyzed out by SCEV because it is not polynomially - expressible, but in fact final value of RES can be replaced by - RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} - being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ - else if (tree_fits_uhwi_p (niter) - && (niter_num = tree_to_uhwi (niter)) != 0 - && niter_num < TYPE_PRECISION (TREE_TYPE (phidef)) - && (bit_def - = analyze_and_compute_bitwise_induction_effect (loop, - phidef, - niter_num))) - def = bit_def; - - bool cond_overflow_p; - if (!tree_does_not_contain_chrecs (def) - || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) - /* Moving the computation from the loop may prolong life range - of some ssa names, which may cause problems if they appear - on abnormal edges. */ - || contains_abnormal_ssa_name_p (def) - /* Do not emit expensive expressions. The rationale is that - when someone writes a code like - - while (n > 45) n -= 45; - - he probably knows that n is not large, and does not want it - to be turned into n %= 45. */ - || expression_expensive_p (def, &cond_overflow_p)) + def = get_scev_final_value (loop, def, &rewrite_overflow, true); + if (!def) { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "not replacing:\n "); - print_gimple_stmt (dump_file, phi, 0); - fprintf (dump_file, "\n"); - } gsi_next (&psi); continue; } @@ -3904,43 +3996,11 @@ final_value_replacement_loop (class loop *loop) to a GIMPLE sequence or to a statement list (keeping this a GENERIC interface). */ def = unshare_expr (def); - auto loc = gimple_phi_arg_location (phi, exit->dest_idx); - remove_phi_node (&psi, false); - - /* Propagate constants immediately, but leave an unused initialization - around to avoid invalidating the SCEV cache. */ - if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt)) - replace_uses_by (rslt, def); - - /* Create the replacement statements. */ - gimple_seq stmts; - def = force_gimple_operand (def, &stmts, false, NULL_TREE); - gassign *ass = gimple_build_assign (rslt, def); - gimple_set_location (ass, loc); - gimple_seq_add_stmt (&stmts, ass); - - /* If def's type has undefined overflow and there were folded - casts, rewrite all stmts added for def into arithmetics - with defined overflow behavior. */ - if ((folded_casts - && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def)) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def))) - || cond_overflow_p) - { - gimple_stmt_iterator gsi2; - gsi2 = gsi_start (stmts); - while (!gsi_end_p (gsi2)) - { - gimple *stmt = gsi_stmt (gsi2); - if (is_gimple_assign (stmt) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) - rewrite_to_defined_overflow (&gsi2); - gsi_next (&gsi2); - } - } - gimple_stmt_iterator gsi = gsi_after_labels (exit->dest); - gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + + /* Advance iterator before the PHI is removed. */ + gsi_next (&psi); + apply_scev_final_value_replacement (phi, def, rewrite_overflow, true); + if (dump_file) { fprintf (dump_file, " final stmt:\n "); diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h index 41ba6b237b5..1e0defacf3b 100644 --- a/gcc/tree-scalar-evolution.h +++ b/gcc/tree-scalar-evolution.h @@ -34,8 +34,9 @@ extern tree analyze_scalar_evolution (class loop *, tree); extern tree instantiate_scev (edge, class loop *, tree); extern tree resolve_mixers (class loop *, tree, bool *); extern void gather_stats_on_scev_database (void); +extern tree get_scev_final_value (class loop *, tree, bool *, bool); +extern void apply_scev_final_value_replacement (gphi *, tree, bool, bool); extern bool final_value_replacement_loop (class loop *); -extern unsigned int scev_const_prop (void); extern bool expression_expensive_p (tree, bool *); extern bool simple_iv_with_niters (class loop *, class loop *, tree, struct affine_iv *, tree *, bool); -- 2.17.1
From 621e05c9013f1e8e116f7b348383634af59e3202 Mon Sep 17 00:00:00 2001 From: Feng Xue <fxue@os.amperecomputing.com> Date: Fri, 6 Dec 2024 10:57:52 +0800 Subject: [PATCH 1/2] Refactor final_value_replacement_loop [PR90594] This patch refactors the procedure in tree-scalar-evolution.cc in order to partially export its functionality to other module, so decomposes it to several relatively independent utility functions. 2024-12-06 Feng Xue <fxue@os.amperecomputing.com> gcc/ PR tree-optimization/90594 * tree-scalar-evolution.cc (simple_scev_final_value): New function. (apply_scev_final_value_replacement): Likewise. (final_value_replacement_loop): Call new functions. --- gcc/tree-scalar-evolution.cc | 288 ++++++++++++++++++++--------------- 1 file changed, 165 insertions(+), 123 deletions(-) diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index abb2bad7773..5733632aa78 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -3775,13 +3775,17 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef, return fold_build2 (code1, type, inv, match_op[0]); } -/* Do final value replacement for LOOP, return true if we did anything. */ +/* For induction VALUE of LOOP, return true if its SCEV is simple enough that + its final value at loop exit could be directly calculated based on the + initial value and loop niter, and this value is recorded in FINAL_VALUE, + also set REWRITE_OVERFLOW to true in the case that we need to rewrite the + final value to avoid overflow UB when replacement would really happen + later. */ -bool -final_value_replacement_loop (class loop *loop) +static bool +simple_scev_final_value (class loop *loop, tree value, tree *final_value, + bool *rewrite_overflow) { - /* If we do not know exact number of iterations of the loop, we cannot - replace the final value. */ edge exit = single_exit (loop); if (!exit) return false; @@ -3790,100 +3794,170 @@ final_value_replacement_loop (class loop *loop) if (niter == chrec_dont_know) return false; - /* Ensure that it is possible to insert new statements somewhere. */ - if (!single_pred_p (exit->dest)) - split_loop_exit_edge (exit); - - /* Set stmt insertion pointer. All stmts are inserted before this point. */ + /* TODO: allow float value for fast math. */ + if (!POINTER_TYPE_P (TREE_TYPE (value)) + && !INTEGRAL_TYPE_P (TREE_TYPE (value))) + return false; class loop *ex_loop - = superloop_at_depth (loop, - loop_depth (exit->dest->loop_father) + 1); + = superloop_at_depth (loop, loop_depth (exit->dest->loop_father) + 1); - bool any = false; - gphi_iterator psi; - for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); ) + bool folded_casts; + tree def = analyze_scalar_evolution_in_loop (ex_loop, loop, value, + &folded_casts); + tree bitinv_def, bit_def; + unsigned HOST_WIDE_INT niter_num; + + if (def != chrec_dont_know) + def = compute_overall_effect_of_inner_loop (ex_loop, def); + + /* Handle bitop with invariant induction expression. + + .i.e + for (int i =0 ;i < 32; i++) + tmp &= bit2; + if bit2 is an invariant in loop which could simple to tmp &= bit2. */ + else if ((bitinv_def + = analyze_and_compute_bitop_with_inv_effect (loop, + value, niter))) + def = bitinv_def; + + /* Handle bitwise induction expression. + + .i.e. + for (int i = 0; i != 64; i+=3) + res &= ~(1UL << i); + + RES can't be analyzed out by SCEV because it is not polynomially + expressible, but in fact final value of RES can be replaced by + RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} + being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ + else if (tree_fits_uhwi_p (niter) + && (niter_num = tree_to_uhwi (niter)) != 0 + && niter_num < TYPE_PRECISION (TREE_TYPE (value)) + && (bit_def + = analyze_and_compute_bitwise_induction_effect (loop, value, + niter_num))) + def = bit_def; + + bool cond_overflow_p; + if (!tree_does_not_contain_chrecs (def) + || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) + /* Moving the computation from the loop may prolong life range + of some ssa names, which may cause problems if they appear + on abnormal edges. */ + || contains_abnormal_ssa_name_p (def) + /* Do not emit expensive expressions. The rationale is that + when someone writes a code like + + while (n > 45) n -= 45; + + he probably knows that n is not large, and does not want it + to be turned into n %= 45. */ + || expression_expensive_p (def, &cond_overflow_p)) + return false; + + *final_value = def; + + if ((folded_casts + && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def)) + && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def))) + || cond_overflow_p) + *rewrite_overflow = true; + else + *rewrite_overflow = false; + + return true; +} + +/* Given a loop closed PHI, replace it with a new assignment from its + FINAL_VALUE at loop exit. The flag REWRITE_OVERFLOW tells if we need to + rewrite expressions in FINAL_VALUE to avoid overflow UB. When FINAL_VALUE + is constant, we could just propagate the constant, however, sometimes we + have to leave an unused copy assignment around, if so, ALWAYS_KEEP is set + to true. */ + +static void +apply_scev_final_value_replacement (gphi *phi, tree final_value, + bool rewrite_overflow, bool always_keep) +{ + tree rslt = gimple_phi_result (phi); + gphi_iterator psi = gsi_for_phi (phi); + gimple_stmt_iterator gsi; + location_t loc = gimple_phi_arg_location (phi, 0); + basic_block bb = gimple_bb (phi); + gimple_seq stmts; + + /* This is a degenerate PHI with only one argument. */ + gcc_assert (gimple_phi_num_args (phi) == 1); + + remove_phi_node (&psi, false); + + /* Propagate constants immediately. */ + if (CONSTANT_CLASS_P (final_value) + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt)) { - gphi *phi = psi.phi (); - tree rslt = PHI_RESULT (phi); - tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit); - tree def = phidef; - if (virtual_operand_p (def)) - { - gsi_next (&psi); - continue; - } + replace_uses_by (rslt, final_value); + + /* Sometimes, we need to leave an unused initialization around to avoid + invalidating the SCEV cache. */ + if (!always_keep) + return; + } + + tree def = force_gimple_operand (final_value, &stmts, false, NULL_TREE); + gassign *ass = gimple_build_assign (rslt, def); + + gimple_set_location (ass, loc); + gimple_seq_add_stmt (&stmts, ass); - if (!POINTER_TYPE_P (TREE_TYPE (def)) - && !INTEGRAL_TYPE_P (TREE_TYPE (def))) + /* If def's type has undefined overflow and there were folded casts, rewrite + all stmts added for def into arithmetics with defined overflow + behavior. */ + if (rewrite_overflow) + { + gsi = gsi_start (stmts); + while (!gsi_end_p (gsi)) { - gsi_next (&psi); - continue; + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_assign (stmt) + && arith_code_with_undefined_signed_overflow + (gimple_assign_rhs_code (stmt))) + rewrite_to_defined_overflow (&gsi); + gsi_next (&gsi); } + } - bool folded_casts; - def = analyze_scalar_evolution_in_loop (ex_loop, loop, def, - &folded_casts); + gsi = gsi_after_labels (bb); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); +} - tree bitinv_def, bit_def; - unsigned HOST_WIDE_INT niter_num; +/* Do final value replacement for LOOP, return true if we did anything. */ + +bool +final_value_replacement_loop (class loop *loop) +{ + /* If we do not know exact number of iterations of the loop, we cannot + replace the final value. */ + edge exit = single_exit (loop); + if (!exit || number_of_latch_executions (loop) == chrec_dont_know) + return false; - if (def != chrec_dont_know) - def = compute_overall_effect_of_inner_loop (ex_loop, def); + /* Ensure that it is possible to insert new statements somewhere. */ + if (!single_pred_p (exit->dest)) + split_loop_exit_edge (exit); - /* Handle bitop with invariant induction expression. + bool any = false; + gphi_iterator psi; + for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); ) + { + gphi *phi = psi.phi (); + tree rslt = PHI_RESULT (phi); + tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit); + bool rewrite_overflow; - .i.e - for (int i =0 ;i < 32; i++) - tmp &= bit2; - if bit2 is an invariant in loop which could simple to - tmp &= bit2. */ - else if ((bitinv_def - = analyze_and_compute_bitop_with_inv_effect (loop, - phidef, niter))) - def = bitinv_def; - - /* Handle bitwise induction expression. - - .i.e. - for (int i = 0; i != 64; i+=3) - res &= ~(1UL << i); - - RES can't be analyzed out by SCEV because it is not polynomially - expressible, but in fact final value of RES can be replaced by - RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63} - being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */ - else if (tree_fits_uhwi_p (niter) - && (niter_num = tree_to_uhwi (niter)) != 0 - && niter_num < TYPE_PRECISION (TREE_TYPE (phidef)) - && (bit_def - = analyze_and_compute_bitwise_induction_effect (loop, - phidef, - niter_num))) - def = bit_def; - - bool cond_overflow_p; - if (!tree_does_not_contain_chrecs (def) - || chrec_contains_symbols_defined_in_loop (def, ex_loop->num) - /* Moving the computation from the loop may prolong life range - of some ssa names, which may cause problems if they appear - on abnormal edges. */ - || contains_abnormal_ssa_name_p (def) - /* Do not emit expensive expressions. The rationale is that - when someone writes a code like - - while (n > 45) n -= 45; - - he probably knows that n is not large, and does not want it - to be turned into n %= 45. */ - || expression_expensive_p (def, &cond_overflow_p)) + if (!simple_scev_final_value (loop, def, &def, &rewrite_overflow)) { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "not replacing:\n "); - print_gimple_stmt (dump_file, phi, 0); - fprintf (dump_file, "\n"); - } gsi_next (&psi); continue; } @@ -3904,43 +3978,11 @@ final_value_replacement_loop (class loop *loop) to a GIMPLE sequence or to a statement list (keeping this a GENERIC interface). */ def = unshare_expr (def); - auto loc = gimple_phi_arg_location (phi, exit->dest_idx); - remove_phi_node (&psi, false); - - /* Propagate constants immediately, but leave an unused initialization - around to avoid invalidating the SCEV cache. */ - if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt)) - replace_uses_by (rslt, def); - - /* Create the replacement statements. */ - gimple_seq stmts; - def = force_gimple_operand (def, &stmts, false, NULL_TREE); - gassign *ass = gimple_build_assign (rslt, def); - gimple_set_location (ass, loc); - gimple_seq_add_stmt (&stmts, ass); - - /* If def's type has undefined overflow and there were folded - casts, rewrite all stmts added for def into arithmetics - with defined overflow behavior. */ - if ((folded_casts - && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def)) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def))) - || cond_overflow_p) - { - gimple_stmt_iterator gsi2; - gsi2 = gsi_start (stmts); - while (!gsi_end_p (gsi2)) - { - gimple *stmt = gsi_stmt (gsi2); - if (is_gimple_assign (stmt) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) - rewrite_to_defined_overflow (&gsi2); - gsi_next (&gsi2); - } - } - gimple_stmt_iterator gsi = gsi_after_labels (exit->dest); - gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + + /* Advance iterator before the PHI is removed. */ + gsi_next (&psi); + apply_scev_final_value_replacement (phi, def, rewrite_overflow, true); + if (dump_file) { fprintf (dump_file, " final stmt:\n "); -- 2.17.1