Message ID | e5043396-79cb-88a4-5591-08356a420298@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | predcom: Enabled by loop vect at O2 [PR100794] | expand |
"Kewen.Lin via Gcc-patches" <gcc-patches@gcc.gnu.org> writes: > Hi, > > As PR100794 shows, in the current implementation PRE bypasses > some optimization to avoid introducing loop carried dependence > which stops loop vectorizer to vectorize the loop. At -O2, > there is no downstream pass to re-catch this kind of opportunity > if loop vectorizer fails to vectorize that loop. > > This patch follows Richi's suggestion in the PR, if predcom flag > isn't set and loop vectorization will enable predcom without any > unrolling implicitly. The Power9 SPEC2017 evaluation showed it > can speed up 521.wrf_r 3.30% and 554.roms_r 1.08% at very-cheap > cost model, no remarkable impact at cheap cost model, the build > time and size impact is fine (see the PR for the details). > > By the way, I tested another proposal to guard PRE not skip the > optimization for cheap and very-cheap vect cost models, the > evaluation results showed it's fine with very cheap cost model, > but it can degrade some bmks like 521.wrf_r -9.17% and > 549.fotonik3d_r -2.07% etc. > > Bootstrapped/regtested on powerpc64le-linux-gnu P9, > x86_64-redhat-linux and aarch64-linux-gnu. > > Is it ok for trunk? > > BR, > Kewen > ----- > gcc/ChangeLog: > > PR tree-optimization/100794 > * tree-predcom.c (tree_predictive_commoning_loop): Add parameter > allow_unroll_p and only allow unrolling when it's true. > (tree_predictive_commoning): Add parameter allow_unroll_p and > adjust for it. > (run_tree_predictive_commoning): Likewise. > (class pass_predcom): Add private member allow_unroll_p. > (pass_predcom::pass_predcom): Init allow_unroll_p. > (pass_predcom::gate): Check flag_tree_loop_vectorize and > global_options_set.x_flag_predictive_commoning. > (pass_predcom::execute): Adjust for allow_unroll_p. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/100794 > * gcc.dg/tree-ssa/pr100794.c: New test. > > gcc/testsuite/gcc.dg/tree-ssa/pr100794.c | 20 +++++++++ > gcc/tree-predcom.c | 57 +++++++++++++++++------- > 2 files changed, 60 insertions(+), 17 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr100794.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c > new file mode 100644 > index 00000000000..6f707ae7fba > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details -fdisable-tree-vect" } */ > + > +extern double arr[100]; > +extern double foo (double, double); > +extern double sum; > + > +void > +test (int i_0, int i_n) > +{ > + int i; > + for (i = i_0; i < i_n - 1; i++) > + { > + double a = arr[i]; > + double b = arr[i + 1]; > + sum += a * b; > + } > +} > + > +/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */ > diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c > index 02f911a08bb..65a93c8e505 100644 > --- a/gcc/tree-predcom.c > +++ b/gcc/tree-predcom.c > @@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains) > applied to this loop. */ > > static unsigned > -tree_predictive_commoning_loop (class loop *loop) > +tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p) > { > vec<data_reference_p> datarefs; > vec<ddr_p> dependences; > struct component *components; > vec<chain_p> chains = vNULL; > - unsigned unroll_factor; > + unsigned unroll_factor = 0; > class tree_niter_desc desc; > bool unroll = false, loop_closed_ssa = false; > > @@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop) > dump_chains (dump_file, chains); > } > > - /* Determine the unroll factor, and if the loop should be unrolled, ensure > - that its number of iterations is divisible by the factor. */ > - unroll_factor = determine_unroll_factor (chains); > - unroll = (unroll_factor > 1 > - && can_unroll_loop_p (loop, unroll_factor, &desc)); > + if (allow_unroll_p) > + /* Determine the unroll factor, and if the loop should be unrolled, ensure > + that its number of iterations is divisible by the factor. */ > + unroll_factor = determine_unroll_factor (chains); > + > + if (unroll_factor > 1) > + unroll = can_unroll_loop_p (loop, unroll_factor, &desc); > > /* Execute the predictive commoning transformations, and possibly unroll the > loop. */ > @@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop) > /* Runs predictive commoning. */ > > unsigned > -tree_predictive_commoning (void) > +tree_predictive_commoning (bool allow_unroll_p) > { > class loop *loop; > unsigned ret = 0, changed = 0; > @@ -3328,7 +3330,7 @@ tree_predictive_commoning (void) > FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) > if (optimize_loop_for_speed_p (loop)) > { > - changed |= tree_predictive_commoning_loop (loop); > + changed |= tree_predictive_commoning_loop (loop, allow_unroll_p); > } > free_original_copy_tables (); > > @@ -3355,12 +3357,12 @@ tree_predictive_commoning (void) > /* Predictive commoning Pass. */ > > static unsigned > -run_tree_predictive_commoning (struct function *fun) > +run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p) > { > if (number_of_loops (fun) <= 1) > return 0; > > - return tree_predictive_commoning (); > + return tree_predictive_commoning (allow_unroll_p); > } > > namespace { > @@ -3382,15 +3384,36 @@ class pass_predcom : public gimple_opt_pass > { > public: > pass_predcom (gcc::context *ctxt) > - : gimple_opt_pass (pass_data_predcom, ctxt) > + : gimple_opt_pass (pass_data_predcom, ctxt), > + allow_unroll_p (true) > {} > > /* opt_pass methods: */ > - virtual bool gate (function *) { return flag_predictive_commoning != 0; } > - virtual unsigned int execute (function *fun) > - { > - return run_tree_predictive_commoning (fun); > - } > + virtual bool > + gate (function *) > + { > + if (flag_predictive_commoning != 0) > + return true; > + /* Loop vectorization enables predictive commoning implicitly > + only if predictive commoning isn't set explicitly, and it > + doesn't allow unrolling. */ > + if (flag_tree_loop_vectorize > + && !global_options_set.x_flag_predictive_commoning) > + { > + allow_unroll_p = false; > + return true; > + } > + return false; > + } > + > + virtual unsigned int > + execute (function *fun) > + { > + return run_tree_predictive_commoning (fun, allow_unroll_p); > + } > + > +private: > + bool allow_unroll_p; > > }; // class pass_predcom Calculating allow_unroll_p this way doesn't look robust against changes in options caused by pragmas, etc. Would it work if we dropped the member variable and just passed flag_predictive_commoning != 0 to run_tree_predictive commoning? Thanks, Richard
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c new file mode 100644 index 00000000000..6f707ae7fba --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr100794.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-loop-vectorize -fdump-tree-pcom-details -fdisable-tree-vect" } */ + +extern double arr[100]; +extern double foo (double, double); +extern double sum; + +void +test (int i_0, int i_n) +{ + int i; + for (i = i_0; i < i_n - 1; i++) + { + double a = arr[i]; + double b = arr[i + 1]; + sum += a * b; + } +} + +/* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */ diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index 02f911a08bb..65a93c8e505 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -3178,13 +3178,13 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains) applied to this loop. */ static unsigned -tree_predictive_commoning_loop (class loop *loop) +tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p) { vec<data_reference_p> datarefs; vec<ddr_p> dependences; struct component *components; vec<chain_p> chains = vNULL; - unsigned unroll_factor; + unsigned unroll_factor = 0; class tree_niter_desc desc; bool unroll = false, loop_closed_ssa = false; @@ -3272,11 +3272,13 @@ tree_predictive_commoning_loop (class loop *loop) dump_chains (dump_file, chains); } - /* Determine the unroll factor, and if the loop should be unrolled, ensure - that its number of iterations is divisible by the factor. */ - unroll_factor = determine_unroll_factor (chains); - unroll = (unroll_factor > 1 - && can_unroll_loop_p (loop, unroll_factor, &desc)); + if (allow_unroll_p) + /* Determine the unroll factor, and if the loop should be unrolled, ensure + that its number of iterations is divisible by the factor. */ + unroll_factor = determine_unroll_factor (chains); + + if (unroll_factor > 1) + unroll = can_unroll_loop_p (loop, unroll_factor, &desc); /* Execute the predictive commoning transformations, and possibly unroll the loop. */ @@ -3319,7 +3321,7 @@ tree_predictive_commoning_loop (class loop *loop) /* Runs predictive commoning. */ unsigned -tree_predictive_commoning (void) +tree_predictive_commoning (bool allow_unroll_p) { class loop *loop; unsigned ret = 0, changed = 0; @@ -3328,7 +3330,7 @@ tree_predictive_commoning (void) FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) if (optimize_loop_for_speed_p (loop)) { - changed |= tree_predictive_commoning_loop (loop); + changed |= tree_predictive_commoning_loop (loop, allow_unroll_p); } free_original_copy_tables (); @@ -3355,12 +3357,12 @@ tree_predictive_commoning (void) /* Predictive commoning Pass. */ static unsigned -run_tree_predictive_commoning (struct function *fun) +run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p) { if (number_of_loops (fun) <= 1) return 0; - return tree_predictive_commoning (); + return tree_predictive_commoning (allow_unroll_p); } namespace { @@ -3382,15 +3384,36 @@ class pass_predcom : public gimple_opt_pass { public: pass_predcom (gcc::context *ctxt) - : gimple_opt_pass (pass_data_predcom, ctxt) + : gimple_opt_pass (pass_data_predcom, ctxt), + allow_unroll_p (true) {} /* opt_pass methods: */ - virtual bool gate (function *) { return flag_predictive_commoning != 0; } - virtual unsigned int execute (function *fun) - { - return run_tree_predictive_commoning (fun); - } + virtual bool + gate (function *) + { + if (flag_predictive_commoning != 0) + return true; + /* Loop vectorization enables predictive commoning implicitly + only if predictive commoning isn't set explicitly, and it + doesn't allow unrolling. */ + if (flag_tree_loop_vectorize + && !global_options_set.x_flag_predictive_commoning) + { + allow_unroll_p = false; + return true; + } + return false; + } + + virtual unsigned int + execute (function *fun) + { + return run_tree_predictive_commoning (fun, allow_unroll_p); + } + +private: + bool allow_unroll_p; }; // class pass_predcom