Message ID | patch-18326-tamar@arm.com |
---|---|
State | New |
Headers | show |
Series | middle-end: delay updating of dominators until later during vectorization. [PR114081] | expand |
On Sun, 25 Feb 2024, Tamar Christina wrote: > Hi All, > > The testcase shows an interesting case where we have multiple loops sharing a > live value and have an early exit that go to the same location. The additional > complication is that on x86_64 with -mavx we seem to also do prologue peeling > on the loops. > > We correctly identify which BB we need their dominators updated for, but we do > so too early. > > Instead of adding more dominator update we can solve this by for the cases with > multiple exits not to verify dominators at the end of peeling if peeling for > vectorization. > > We can then perform the final dominator updates just before vectorization when > all loop transformations are done. What's the actual CFG transform that happens between the old and the new place? I see a possible edge splitting but where is the one that makes this patch work? > This also means we reduce the number of dominator updates needed by at least > 50% and fixes the ICE. > > Bootstrapped Regtested on aarch64-none-linux-gnu and > x86_64-pc-linux-gnu no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/114081 > PR tree-optimization/113290 > * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): > Skip dominator update when multiple exit. > (vect_do_peeling): Remove multiple exit dominator update. > * tree-vect-loop.cc (vect_transform_loop): Update dominators when > multiple exits. > * tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE, > dominators_needing_update): New. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/114081 > PR tree-optimization/113290 > * gcc.dg/vect/vect-early-break_120-pr114081.c: New test. > * gcc.dg/vect/vect-early-break_121-pr114081.c: New test. > > --- inline copy of patch -- > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > new file mode 100644 > index 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e41730fd2216f0ec8061376 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > @@ -0,0 +1,38 @@ > +/* { dg-do compile } */ > +/* { dg-add-options vect_early_break } */ > +/* { dg-require-effective-target vect_early_break } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-O3" } */ > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > + > +typedef struct filter_list_entry { > + const char *name; > + int id; > + void (*function)(); > +} filter_list_entry; > + > +static const filter_list_entry filter_list[9] = {0}; > + > +void php_zval_filter(int filter, int id1) { > + filter_list_entry filter_func; > + > + int size = 9; > + for (int i = 0; i < size; ++i) { > + if (filter_list[i].id == filter) { > + filter_func = filter_list[i]; > + goto done; > + } > + } > + > +#pragma GCC novector > + for (int i = 0; i < size; ++i) { > + if (filter_list[i].id == 0x0204) { > + filter_func = filter_list[i]; > + goto done; > + } > + } > +done: > + if (!filter_func.id) > + filter_func.function(); > +} > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > new file mode 100644 > index 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31dd1c741f9e36738515 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > @@ -0,0 +1,37 @@ > +/* { dg-do compile } */ > +/* { dg-add-options vect_early_break } */ > +/* { dg-require-effective-target vect_early_break } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-O3" } */ > + > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > + > +typedef struct filter_list_entry { > + const char *name; > + int id; > + void (*function)(); > +} filter_list_entry; > + > +static const filter_list_entry filter_list[9] = {0}; > + > +void php_zval_filter(int filter, int id1) { > + filter_list_entry filter_func; > + > + int size = 9; > + for (int i = 0; i < size; ++i) { > + if (filter_list[i].id == filter) { > + filter_func = filter_list[i]; > + goto done; > + } > + } > + > + for (int i = 0; i < size; ++i) { > + if (filter_list[i].id == 0x0204) { > + filter_func = filter_list[i]; > + goto done; > + } > + } > +done: > + if (!filter_func.id) > + filter_func.function(); > +} > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > index 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e489d306f81e090d0 100644 > --- a/gcc/tree-vect-loop-manip.cc > +++ b/gcc/tree-vect-loop-manip.cc > @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, > doms.safe_push (e->dest); > } > > - iterate_fix_dominators (CDI_DOMINATORS, doms, false); > if (updated_doms) > updated_doms->safe_splice (doms); > } > @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, > free (new_bbs); > free (bbs); > > - checking_verify_dominators (CDI_DOMINATORS); > + /* If we're peeling for vectorization then delay verifying dominators. */ > + if (!flow_loops || !multiple_exits_p) > + checking_verify_dominators (CDI_DOMINATORS); > > return new_loop; > } > @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, > epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; > edge epilog_e = vect_epilogues ? e : scalar_e; > edge new_epilog_e = NULL; > - auto_vec<basic_block> doms; > + auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo); > epilog > = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e, > &new_epilog_e, true, &doms); > @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, > scale_loop_profile (epilog, prob_epilog, -1); > } > > - /* Recalculate the dominators after adding the guard edge. */ > - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > - iterate_fix_dominators (CDI_DOMINATORS, doms, false); > - > /* When we do not have a loop-around edge to the epilog we know > the vector loop covered at least VF scalar iterations unless > we have early breaks. > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaaeeffeae1fb900f 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) > /* Handle any code motion that we need to for early-break vectorization after > we've done peeling but just before we start vectorizing. */ > if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > - move_early_exit_stmts (loop_vinfo); > + { > + /* Recalculate the dominators. */ > + iterate_fix_dominators (CDI_DOMINATORS, > + LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), false); > + move_early_exit_stmts (loop_vinfo); > + } > > /* Schedule the SLP instances first, then handle loop vectorization > below. */ > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcbbb110c0380b7d73aa 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -961,6 +961,10 @@ public: > /* Statements whose VUSES need updating if early break vectorization is to > happen. */ > auto_vec<gimple*> early_break_vuses; > + > + /* Dominators that need to be recalculated that have been deferred until after > + all peeling. */ > + auto_vec<basic_block> dominators_needing_update; > } *loop_vec_info; > > /* Access Functions. */ > @@ -1021,6 +1025,7 @@ public: > (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src) > #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb > #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses > +#define LOOP_VINFO_DOMS_NEED_UPDATE(L) (L)->dominators_needing_update > #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds > #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond > #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies > > > > >
> > The testcase shows an interesting case where we have multiple loops sharing a > > live value and have an early exit that go to the same location. The additional > > complication is that on x86_64 with -mavx we seem to also do prologue peeling > > on the loops. > > > > We correctly identify which BB we need their dominators updated for, but we do > > so too early. > > > > Instead of adding more dominator update we can solve this by for the cases with > > multiple exits not to verify dominators at the end of peeling if peeling for > > vectorization. > > > > We can then perform the final dominator updates just before vectorization when > > all loop transformations are done. > > What's the actual CFG transform that happens between the old and the new > place? I see a possible edge splitting but where is the one that makes > this patch work? It's not one but two. 1. loop 1 is prologue peeled. This ICEs because the dominator update is only happening for epilogue peeling. Note that loop 1 here dominates 21 and the ICE is: ice.c: In function 'void php_zval_filter(int, int)': ice.c:7:6: error: dominator of 14 should be 21, not 3 7 | void php_zval_filter(int filter, int id1) { | ^~~~~~~~~~~~~~~ ice.c:7:6: error: dominator of 10 should be 21, not 3 during GIMPLE pass: vect dump file: a-ice.c.179t.vect This can be simply fixed by just moving the dom update code down: diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index a5202f32e27..e88948370c6 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -1845,13 +1845,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, to the original function exit we recorded. Other exits are already correct. */ if (multiple_exits_p) - { - update_loop = new_loop; - doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); - for (unsigned i = 0; i < doms.length (); ++i) - if (flow_bb_inside_loop_p (loop, doms[i])) - doms.unordered_remove (i); - } + update_loop = new_loop; } else /* Add the copy at entry. */ { @@ -1906,6 +1900,11 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, if (multiple_exits_p) { + doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); + for (unsigned i = 0; i < doms.length (); ++i) + if (flow_bb_inside_loop_p (loop, doms[i])) + doms.unordered_remove (i); + for (edge e : get_loop_exit_edges (update_loop)) { edge ex; with that done, the next ICE comes along. Loop 1 is peeled again, but this time for epilogue. however loop 1 no longer dominates the exits as the prologue peeled loop does. So we don't find anything to update and ice with the second ICE: ice.c: In function 'void php_zval_filter(int, int)': ice.c:7:6: error: dominator of 14 should be 2, not 21 7 | void php_zval_filter(int filter, int id1) { | ^~~~~~~~~~~~~~~ ice.c:7:6: error: dominator of 10 should be 2, not 21 during GIMPLE pass: vect dump file: a-ice.c.179t.vect because the prologue loop no longer dominates them due to the skip edge. This is why delaying works because we know we have to update the dominators of 14 and 10, but to what we don't know yet. Tamar > > > This also means we reduce the number of dominator updates needed by at least > > 50% and fixes the ICE. > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and > > x86_64-pc-linux-gnu no issues. > > > > Ok for master? > > > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > PR tree-optimization/114081 > > PR tree-optimization/113290 > > * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): > > Skip dominator update when multiple exit. > > (vect_do_peeling): Remove multiple exit dominator update. > > * tree-vect-loop.cc (vect_transform_loop): Update dominators when > > multiple exits. > > * tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE, > > dominators_needing_update): New. > > > > gcc/testsuite/ChangeLog: > > > > PR tree-optimization/114081 > > PR tree-optimization/113290 > > * gcc.dg/vect/vect-early-break_120-pr114081.c: New test. > > * gcc.dg/vect/vect-early-break_121-pr114081.c: New test. > > > > --- inline copy of patch -- > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > > new file mode 100644 > > index > 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e4173 > 0fd2216f0ec8061376 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > > @@ -0,0 +1,38 @@ > > +/* { dg-do compile } */ > > +/* { dg-add-options vect_early_break } */ > > +/* { dg-require-effective-target vect_early_break } */ > > +/* { dg-require-effective-target vect_int } */ > > +/* { dg-additional-options "-O3" } */ > > + > > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > > + > > +typedef struct filter_list_entry { > > + const char *name; > > + int id; > > + void (*function)(); > > +} filter_list_entry; > > + > > +static const filter_list_entry filter_list[9] = {0}; > > + > > +void php_zval_filter(int filter, int id1) { > > + filter_list_entry filter_func; > > + > > + int size = 9; > > + for (int i = 0; i < size; ++i) { > > + if (filter_list[i].id == filter) { > > + filter_func = filter_list[i]; > > + goto done; > > + } > > + } > > + > > +#pragma GCC novector > > + for (int i = 0; i < size; ++i) { > > + if (filter_list[i].id == 0x0204) { > > + filter_func = filter_list[i]; > > + goto done; > > + } > > + } > > +done: > > + if (!filter_func.id) > > + filter_func.function(); > > +} > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > > new file mode 100644 > > index > 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31 > dd1c741f9e36738515 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > > @@ -0,0 +1,37 @@ > > +/* { dg-do compile } */ > > +/* { dg-add-options vect_early_break } */ > > +/* { dg-require-effective-target vect_early_break } */ > > +/* { dg-require-effective-target vect_int } */ > > +/* { dg-additional-options "-O3" } */ > > + > > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > > + > > +typedef struct filter_list_entry { > > + const char *name; > > + int id; > > + void (*function)(); > > +} filter_list_entry; > > + > > +static const filter_list_entry filter_list[9] = {0}; > > + > > +void php_zval_filter(int filter, int id1) { > > + filter_list_entry filter_func; > > + > > + int size = 9; > > + for (int i = 0; i < size; ++i) { > > + if (filter_list[i].id == filter) { > > + filter_func = filter_list[i]; > > + goto done; > > + } > > + } > > + > > + for (int i = 0; i < size; ++i) { > > + if (filter_list[i].id == 0x0204) { > > + filter_func = filter_list[i]; > > + goto done; > > + } > > + } > > +done: > > + if (!filter_func.id) > > + filter_func.function(); > > +} > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > > index > 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e > 489d306f81e090d0 100644 > > --- a/gcc/tree-vect-loop-manip.cc > > +++ b/gcc/tree-vect-loop-manip.cc > > @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop > *loop, edge loop_exit, > > doms.safe_push (e->dest); > > } > > > > - iterate_fix_dominators (CDI_DOMINATORS, doms, false); > > if (updated_doms) > > updated_doms->safe_splice (doms); > > } > > @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop > *loop, edge loop_exit, > > free (new_bbs); > > free (bbs); > > > > - checking_verify_dominators (CDI_DOMINATORS); > > + /* If we're peeling for vectorization then delay verifying dominators. */ > > + if (!flow_loops || !multiple_exits_p) > > + checking_verify_dominators (CDI_DOMINATORS); > > > > return new_loop; > > } > > @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree > niters, tree nitersm1, > > epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; > > edge epilog_e = vect_epilogues ? e : scalar_e; > > edge new_epilog_e = NULL; > > - auto_vec<basic_block> doms; > > + auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE > (loop_vinfo); > > epilog > > = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e, > > &new_epilog_e, true, &doms); > > @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree > niters, tree nitersm1, > > scale_loop_profile (epilog, prob_epilog, -1); > > } > > > > - /* Recalculate the dominators after adding the guard edge. */ > > - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > > - iterate_fix_dominators (CDI_DOMINATORS, doms, false); > > - > > /* When we do not have a loop-around edge to the epilog we know > > the vector loop covered at least VF scalar iterations unless > > we have early breaks. > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > index > 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaae > effeae1fb900f 100644 > > --- a/gcc/tree-vect-loop.cc > > +++ b/gcc/tree-vect-loop.cc > > @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, > gimple *loop_vectorized_call) > > /* Handle any code motion that we need to for early-break vectorization after > > we've done peeling but just before we start vectorizing. */ > > if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > > - move_early_exit_stmts (loop_vinfo); > > + { > > + /* Recalculate the dominators. */ > > + iterate_fix_dominators (CDI_DOMINATORS, > > + LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), > false); > > + move_early_exit_stmts (loop_vinfo); > > + } > > > > /* Schedule the SLP instances first, then handle loop vectorization > > below. */ > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > > index > db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcb > bb110c0380b7d73aa 100644 > > --- a/gcc/tree-vectorizer.h > > +++ b/gcc/tree-vectorizer.h > > @@ -961,6 +961,10 @@ public: > > /* Statements whose VUSES need updating if early break vectorization is to > > happen. */ > > auto_vec<gimple*> early_break_vuses; > > + > > + /* Dominators that need to be recalculated that have been deferred until after > > + all peeling. */ > > + auto_vec<basic_block> dominators_needing_update; > > } *loop_vec_info; > > > > /* Access Functions. */ > > @@ -1021,6 +1025,7 @@ public: > > (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src) > > #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb > > #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses > > +#define LOOP_VINFO_DOMS_NEED_UPDATE(L) (L)- > >dominators_needing_update > > #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds > > #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond > > #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies > > > > > > > > > > > > -- > Richard Biener <rguenther@suse.de> > SUSE Software Solutions Germany GmbH, > Frankenstrasse 146, 90461 Nuernberg, Germany; > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
On Mon, 26 Feb 2024, Tamar Christina wrote: > > > The testcase shows an interesting case where we have multiple loops sharing a > > > live value and have an early exit that go to the same location. The additional > > > complication is that on x86_64 with -mavx we seem to also do prologue peeling > > > on the loops. > > > > > > We correctly identify which BB we need their dominators updated for, but we do > > > so too early. > > > > > > Instead of adding more dominator update we can solve this by for the cases with > > > multiple exits not to verify dominators at the end of peeling if peeling for > > > vectorization. > > > > > > We can then perform the final dominator updates just before vectorization when > > > all loop transformations are done. > > > > What's the actual CFG transform that happens between the old and the new > > place? I see a possible edge splitting but where is the one that makes > > this patch work? > > It's not one but two. > 1. loop 1 is prologue peeled. This ICEs because the dominator update is only happening > for epilogue peeling. Note that loop 1 here dominates 21 and the ICE is: > > ice.c: In function 'void php_zval_filter(int, int)': > ice.c:7:6: error: dominator of 14 should be 21, not 3 > 7 | void php_zval_filter(int filter, int id1) { > | ^~~~~~~~~~~~~~~ > ice.c:7:6: error: dominator of 10 should be 21, not 3 > during GIMPLE pass: vect > dump file: a-ice.c.179t.vect > > This can be simply fixed by just moving the dom update code down: > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > index a5202f32e27..e88948370c6 100644 > --- a/gcc/tree-vect-loop-manip.cc > +++ b/gcc/tree-vect-loop-manip.cc > @@ -1845,13 +1845,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, > to the original function exit we recorded. Other exits are already > correct. */ > if (multiple_exits_p) > - { > - update_loop = new_loop; > - doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); > - for (unsigned i = 0; i < doms.length (); ++i) > - if (flow_bb_inside_loop_p (loop, doms[i])) > - doms.unordered_remove (i); > - } > + update_loop = new_loop; > } > else /* Add the copy at entry. */ > { > @@ -1906,6 +1900,11 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, > > if (multiple_exits_p) > { > + doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); > + for (unsigned i = 0; i < doms.length (); ++i) > + if (flow_bb_inside_loop_p (loop, doms[i])) > + doms.unordered_remove (i); > + > for (edge e : get_loop_exit_edges (update_loop)) > { > edge ex; > > with that done, the next ICE comes along. Loop 1 is peeled again, but this time for epilogue. > however loop 1 no longer dominates the exits as the prologue peeled loop does. > > So we don't find anything to update and ice with the second ICE: > > ice.c: In function 'void php_zval_filter(int, int)': > ice.c:7:6: error: dominator of 14 should be 2, not 21 > 7 | void php_zval_filter(int filter, int id1) { > | ^~~~~~~~~~~~~~~ > ice.c:7:6: error: dominator of 10 should be 2, not 21 > during GIMPLE pass: vect > dump file: a-ice.c.179t.vect OK, that seems to be because the condition around the prologue doesn't update everything for the skip_prolog case (slpeel_add_loop_guard fails to update for multi-exits). A more "general" dominance update in the face of multiple exits might be something like for (edge alt_e : loop_exits) { if (alt_e == loop_exit) continue; basic_block old_dom = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest); if (flow_bb_inside_loop_p (loop, old_dom)) { auto_vec<basic_block, 8> queue; for (auto son = first_dom_son (CDI_DOMINATORS, old_dom); son; son = next_dom_son (CDI_DOMINATORS, son)) if (!flow_bb_inside_loop_p (loop, son)) queue.safe_push (son); for (auto son : queue) set_immediate_dominator (CDI_DOMINATORS, son, get_bb_copy (old_dom)); } } basically we know the new most dominating place and we know where we wire in new edges (the exits, or in the new guard the skip point destination). From there we can look at the old immediate dominator which we know we have to adjust - but with multiple exits there might be other blocks also dominated by this block and those we need to adjust as well. The above can be possibly split out to a helper getting old_dom and the "new dom" (the get_bb_copy one) and a predicate that might generally reduce to !dominated_by_p the "exit dest". I'm not sure your way of identifying the "wrong" dominance blocks using for (edge e : get_loop_exit_edges (update_loop)) { edge ex; edge_iterator ei; FOR_EACH_EDGE (ex, ei, e->dest->succs) { /* Find the first non-fallthrough block as fall-throughs can't dominate other blocks. */ if (single_succ_p (ex->dest)) { doms.safe_push (ex->dest); ex = single_succ_edge (ex->dest); } doms.safe_push (ex->dest); } doms.safe_push (e->dest); is sound, but you might pick them up accidentially? I'm giving the above a more thorough try ... Richard. > because the prologue loop no longer dominates them due to the skip edge. This is why delaying > works because we know we have to update the dominators of 14 and 10, but to what we don't know > yet. > > Tamar > > > > > > This also means we reduce the number of dominator updates needed by at least > > > 50% and fixes the ICE. > > > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and > > > x86_64-pc-linux-gnu no issues. > > > > > > Ok for master? > > > > > > Thanks, > > > Tamar > > > > > > gcc/ChangeLog: > > > > > > PR tree-optimization/114081 > > > PR tree-optimization/113290 > > > * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): > > > Skip dominator update when multiple exit. > > > (vect_do_peeling): Remove multiple exit dominator update. > > > * tree-vect-loop.cc (vect_transform_loop): Update dominators when > > > multiple exits. > > > * tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE, > > > dominators_needing_update): New. > > > > > > gcc/testsuite/ChangeLog: > > > > > > PR tree-optimization/114081 > > > PR tree-optimization/113290 > > > * gcc.dg/vect/vect-early-break_120-pr114081.c: New test. > > > * gcc.dg/vect/vect-early-break_121-pr114081.c: New test. > > > > > > --- inline copy of patch -- > > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > > b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > > > new file mode 100644 > > > index > > 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e4173 > > 0fd2216f0ec8061376 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c > > > @@ -0,0 +1,38 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-add-options vect_early_break } */ > > > +/* { dg-require-effective-target vect_early_break } */ > > > +/* { dg-require-effective-target vect_int } */ > > > +/* { dg-additional-options "-O3" } */ > > > + > > > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > > > + > > > +typedef struct filter_list_entry { > > > + const char *name; > > > + int id; > > > + void (*function)(); > > > +} filter_list_entry; > > > + > > > +static const filter_list_entry filter_list[9] = {0}; > > > + > > > +void php_zval_filter(int filter, int id1) { > > > + filter_list_entry filter_func; > > > + > > > + int size = 9; > > > + for (int i = 0; i < size; ++i) { > > > + if (filter_list[i].id == filter) { > > > + filter_func = filter_list[i]; > > > + goto done; > > > + } > > > + } > > > + > > > +#pragma GCC novector > > > + for (int i = 0; i < size; ++i) { > > > + if (filter_list[i].id == 0x0204) { > > > + filter_func = filter_list[i]; > > > + goto done; > > > + } > > > + } > > > +done: > > > + if (!filter_func.id) > > > + filter_func.function(); > > > +} > > > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > > b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > > > new file mode 100644 > > > index > > 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31 > > dd1c741f9e36738515 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c > > > @@ -0,0 +1,37 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-add-options vect_early_break } */ > > > +/* { dg-require-effective-target vect_early_break } */ > > > +/* { dg-require-effective-target vect_int } */ > > > +/* { dg-additional-options "-O3" } */ > > > + > > > +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ > > > + > > > +typedef struct filter_list_entry { > > > + const char *name; > > > + int id; > > > + void (*function)(); > > > +} filter_list_entry; > > > + > > > +static const filter_list_entry filter_list[9] = {0}; > > > + > > > +void php_zval_filter(int filter, int id1) { > > > + filter_list_entry filter_func; > > > + > > > + int size = 9; > > > + for (int i = 0; i < size; ++i) { > > > + if (filter_list[i].id == filter) { > > > + filter_func = filter_list[i]; > > > + goto done; > > > + } > > > + } > > > + > > > + for (int i = 0; i < size; ++i) { > > > + if (filter_list[i].id == 0x0204) { > > > + filter_func = filter_list[i]; > > > + goto done; > > > + } > > > + } > > > +done: > > > + if (!filter_func.id) > > > + filter_func.function(); > > > +} > > > diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc > > > index > > 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e > > 489d306f81e090d0 100644 > > > --- a/gcc/tree-vect-loop-manip.cc > > > +++ b/gcc/tree-vect-loop-manip.cc > > > @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop > > *loop, edge loop_exit, > > > doms.safe_push (e->dest); > > > } > > > > > > - iterate_fix_dominators (CDI_DOMINATORS, doms, false); > > > if (updated_doms) > > > updated_doms->safe_splice (doms); > > > } > > > @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop > > *loop, edge loop_exit, > > > free (new_bbs); > > > free (bbs); > > > > > > - checking_verify_dominators (CDI_DOMINATORS); > > > + /* If we're peeling for vectorization then delay verifying dominators. */ > > > + if (!flow_loops || !multiple_exits_p) > > > + checking_verify_dominators (CDI_DOMINATORS); > > > > > > return new_loop; > > > } > > > @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree > > niters, tree nitersm1, > > > epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; > > > edge epilog_e = vect_epilogues ? e : scalar_e; > > > edge new_epilog_e = NULL; > > > - auto_vec<basic_block> doms; > > > + auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE > > (loop_vinfo); > > > epilog > > > = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e, > > > &new_epilog_e, true, &doms); > > > @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree > > niters, tree nitersm1, > > > scale_loop_profile (epilog, prob_epilog, -1); > > > } > > > > > > - /* Recalculate the dominators after adding the guard edge. */ > > > - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > > > - iterate_fix_dominators (CDI_DOMINATORS, doms, false); > > > - > > > /* When we do not have a loop-around edge to the epilog we know > > > the vector loop covered at least VF scalar iterations unless > > > we have early breaks. > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > > index > > 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaae > > effeae1fb900f 100644 > > > --- a/gcc/tree-vect-loop.cc > > > +++ b/gcc/tree-vect-loop.cc > > > @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, > > gimple *loop_vectorized_call) > > > /* Handle any code motion that we need to for early-break vectorization after > > > we've done peeling but just before we start vectorizing. */ > > > if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > > > - move_early_exit_stmts (loop_vinfo); > > > + { > > > + /* Recalculate the dominators. */ > > > + iterate_fix_dominators (CDI_DOMINATORS, > > > + LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), > > false); > > > + move_early_exit_stmts (loop_vinfo); > > > + } > > > > > > /* Schedule the SLP instances first, then handle loop vectorization > > > below. */ > > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > > > index > > db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcb > > bb110c0380b7d73aa 100644 > > > --- a/gcc/tree-vectorizer.h > > > +++ b/gcc/tree-vectorizer.h > > > @@ -961,6 +961,10 @@ public: > > > /* Statements whose VUSES need updating if early break vectorization is to > > > happen. */ > > > auto_vec<gimple*> early_break_vuses; > > > + > > > + /* Dominators that need to be recalculated that have been deferred until after > > > + all peeling. */ > > > + auto_vec<basic_block> dominators_needing_update; > > > } *loop_vec_info; > > > > > > /* Access Functions. */ > > > @@ -1021,6 +1025,7 @@ public: > > > (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src) > > > #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb > > > #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses > > > +#define LOOP_VINFO_DOMS_NEED_UPDATE(L) (L)- > > >dominators_needing_update > > > #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds > > > #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond > > > #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies > > > > > > > > > > > > > > > > > > > -- > > Richard Biener <rguenther@suse.de> > > SUSE Software Solutions Germany GmbH, > > Frankenstrasse 146, 90461 Nuernberg, Germany; > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg) >
--- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ + +typedef struct filter_list_entry { + const char *name; + int id; + void (*function)(); +} filter_list_entry; + +static const filter_list_entry filter_list[9] = {0}; + +void php_zval_filter(int filter, int id1) { + filter_list_entry filter_func; + + int size = 9; + for (int i = 0; i < size; ++i) { + if (filter_list[i].id == filter) { + filter_func = filter_list[i]; + goto done; + } + } + +#pragma GCC novector + for (int i = 0; i < size; ++i) { + if (filter_list[i].id == 0x0204) { + filter_func = filter_list[i]; + goto done; + } + } +done: + if (!filter_func.id) + filter_func.function(); +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c new file mode 100644 index 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31dd1c741f9e36738515 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ + +typedef struct filter_list_entry { + const char *name; + int id; + void (*function)(); +} filter_list_entry; + +static const filter_list_entry filter_list[9] = {0}; + +void php_zval_filter(int filter, int id1) { + filter_list_entry filter_func; + + int size = 9; + for (int i = 0; i < size; ++i) { + if (filter_list[i].id == filter) { + filter_func = filter_list[i]; + goto done; + } + } + + for (int i = 0; i < size; ++i) { + if (filter_list[i].id == 0x0204) { + filter_func = filter_list[i]; + goto done; + } + } +done: + if (!filter_func.id) + filter_func.function(); +} diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e489d306f81e090d0 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, doms.safe_push (e->dest); } - iterate_fix_dominators (CDI_DOMINATORS, doms, false); if (updated_doms) updated_doms->safe_splice (doms); } @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, free (new_bbs); free (bbs); - checking_verify_dominators (CDI_DOMINATORS); + /* If we're peeling for vectorization then delay verifying dominators. */ + if (!flow_loops || !multiple_exits_p) + checking_verify_dominators (CDI_DOMINATORS); return new_loop; } @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; edge epilog_e = vect_epilogues ? e : scalar_e; edge new_epilog_e = NULL; - auto_vec<basic_block> doms; + auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo); epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e, &new_epilog_e, true, &doms); @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, scale_loop_profile (epilog, prob_epilog, -1); } - /* Recalculate the dominators after adding the guard edge. */ - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) - iterate_fix_dominators (CDI_DOMINATORS, doms, false); - /* When we do not have a loop-around edge to the epilog we know the vector loop covered at least VF scalar iterations unless we have early breaks. diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaaeeffeae1fb900f 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) /* Handle any code motion that we need to for early-break vectorization after we've done peeling but just before we start vectorizing. */ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) - move_early_exit_stmts (loop_vinfo); + { + /* Recalculate the dominators. */ + iterate_fix_dominators (CDI_DOMINATORS, + LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), false); + move_early_exit_stmts (loop_vinfo); + } /* Schedule the SLP instances first, then handle loop vectorization below. */ diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcbbb110c0380b7d73aa 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -961,6 +961,10 @@ public: /* Statements whose VUSES need updating if early break vectorization is to happen. */ auto_vec<gimple*> early_break_vuses; + + /* Dominators that need to be recalculated that have been deferred until after + all peeling. */ + auto_vec<basic_block> dominators_needing_update; } *loop_vec_info; /* Access Functions. */ @@ -1021,6 +1025,7 @@ public: (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src) #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses +#define LOOP_VINFO_DOMS_NEED_UPDATE(L) (L)->dominators_needing_update #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies